fishhook原理

iOS使用fishhook

iOS开发中有时会需要交换两个方法的实现，也就是Method Swizzle，这种功能依赖Objective-C的动态特性实现。对于没有动态性的编程语言比如C语言来说，能不能交换两个方法的实现呢？fishhook开源库可以帮助我们交换两个C函数实现，看下它的基本使用方法。

引入fishhook源码到项目中，在项目中写入以下代码


// Receives the address of the original NSLog implementation; fishhook writes it
// here through the `replaced` field of the rebinding.
static void (*sysNSLog)(NSString *format, ...);

/// Replacement for NSLog: prefixes every message with "NSLog modified, " and
/// forwards it to the original implementation.
/// @param format A printf-style format string, followed by its arguments.
void myNSLog(NSString *format, ...) {
    // Expand the caller's format with the caller's arguments first, so that
    // specifiers such as %d or %@ are resolved against the real va_list.
    NSString *message = nil;
    if (format != nil) {
        va_list args;
        va_start(args, format);
        message = [[NSString alloc] initWithFormat:format arguments:args];
        va_end(args);
    }
    // Call the original implementation with a literal format so that any '%'
    // left in the expanded message is never interpreted as a specifier.
    sysNSLog(@"NSLog modified, %@", message);
}


/// Demonstrates hooking NSLog with fishhook: logs once before the rebinding and
/// once after it, so the two outputs can be compared.
- (void)viewDidLoad {
    [super viewDidLoad];

    // Logged before the rebinding is installed.
    NSLog(@"nslog test");

    struct rebinding nslog;
    nslog.name = "NSLog";                // name of the symbol to replace
    nslog.replacement = myNSLog;         // address of the new function
    nslog.replaced = (void *)&sysNSLog;  // receives the address of the original function

    struct rebinding arr[1] = {nslog};
    rebind_symbols(arr, 1);

    // Logged after the rebinding is installed.
    NSLog(@"nslog test");
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23

每个被替换的函数都需要给name和replaced赋值：name是被替换函数的符号名，replaced是用来保存原函数地址的指针，新函数的地址则存放在replacement字段。将这些信息组装成结构体，并生成结构体数组调用fishhook的rebind_symbols完成绑定。

fishhook原理

在了解fishhook原理之前，需要了解Mach-O文件，可以看这篇文章。
Mach-O文件介绍

我们从fishhook源码入手，先看入口函数。

/**
 * Adds the given rebindings to the global list and applies them to images.
 *
 * @param rebindings      Array of rebinding descriptors.
 * @param rebindings_nel  Number of elements in `rebindings`.
 * @return The value returned by prepend_rebindings; a negative value is
 *         returned immediately and nothing further is done.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  if (_rebindings_head->next == NULL) {
    // First call: register the add-image callback. Per the original note, this
    // callback takes effect for already loaded images as well as later ones.
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  // Later calls: the callback is already registered, so walk the images that
  // are currently loaded and rebind each of them.
  uint32_t image_count = _dyld_image_count();
  uint32_t index = 0;
  while (index < image_count) {
    _rebind_symbols_for_image(_dyld_get_image_header(index),
                              _dyld_get_image_vmaddr_slide(index));
    index++;
  }
  return status;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

rebind_symbols函数调用了prepend_rebindings，prepend_rebindings函数内部把即将hook的函数添加到_rebindings_head这个链表里面来。rebind_symbols接着判断_rebindings_head->next是否为空，为空的话代表rebind_symbols首次调用，这个时候有可能出现部分image还没有加载的情况，只遍历现有的image列表会漏掉它们，所以要注册image加载的回调，回调函数是_rebind_symbols_for_image。这个回调对已经加载的image和后续加载的image都会生效，这样可以保证后续的image加载时会重新调用_rebind_symbols_for_image完成hook。如果不是首次调用，回调已经注册过，只需要遍历当前已加载的image，逐个重新绑定即可。

可以看到fishhook是遍历所有的image，逐个尝试去hook。在每个循环里面实际调用了rebind_symbols_for_image这个函数。

static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
  //校验image
  Dl_info info;
  if (dladdr(header, &info) == 0) {
    return;
  }

  segment_command_t *cur_seg_cmd;
  segment_command_t *linkedit_segment = NULL;
  struct symtab_command* symtab_cmd = NULL;
  struct dysymtab_command* dysymtab_cmd = NULL;

 //遍历load_command，找到symtab_cmd，symtab_cmd
  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
        linkedit_segment = cur_seg_cmd;
      }
    } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
      symtab_cmd = (struct symtab_command*)cur_seg_cmd;
    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
      symtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
    }
  }

  if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
      !dysymtab_cmd->nindirectsyms) {
    return;
  }

  // 找到基地址
  uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
  //基地址加上符号表的偏移量，得到符号表的地址
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
  //symtab_cmd保存了字符串表的偏移量
  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);

  // 动态符号表的地址，动态符号表会索引到符号表
  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
  // 返回到load_command起始地址
  cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      //需要找到DATA段
      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
        continue;
      }
      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
        //segment里面存放的是section_t类型的数据
        section_t *sect =
          (section_t *)(cur + sizeof(segment_command_t)) + j;
        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
      }
    }
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67

rebind_symbols_for_image函数先用dladdr校验image，dladdr可以获取image的基本信息，包括image的地址和名称。rebind_symbols_for_image的核心目的在于获取符号表symtab的地址，和动态符号表indirect_symtab地址，以及字符串表strtab的地址。

我们知道load_command类似于DATA段的索引，如果要找到三个表的地址，需要找到对应的command。通过遍历commands，可以找到symtab_cmd和dysymtab_cmd；字符串表没有单独的command，它的偏移量stroff保存在symtab_cmd里，所以字符串表的地址可以通过symtab_cmd间接得出。找到command之后，用__LINKEDIT段算出的基地址加上各自的偏移量，就可以得出三个表的地址。

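在这段函数中，还有两个变量需要注意，一个是slide，一个是sect。slide是系统生成的随机偏移量，用来随机化程序的执行地址，提高安全性。sect是__DATA或__DATA_CONST段里的section，只有类型为懒加载符号指针（S_LAZY_SYMBOL_POINTERS）或非懒加载符号指针（S_NON_LAZY_SYMBOL_POINTERS）的section才会被处理，它保存了section的大小、地址，以及在动态符号表中的起始下标（reserved1）等信息。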

最后执行的函数是perform_rebinding_with_section，这里执行了替换函数实现的操作。

/**
 * Rebinds the symbol pointers of one section.
 *
 * For each pointer slot in the section, the symbol name is looked up through
 * the indirect symbol table, the symbol table and the string table. The name,
 * minus its first character, is compared with each requested rebinding; on the
 * first match the slot is redirected to the replacement.
 *
 * @param rebindings       Head of the list of rebindings to apply.
 * @param section          Section holding the symbol pointers.
 * @param slide            Virtual memory slide of the image.
 * @param symtab           Symbol table of the image.
 * @param strtab           String table of the image.
 * @param indirect_symtab  Indirect symbol table of the image.
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  // Symbol table indices for this section, starting at reserved1.
  uint32_t *symbol_indices = indirect_symtab + section->reserved1;
  // The pointer slots themselves: each one holds the address a symbol resolves to.
  void **bindings = (void **)((uintptr_t)slide + section->addr);

  for (uint slot = 0; slot < section->size / sizeof(void *); slot++) {
    uint32_t symtab_index = symbol_indices[slot];
    // Skip the special absolute / local marker values.
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
      continue;
    }

    // The symbol table entry gives the offset of the name in the string table.
    char *symbol_name = strtab + symtab[symtab_index].n_un.n_strx;
    // Names shorter than two characters can never match, because the
    // comparison below starts at the second character.
    if (!(symbol_name[0] && symbol_name[1])) {
      continue;
    }

    bool rebound = false;
    for (struct rebindings_entry *entry = rebindings; entry && !rebound; entry = entry->next) {
      for (uint j = 0; j < entry->rebindings_nel; j++) {
        if (strcmp(&symbol_name[1], entry->rebindings[j].name) != 0) {
          continue;
        }

        // Hand the current target back to the caller, unless the slot already
        // points at the replacement.
        if (entry->rebindings[j].replaced != NULL &&
            bindings[slot] != entry->rebindings[j].replacement) {
          *(entry->rebindings[j].replaced) = bindings[slot];
        }

        // Request read/write access to the section before writing the slot.
        kern_return_t err = vm_protect (mach_task_self (), (uintptr_t)bindings, section->size, 0, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY);
        if (err == KERN_SUCCESS) {
          // Swap in the replacement implementation.
          bindings[slot] = entry->rebindings[j].replacement;
        }

        // Only the first matching rebinding is applied to a slot.
        rebound = true;
        break;
      }
    }
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

查找符号的过程大致是先找到动态符号表的索引，根据这个索引获取符号表的索引，根据符号表的索引在字符串表中获取符号的名称信息。如果字符串表中的名称和待替换实现的函数名称匹配，会发生函数地址的替换，从而完成了hook过程。

相关阅读:
cc2530用中断程序控制led灯亮灭
 淘宝/天猫API：brand_cat_top-获取分类推荐品牌列表
 Lambda表达式：一篇文章带你通透
 类似于推箱子的小游戏寻找最短路径
 基于对数谱图的深度学习心音分类
 使用消息队列的方式实现进程间通信，输入quit时退出
 QFluentWidgets: 基于 C++ Qt 的 Fluent Design 组件库
 电动车充电桩存在网络安全风险吗？
29. 【Android教程】折叠列表 ExpandableListView
驱动开发：内核枚举进程与线程ObCall回调
原文地址：https://blog.csdn.net/u011608357/article/details/127593199