iOS使用fishhook
iOS开发中有时会需要交换两个方法的实现,也就是Method Swizzle,这种功能依赖Objective-C的动态特性实现。对于没有动态性的编程语言比如C语言来说,能不能交换两个方法的实现呢?fishhook开源库可以帮助我们交换两个C函数实现,看下它的基本使用方法。
引入fishhook源码到项目中,在项目中写入以下代码
// Holds the address of the original NSLog implementation. fishhook writes it
// through the `replaced` field of the rebinding when the hook is installed.
static void (*sysNSLog)(NSString *format, ...);

// Replacement for NSLog: prefixes every message with "NSLog modified, " and
// forwards it to the original implementation.
//
// format: the caller's format string; the variadic arguments that follow are
//         expanded against it before forwarding.
void myNSLog(NSString *format, ...) {
    // Expand the caller's arguments here. A `...` list cannot be forwarded to
    // another variadic function directly, and passing the caller's text as the
    // format string of sysNSLog would misinterpret any '%' it contains.
    NSString *message = nil;
    if (format != nil) {
        va_list args;
        va_start(args, format);
        message = [[NSString alloc] initWithFormat:format arguments:args];
        va_end(args);
    }
    // Call the original implementation with a fixed, literal format string.
    sysNSLog(@"NSLog modified, %@", message);
}
// Installs the NSLog hook when the view loads and logs once before and once
// after the rebinding call.
- (void)viewDidLoad {
    [super viewDidLoad];
    // Logged before rebind_symbols is called.
    NSLog(@"nslog test");

    struct rebinding nslog;
    nslog.name = "NSLog";                 // name of the function to replace
    nslog.replacement = myNSLog;          // address of the new function
    nslog.replaced = (void *)&sysNSLog;   // where the original function pointer is stored

    struct rebinding arr[1] = {nslog};
    rebind_symbols(arr, 1);

    // Logged after rebind_symbols has been called.
    NSLog(@"nslog test");
}
每个要替换的函数都需要填写三个字段:name是被替换的函数名,replacement是新函数的地址,replaced是用来保存原函数地址的指针。将这些信息组装成rebinding结构体,放入结构体数组,再调用fishhook的rebind_symbols完成绑定。
fishhook原理
在了解fishhook原理之前,需要了解Mach-O文件,可以看这篇文章。
Mach-O文件介绍
我们从fishhook源码入手,先看入口函数。
/*
 * Public entry point: records the requested rebindings and applies them.
 *
 * rebindings:     array of rebinding descriptions.
 * rebindings_nel: number of elements in `rebindings`.
 *
 * Returns the value produced by prepend_rebindings; a negative value is
 * returned immediately without touching any image.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  if (!_rebindings_head->next) {
    // First call: register the add-image callback. It takes effect for images
    // that are already loaded as well as for images loaded later.
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  // Later calls: walk the images that are currently loaded.
  uint32_t image_count = _dyld_image_count();
  uint32_t index = 0;
  while (index < image_count) {
    _rebind_symbols_for_image(_dyld_get_image_header(index),
                              _dyld_get_image_vmaddr_slide(index));
    index++;
  }
  return status;
}
rebind_symbols函数调用了prepend_rebindings,prepend_rebindings函数内部把即将hook的函数添加到_rebindings_head这个链表里面来。rebind_symbols首先判断_rebindings_head->next是否为空,为空的话代表rebind_symbols是首次调用,这个时候有可能出现部分image还没有加载的情况,只遍历现有的image列表无法覆盖到它们,所以要注册image加载的回调,回调函数是_rebind_symbols_for_image。这样可以保证后续的image加载时会重新调用_rebind_symbols_for_image完成hook。如果不是首次调用(回调已经注册过),则直接遍历当前已加载的image,逐个调用_rebind_symbols_for_image。
可以看到fishhook是遍历所有的image,逐个尝试去hook。在每个循环里面实际调用了rebind_symbols_for_image这个函数。
/*
 * Locates the symbol table, string table and indirect symbol table of one
 * image, then rebinds every lazy / non-lazy symbol pointer section found in
 * its data segments.
 *
 * rebindings: head of the list of rebindings to apply.
 * header:     Mach-O header of the image to process.
 * slide:      offset added to the addresses recorded in the image's load
 *             commands to obtain in-memory addresses.
 */
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
  // Validate the image: give up when dladdr cannot resolve the header address.
  Dl_info info;
  if (dladdr(header, &info) == 0) {
    return;
  }

  segment_command_t *cur_seg_cmd;
  segment_command_t *linkedit_segment = NULL;
  struct symtab_command *symtab_cmd = NULL;
  struct dysymtab_command *dysymtab_cmd = NULL;

  // First pass over the load commands: find the __LINKEDIT segment, the
  // LC_SYMTAB command and the LC_DYSYMTAB command.
  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
        linkedit_segment = cur_seg_cmd;
      }
    } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
      symtab_cmd = (struct symtab_command *)cur_seg_cmd;
    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
      // Must be stored in dysymtab_cmd: writing it to symtab_cmd would leave
      // dysymtab_cmd NULL and make the guard below reject every image.
      dysymtab_cmd = (struct dysymtab_command *)cur_seg_cmd;
    }
  }

  // Nothing to rebind unless all three commands exist and the image has
  // indirect symbols.
  if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
      !dysymtab_cmd->nindirectsyms) {
    return;
  }

  // Base address against which the table file offsets are resolved.
  uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
  // Symbol table: base address plus the symbol table offset.
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
  // String table: its offset is also stored in symtab_cmd.
  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
  // Indirect symbol table: its entries are indices into the symbol table.
  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

  // Second pass: go back to the first load command and visit the data segments.
  cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      // Only the SEG_DATA and SEG_DATA_CONST segments are of interest.
      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
        continue;
      }
      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
        // The section headers are laid out right after the segment command.
        section_t *sect =
            (section_t *)(cur + sizeof(segment_command_t)) + j;
        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
      }
    }
  }
}
rebind_symbols_for_image函数先用dladdr校验image,dladdr可以获取image的基本信息,包括image的地址和名称。rebind_symbols_for_image的核心目的在于获取符号表symtab的地址,和动态符号表indirect_symtab地址,以及字符串表strtab的地址。
我们知道load_command类似于DATA段的索引,如果要找到三个表的地址,需要找到对应的command,通过遍历commands,可以找到symtab_cmd和dysymtab_cmd。字符串表没有单独对应的command,它的偏移量保存在symtab_cmd的stroff字段中,因此字符串表的地址可以通过symtab_cmd间接得出。找到command之后,就可以通过计算得出三个表的地址。
在这段函数中,还有两个变量需要注意,一个是slide,一个是sect。slide是系统生成的随机偏移量,用来随机化程序的执行地址,提高安全性。sect是DATA段中存放懒加载或非懒加载符号指针的section,它保存了section的大小、地址、偏移量等信息。
最后执行的函数是perform_rebinding_with_section,这里执行了替换函数实现的操作。
// Rebinds the symbol pointers stored in one section.
//
// For every pointer-sized slot in `section`, the symbol name is looked up via
// indirect_symtab -> symtab -> strtab. When the name, minus its first
// character, equals the name of a rebinding, the slot's current value is saved
// through that rebinding's `replaced` pointer (if set) and the slot is
// overwritten with the rebinding's `replacement`.
//
// rebindings:      head of the list of rebindings to apply.
// section:         section whose slots are examined.
// slide:           offset added to section->addr to locate the slots in memory.
// symtab:          symbol table of the image.
// strtab:          string table of the image.
// indirect_symtab: indirect symbol table of the image.
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
section_t *section,
intptr_t slide,
nlist_t *symtab,
char *strtab,
uint32_t *indirect_symtab) {
// Indirect symbol table entries belonging to this section, starting at index reserved1
uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
// The section's pointer slots in memory; each slot holds the address a symbol is bound to
void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);
for (uint i = 0; i < section->size / sizeof(void *); i++) {
// The indirect symbol table entry gives the index into the symbol table
uint32_t symtab_index = indirect_symbol_indices[i];
// Entries flagged as absolute and/or local are skipped
if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
continue;
}
// The symbol table entry gives the offset of the name in the string table
uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
// Symbol name
char *symbol_name = strtab + strtab_offset;
// True when the name has at least two characters, so &symbol_name[1] is a non-empty string
bool symbol_name_longer_than_1 = symbol_name[0] && symbol_name[1];
struct rebindings_entry *cur = rebindings;
// Walk every entry of the rebindings list and every rebinding inside it
while (cur) {
for (uint j = 0; j < cur->rebindings_nel; j++) {
// Compare with the first character skipped (presumably the leading '_' of C symbol names -- confirm)
if (symbol_name_longer_than_1 && strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
kern_return_t err;
// Save the current binding for the caller, unless the slot already holds the replacement
if (cur->rebindings[j].replaced != NULL && indirect_symbol_bindings[i] != cur->rebindings[j].replacement)
*(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
// Request read/write/copy protection on the section's memory before writing to it
err = vm_protect (mach_task_self (), (uintptr_t)indirect_symbol_bindings, section->size, 0, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY);
if (err == KERN_SUCCESS) {
// Replace the function implementation; skipped when the protection change failed
indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
}
// First match wins: move on to the next slot
goto symbol_loop;
}
}
cur = cur->next;
}
symbol_loop:;
}
}
查找符号的过程大致是先找到动态符号表的索引,根据这个索引获取符号表的索引,根据符号表的索引在字符串表中获取符号的名称信息。如果字符串表中的名称和待替换实现的函数名称匹配,会发生函数地址的替换,从而完成了hook过程。