本文汇编代码的平台及编译器:arm/gcc。分析函数调用栈的规则对于理解程序运行的基本原理很有帮助,对于通过汇编代码分析crash问题也大有裨益。本文示例代码通过C语言函数调用一个汇编函数,再从汇编函数跳转回C函数,分析该示例的汇编代码就可以理解stack frame的创建过程和arm函数调用的传参规则。
问题:
2.1 传参规则
2.2 stack frame基本结构
示例代码包括两个源文件:transferParam.c和transferParam.S
transferParam.c:
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
-
- extern void bionic_clone(int flags, int* child_stack, int* parent_tid, int* tls, int* child_tid, int (*fn)(void*), int* arg);
-
- /* Callback passed to bionic_clone as its fn argument; ignores v and always returns 0. */
- int child(void* v) {
- (void)v;
- return 0;
- }
-
- /*
- * Print every argument forwarded by the assembly routine bionic_clone.
- *
- * bionic_clone leaves r0-r3 untouched and pushes three more words before
- * branching here, so flags, child_stack, ptid and tls come from registers
- * while child_tid, child and args come from the stack. A correct printout
- * therefore confirms that the stack arguments were laid out properly.
- *
- * args is dereferenced for printing and must point to a readable int.
- */
- void my_fork(int flags, int *child_stack, int *ptid, int *tls, int *child_tid, int *child, int *args) {
- /* %p requires a void * argument; passing int * without a cast is undefined behavior. */
- printf("flags:%d sp:%p ptid:%p tls:%p child_tid:%p child:%p args:%d\n",
- flags, (void *)child_stack, (void *)ptid, (void *)tls,
- (void *)child_tid, (void *)child, *args);
- }
-
- /*
- * Demo driver: calls the assembly routine bionic_clone with seven arguments,
- * which is more than four, so some of them have to be passed on the stack.
- * The pointer arguments are small fake addresses that are easy to recognise
- * in the printed output.
- */
- int main(){
- int arg = 5;
- int *const child_tid = (int*)0x04;
- int *const tls = (int*)0x03;
- int *const parent_tid = (int*)0x02;
- int *const child_stack = (int*)0x01;
- const int flags = 0;
-
- printf("%s\n", "before bionic_clone");
- bionic_clone(flags, child_stack, parent_tid, tls, child_tid, child, &arg);
- printf("%s\n", "after bionic_clone");
- return 0;
- }
C代码中调用了汇编函数bionic_clone,共7个参数,超过了4个:按照ARM过程调用标准(AAPCS),前4个参数通过r0~r3寄存器传递,其余的第5、6、7个参数需要使用栈传递。
transferParam.S:
- @ bionic_clone(flags, child_stack, parent_tid, tls, child_tid, fn, arg)
- @
- @ Forwards all seven arguments unchanged to my_fork. Arguments 1-4 stay in
- @ r0-r3; arguments 5-7 are read from the caller's stack area and pushed
- @ again so that my_fork finds them at the top of the stack.
- .globl bionic_clone
- bionic_clone:
- push {fp, lr} @ save the caller's fp and the return address
- add fp, sp, #4 @ fp = sp + 4, i.e. fp points at the saved lr
- push {r4, r5, r6} @ r4-r6 are callee-saved under the AAPCS: preserve them before use
- ldr r4, [fp, #4] @ load the 5th argument into r4
- ldr r5, [fp, #8] @ load the 6th argument into r5
- ldr r6, [fp, #12] @ load the 7th argument into r6
- push {r4, r5, r6} @ pass arguments 5-7 to my_fork on the stack; 32 bytes pushed in total keeps sp 8-byte aligned at the call
- bl my_fork
- sub sp, fp, #16 @ drop the outgoing arguments; sp now points at the saved r4
- pop {r4, r5, r6} @ restore the callee-saved registers
- pop {fp, pc} @ restore fp and load the saved lr into pc to return
-
- .type bionic_clone,%function
代码执行到ldr r6, [fp, #12]时的stack frame图示:
正如main函数通过栈给bionic_clone函数传递第5,6,7三个参数,bionic_clone也将r4 r5 r6入栈给my_fork传递参数
- void bar(int a , int b ) {  /* Prints "bar", then prints the sum of a and b. */
- printf("bar\n");  /* appears as a call to puts in the disassembly listing */
- a = a + b;  /* the sum is stored back into the by-value parameter a */
- printf("%d\n",a);
- }
-
- void foo() {  /* Calls bar with two local values (0 and 1). */
- int a = 0;
- int b = 1;
- bar(a, b);  /* in the disassembly listing a is loaded into w0 and b into w1 before the bl */
- }
-
- int main(int argc, char *argv[]) {  /* Entry point: only calls foo; argc and argv are not used. */
- foo();
- }  /* no explicit return; the disassembly listing shows w0 set to 0 before ret */
- ~
反汇编代码:
-
- 000000000040072c <bar>:
- 40072c: a9be7bfd stp x29, x30, [sp,#-32]!
- 400730: 910003fd mov x29, sp
- 400734: b9001fa0 str w0, [x29,#28]
- 400738: b9001ba1 str w1, [x29,#24]
- 40073c: 90000000 adrp x0, 400000 <_init-0x598>
- 400740: 91216000 add x0, x0, #0x858
- 400744: 97ffffaf bl 400600 <puts@plt>
- 400748: b9401fa1 ldr w1, [x29,#28]
- 40074c: b9401ba0 ldr w0, [x29,#24]
- 400750: 0b000020 add w0, w1, w0
- 400754: b9001fa0 str w0, [x29,#28]
- 400758: 90000000 adrp x0, 400000 <_init-0x598>
- 40075c: 91218000 add x0, x0, #0x860
- 400760: b9401fa1 ldr w1, [x29,#28]
- 400764: 97ffffab bl 400610 <printf@plt>
- 400768: d503201f nop
- 40076c: a8c27bfd ldp x29, x30, [sp],#32
- 400770: d65f03c0 ret
-
- 0000000000400774 <foo>:
- 400774: a9be7bfd stp x29, x30, [sp,#-32]!
- 400778: 910003fd mov x29, sp
- 40077c: b9001fbf str wzr, [x29,#28]
- 400780: 52800020 mov w0, #0x1 // #1
- 400784: b9001ba0 str w0, [x29,#24]
- 400788: b9401ba1 ldr w1, [x29,#24]
- 40078c: b9401fa0 ldr w0, [x29,#28]
- 400790: 97ffffe7 bl 40072c <bar>
- 400794: d503201f nop
- 400798: a8c27bfd ldp x29, x30, [sp],#32
- 40079c: d65f03c0 ret
-
- 00000000004007a0 <main>:
- 4007a0: a9be7bfd stp x29, x30, [sp,#-32]!
- 4007a4: 910003fd mov x29, sp
- 4007a8: b9001fa0 str w0, [x29,#28]
- 4007ac: f9000ba1 str x1, [x29,#16]
- 4007b0: 97fffff1 bl 400774 <foo>
- 4007b4: 52800000 mov w0, #0x0 // #0
- 4007b8: a8c27bfd ldp x29, x30, [sp],#32
- 4007bc: d65f03c0 ret
arm64栈帧结构:
5. 实战,内核如何dump backtrace
为了加深对stack frame的理解,可以分析arm64内核如何dump backtrace。内核打开CONFIG_FRAME_POINTER配置后,可以基于fp(x29)进行栈回溯。基本原理可以参考上面的栈帧结构:以arm64小节的示例代码为例,main调用foo,foo调用bar,我们从bar开始回溯栈帧。只要先得到bar的x29值,那么x29 + 8处保存的就是x30(lr),即返回到调用者foo的地址;而x29所指向的地址处保存的是调用者foo的x29,由此又可以回溯到foo函数的栈帧,依次类推就可以回溯整个函数调用链。
kernel-4.14/arch/arm64/kernel/traps.c:
- void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)  /* Print a "Call trace:" for tsk (current if NULL) by following the chain of frame pointers. */
- {
- struct stackframe frame;
- int skip;
-
- pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
-
- if (!tsk)
- tsk = current;
-
- if (!try_get_task_stack(tsk))
- return;
-
- // Case discussed in this article: dumping the backtrace of the current task.
- if (tsk == current) {
- // __builtin_frame_address is a compiler builtin; with level 0 it returns the frame address of the current function, i.e. x29 here.
- frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.pc = (unsigned long)dump_backtrace;
- } else {
- /*
- * task blocked in __switch_to
- */
- frame.fp = thread_saved_fp(tsk);
- frame.pc = thread_saved_pc(tsk);
- }
-
- skip = !!regs;  /* when regs is given, print nothing until the frame whose fp equals regs->regs[29] is reached */
- printk("Call trace:\n");
- while (1) {
- unsigned long stack;
- int ret;
-
- // dump_backtrace_entry prints the value of frame.pc.
- /* skip until specified stack frame */
- if (!skip) {
- dump_backtrace_entry(frame.pc);
- } else if (frame.fp == regs->regs[29]) {
- skip = 0;
- /*
- * Mostly, this is the case where this function is
- * called in panic/abort. As exception handler's
- * stack frame does not contain the corresponding pc
- * at which an exception has taken place, use regs->pc
- * instead.
- */
- dump_backtrace_entry(regs->pc);
- }
- ret = unwind_frame(tsk, &frame);  /* step to the caller: reloads frame.fp and frame.pc from the current frame */
- if (ret < 0)  /* a negative return from unwind_frame ends the walk */
- break;
- if (in_entry_text(frame.pc)) {
- stack = frame.fp - offsetof(struct pt_regs, stackframe);
-
- if (on_accessible_stack(tsk, stack))
- dump_mem("", "Exception stack", stack,
- stack + sizeof(struct pt_regs));
- }
- }
-
- put_task_stack(tsk);
- }
-
-
-
- /*
- * AArch64 PCS assigns the frame pointer to x29.
- *
- * A simple function prologue looks like this:
- * sub sp, sp, #0x10
- * stp x29, x30, [sp]
- * mov x29, sp
- *
- * A simple function epilogue looks like this:
- * mov sp, x29
- * ldp x29, x30, [sp]
- * add sp, sp, #0x10
- */
- int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)  /* Advance *frame to the caller's frame; returns 0 on success, -EINVAL when unwinding cannot continue. */
- {
- unsigned long fp = frame->fp;
-
- if (fp & 0xf)  /* reject a frame pointer that is not 16-byte aligned */
- return -EINVAL;
-
- if (!tsk)
- tsk = current;
-
- if (!on_accessible_stack(tsk, fp))
- return -EINVAL;
-
- // Load the caller's (previous frame's) fp, which is stored at address fp; see the arm64 stack frame layout.
- frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
-
- // fp + 8 holds the saved x30, i.e. the return address into the caller; see the arm64 stack frame layout.
- frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
-
- /*
- * Frames created upon entry from EL0 have NULL FP and PC values, so
- * don't bother reporting these. Frames created by __noreturn functions
- * might have a valid FP even if PC is bogus, so only terminate where
- * both are NULL.
- */
- if (!frame->fp && !frame->pc)
- return -EINVAL;
-
- return 0;
- }