进程创建的时候,Linux内核会为其创建内核栈(arm手册也要求内核态有单独的栈)。例如应用进程在用户态通过系统调用陷入内核态的时候,上下文信息(如cpu寄存器)需要有个地方保存,这样从内核态切换回用户态的时候,才能继续从系统调用之后的代码开始执行。这个保存的地方就是进程的内核栈。本文主要描述arm32下内核栈的生成过程和结构。
正如进程在用户态执行函数跳转时有一个栈,在内核态执行的时候同样有一个内核态的栈。分成两个栈也是出于安全的考虑:如果都使用用户态的栈,那么内核的数据就可以被用户态访问,这是不安全的。我们不禁要问如下几个问题:
标识进程的核心数据结构task_struct中有一个void *stack成员指向进程内核栈:
- struct task_struct {
- #ifdef CONFIG_THREAD_INFO_IN_TASK
- /*
- * For reasons of header soup (see current_thread_info()), this
- * must be the first element of task_struct.
- */
- struct thread_info thread_info;
- #endif
- void * stack;
- ...
- }
目前平台没有配置 CONFIG_THREAD_INFO_IN_TASK,所以thread_info放在了stack指向的内存中,thread_info中存储了体系结构相关的信息。arm32 内核栈大小为 PAGE_SIZE << 1,即PAGE_SIZE为4KB时是8KB:
- //ARM架构 , 8K
- #define THREAD_SIZE_ORDER 1
- #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
- #define THREAD_START_SP (THREAD_SIZE - 8)
- static inline void *task_stack_page(const struct task_struct *task)
- {
- return task->stack;
- }
- #define task_pt_regs(p) \
- ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
- struct pt_regs {
- unsigned long uregs[18];
- };
-
- #define ARM_cpsr uregs[16]
- #define ARM_pc uregs[15]
- #define ARM_lr uregs[14]
- #define ARM_sp uregs[13]
- #define ARM_ip uregs[12]
- #define ARM_fp uregs[11]
- #define ARM_r10 uregs[10]
- #define ARM_r9 uregs[9]
- #define ARM_r8 uregs[8]
- #define ARM_r7 uregs[7]
- #define ARM_r6 uregs[6]
- #define ARM_r5 uregs[5]
- #define ARM_r4 uregs[4]
- #define ARM_r3 uregs[3]
- #define ARM_r2 uregs[2]
- #define ARM_r1 uregs[1]
- #define ARM_r0 uregs[0]
- #define ARM_ORIG_r0 uregs[17]
进程从用户态陷入内核态时候,用户态的上下文信息保存在pt_regs数据结构中。
- /*
- * low level task data that entry.S needs immediate access to.
- * __switch_to() assumes cpu_context follows immediately after cpu_domain.
- */
- struct thread_info {
- unsigned long flags; /* low level flags */
- int preempt_count; /* 0 => preemptable, <0 => bug */
- mm_segment_t addr_limit; /* address limit */
- struct task_struct *task; /* main task structure */
- __u32 cpu; /* cpu */
- __u32 cpu_domain; /* cpu domain */
- struct cpu_context_save cpu_context; /* cpu context */
- __u32 syscall; /* syscall number */
- __u8 used_cp[16]; /* thread used copro */
- unsigned long tp_value[2]; /* TLS registers */
- #ifdef CONFIG_CRUNCH
- struct crunch_state crunchstate;
- #endif
- union fp_state fpstate __attribute__((aligned(8)));
- union vfp_state vfpstate;
- #ifdef CONFIG_ARM_THUMBEE
- unsigned long thumbee_state; /* ThumbEE Handler Base register */
- #endif
- void *regs_on_excp; /* aee */
- int cpu_excp; /* aee */
- };
-
- struct cpu_context_save {
- __u32 r4;
- __u32 r5;
- __u32 r6;
- __u32 r7;
- __u32 r8;
- __u32 r9;
- __u32 sl;
- __u32 fp;
- __u32 sp;
- __u32 pc;
- __u32 extra[2]; /* Xscale 'acc' register, etc */
- };
我们知道进程在内核态执行的时候,sp寄存器指向了内核栈,为什么内核的sp寄存器指向进程内核栈?这是什么时候设置的?
答案:进程上下文切换的时候(switch_to汇编)
首先进程创建的时候,在copy_thread中会初始化新进程的内核栈(从代码可以看到,此时通过task_pt_regs(p)已经能取到内核栈,说明内核栈本身在此之前已经分配好),并将内核栈中pt_regs的地址保存在thread_info->cpu_context中,代码如下:
- //参数p是指新建进程的task_struct
- int
- copy_thread(unsigned long clone_flags, unsigned long stack_start,
- unsigned long stk_sz, struct task_struct *p)
- {
- struct thread_info *thread = task_thread_info(p);
- struct pt_regs *childregs = task_pt_regs(p);
-
- memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
-
- #ifdef CONFIG_CPU_USE_DOMAINS
- /*
- * Copy the initial value of the domain access control register
- * from the current thread: thread->addr_limit will have been
- * copied from the current thread via setup_thread_stack() in
- * kernel/fork.c
- */
- thread->cpu_domain = get_domain();
- #endif
-
- if (likely(!(p->flags & PF_KTHREAD))) {
- *childregs = *current_pt_regs();
- childregs->ARM_r0 = 0;
- if (stack_start)
- childregs->ARM_sp = stack_start;
- } else {
- memset(childregs, 0, sizeof(struct pt_regs));
- thread->cpu_context.r4 = stk_sz;
- thread->cpu_context.r5 = stack_start;
- childregs->ARM_cpsr = SVC_MODE;
- }
- thread->cpu_context.pc = (unsigned long)ret_from_fork;
- thread->cpu_context.sp = (unsigned long)childregs;
-
- clear_ptrace_hw_breakpoint(p);
-
- if (clone_flags & CLONE_SETTLS)
- thread->tp_value[0] = childregs->ARM_r3;
- thread->tp_value[1] = get_tpuser();
-
- thread_notify(THREAD_NOTIFY_COPY, thread);
-
- return 0;
- }
thread->cpu_context.pc = (unsigned long)ret_from_fork; 这一句设置新建进程的执行入口是ret_from_fork函数。
thread->cpu_context.sp = (unsigned long)childregs; 这一句使thread_info成员cpu_context的sp成员指向了内核栈中的pt_regs数据结构,pt_regs保存了用户态的通用寄存器。
上下文切换时,__switch_to函数会将换入进程thread->cpu_context中保存的值加载到cpu的寄存器中,其中的cpu_context.sp就被加载到了内核态的sp寄存器中:
- /*
- * Register switch for ARMv3 and ARMv4 processors
- * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
- * previous and next are guaranteed not to be the same.
- */
- ENTRY(__switch_to)
- UNWIND(.fnstart )
- UNWIND(.cantunwind )
- add ip, r1, #TI_CPU_SAVE @ip指向被换出进程的thread_info->cpu_context
- ARM( stmia ip!, {r4 - sl, fp, sp, lr} ) @ Store most regs on stack,即保存到cpu_context中
- THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack
- THUMB( str sp, [ip], #4 )
- THUMB( str lr, [ip], #4 )
- ldr r4, [r2, #TI_TP_VALUE]
- ldr r5, [r2, #TI_TP_VALUE + 4]
- #ifdef CONFIG_CPU_USE_DOMAINS
- mrc p15, 0, r6, c3, c0, 0 @ Get domain register
- str r6, [r1, #TI_CPU_DOMAIN] @ Save old domain register
- ldr r6, [r2, #TI_CPU_DOMAIN]
- #endif
- switch_tls r1, r4, r5, r3, r7
- #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
- ldr r7, [r2, #TI_TASK]
- ldr r8, =__stack_chk_guard
- .if (TSK_STACK_CANARY > IMM12_MASK)
- add r7, r7, #TSK_STACK_CANARY & ~IMM12_MASK
- .endif
- ldr r7, [r7, #TSK_STACK_CANARY & IMM12_MASK]
- #endif
- #ifdef CONFIG_CPU_USE_DOMAINS
- mcr p15, 0, r6, c3, c0, 0 @ Set domain register
- #endif
- mov r5, r0
- add r4, r2, #TI_CPU_SAVE @r4指向换入进程的cpu_context
- ldr r0, =thread_notify_head
- mov r1, #THREAD_NOTIFY_SWITCH
- bl atomic_notifier_call_chain
- #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
- str r7, [r8]
- #endif
- THUMB( mov ip, r4 )
- mov r0, r5
- ARM( ldmia r4, {r4 - sl, fp, sp, pc} ) @ Load all regs saved previously,即将cpu_context中值加载到cpu寄存器中
- THUMB( ldmia ip!, {r4 - sl, fp} ) @ Load all regs saved previously
- THUMB( ldr sp, [ip], #4 )
- THUMB( ldr pc, [ip] )
- UNWIND(.fnend )
- ENDPROC(__switch_to)
ARM( ldmia r4, {r4 - sl, fp, sp, pc} )会将换入进程thread_info->cpu_context中的值加载到cpu寄存器中,并从加载到pc的地址处继续执行。由上面的分析我们知道,进程创建的时候设置了thread->cpu_context.sp = (unsigned long)childregs,这样childregs的值会被加载到cpu的sp寄存器,即内核态下sp指向了内核栈(更具体地说是内核栈中的pt_regs)。