分为第一阶段汇编和第二阶段。
先总结出第一阶段主要任务
arch/arm/kernel/vmlinux.lds
如下,可以看出内核入口在 ENTRY(stext),stext 在汇编文件 arch/arm/kernel/head.S 中实现。
- OUTPUT_ARCH(arm)
- ENTRY(stext)
- jiffies = jiffies_64;
- SECTIONS
- {
- /*
- * XXX: The linker does not define how output sections are
- * assigned to input sections when there are multiple statements
- * matching the same input section name. There is no documented
- * order of matching.
- *
- * unwind exit sections must be discarded before the rest of the
- * unwind sections get included.
- */
- /DISCARD/ : {
- *(.ARM.exidx.exit.text)
- *(.ARM.extab.exit.text)
- *(.exitcall.exit)
- *(.discard)
- *(.discard.*)
- }
- . = 0x80000000 + 0x00008000;
- .head.text : {
- _text = .;
- *(.head.text)
- }
- .text : { /* Real text segment */
- _stext = .; /* Text and read-only data */
- . = ALIGN(8); __idmap_text_start = .; *(.idmap.text) __idmap_text_end = .; . = ALIGN((1 << 12)); __hyp_idmap_text_start = .; *(.hyp.idmap. text) __hyp_idmap_text_end = .;
- __exception_text_start = .;
- *(.exception.text)
- __exception_text_end = .;
-
- . = ALIGN(8); *(.text.hot) *(.text .text.fixup) *(.ref.text) *(.text.unlikely)
- . = ALIGN(8); __sched_text_start = .; *(.sched.text) __sched_text_end = .;
- . = ALIGN(8); __lock_text_start = .; *(.spinlock.text) __lock_text_end = .;
注释看出,启动内核要求:MMU关闭、DCACHE关闭,ICACHE无所谓,然后传入r0的参数为0,r1是机器ID,r2是tag或者设备树地址指针。02-uboot启动内核前到底做了哪些必要工作_【星星之火】的博客-CSDN博客,在这篇文章中已经说明了这点,即 Bootloader 调用 kernel_entry(0, machid, r2),以 r0、r1、r2 作为参数进入到此处。
- 59 /*
- 60 * Kernel startup entry point.
- 61 * ---------------------------
- 62 *
- 63 * This is normally called from the decompressor code. The requirements
- 64 * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0,
- 65 * r1 = machine nr, r2 = atags or dtb pointer.
- 66 *
- 67 * This code is mostly position independent, so if you link the kernel at
- 68 * 0xc0008000, you call this at __pa(0xc0008000).
- 69 *
- 70 * See linux/arch/arm/tools/mach-types for the complete list of machine
- 71 * numbers for r1.
- 72 *
- 73 * We're trying to keep crap to a minimum; DO NOT add any machine specific
- 74 * crap here - that's what the boot loader (or in extreme, well justified
- 75 * circumstances, zImage) is for.
- 76 */
- 77 .arm
- 78
- 79 __HEAD
- 80 ENTRY(stext)
- 81 ARM_BE8(setend be ) @ ensure we are in BE8 mode
- 82
- 83 THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM.
- 84 THUMB( bx r9 ) @ If this is a Thumb-2 kernel,
- 85 THUMB( .thumb ) @ switch to Thumb now.
- 86 THUMB(1: )
- 87
- 88 #ifdef CONFIG_ARM_VIRT_EXT
- 89 bl __hyp_stub_install
- 90 #endif
- 91 @ ensure svc mode and all interrupts masked
- 92 safe_svcmode_maskall r9
- 93
- 94 mrc p15, 0, r9, c0, c0 @ get processor id
- 95 bl __lookup_processor_type @ r5=procinfo r9=cpuid
- 96 movs r10, r5 @ invalid processor (r5=0)?
- 97 THUMB( it eq ) @ force fixup-able long branch encoding
- 98 beq __error_p @ yes, error 'p'
- arch/arm/include/asm/assembler.h
- 320 #if __LINUX_ARM_ARCH__ >= 6 && !defined(CONFIG_CPU_V7M)
- 321 mrs \reg , cpsr
- 322 eor \reg, \reg, #HYP_MODE
- 323 tst \reg, #MODE_MASK
- 324 bic \reg , \reg , #MODE_MASK
- 325 orr \reg , \reg , #PSR_I_BIT | PSR_F_BIT | SVC_MODE
- 326 THUMB( orr \reg , \reg , #PSR_T_BIT )
- 327 bne 1f
- 328 orr \reg, \reg, #PSR_A_BIT
- 329 adr lr, BSYM(2f)
- 330 msr spsr_cxsf, \reg
- 331 __MSR_ELR_HYP(14)
- 332 __ERET
- 333 1: msr cpsr_c, \reg
- 334 2:
- 335 #else
进入操作系统要求处理器处于特权模式(SVC 管理模式)。
参考 ARM® Architecture Reference Manual ARMv7-A and ARMv7-R edition:CPSR 的低 5 位是模式位。
上面的 safe_svcmode_maskall 宏把 CPSR 低 5 位设置为 SVC_MODE,同时置位 PSR_I_BIT、PSR_F_BIT 屏蔽 IRQ/FIQ;若当前处于 HYP 模式,则通过 msr spsr/ERET 的方式退回 SVC 模式。

系统进行初始化过程中,资源未准备好,此时产生中断将出现不可预计的错误,所以必须先关闭中断。关闭中断的方法同上,也是通过写 CPSR 寄存器(置位 PSR_I_BIT、PSR_F_BIT)完成。

从协处理器p15中的c0寄存器中获取CPU ID
mrc p15, 0, r9, c0, c0
CPU 相关信息通过如下结构体存储,里面包含了启动 MMU 所需的信息,所以在启动早期就需要获取到。该结构体存放在内核镜像的“.init.proc.info”段中,__lookup_processor_type 根据 CPU ID 在该段中查找匹配的 procinfo 信息。
- 20 /*
- 21 * Note! struct processor is always defined if we're
- 22 * using MULTI_CPU, otherwise this entry is unused,
- 23 * but still exists.
- 24 *
- 25 * NOTE! The following structure is defined by assembly
- 26 * language, NOT C code. For more information, check:
- 27 * arch/arm/mm/proc-*.S and arch/arm/kernel/head.S
- 28 */
- 29 struct proc_info_list {
- 30 unsigned int cpu_val;
- 31 unsigned int cpu_mask;
- 32 unsigned long __cpu_mm_mmu_flags; /* used by head.S */
- 33 unsigned long __cpu_io_mmu_flags; /* used by head.S */
- 34 unsigned long __cpu_flush; /* used by head.S */
- 35 const char *arch_name;
- 36 const char *elf_name;
- 37 unsigned int elf_hwcap;
- 38 const char *cpu_name;
- 39 struct processor *proc;
- 40 struct cpu_tlb_fns *tlb;
- 41 struct cpu_user_fns *user;
- 42 struct cpu_cache_fns *cache;
- 43 };
__vet_atags中,检查设备树头部4字节是否是magic(d00dfeed)。
- 118 * r1 = machine no, r2 = atags or dtb,
- 119 * r8 = phys_offset, r9 = cpuid, r10 = procinfo
- 120 */
- 121 bl __vet_atags
- 122 #ifdef CONFIG_SMP_ON_UP
- 123 bl __fixup_smp
- 124 #endif
- 125 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
- 126 bl __fixup_pv_table
- 127 #endif
- 128 bl __create_page_tables
内核启动后,要尽快开启MMU,开启MMU之前需要有页表。因此准备好页表环境是开启MMU的前提。由__create_page_tables 来创建。
- 108 #ifndef CONFIG_XIP_KERNEL ~
- 109 adr r3, 2f ~
- 110 ldmia r3, {r4, r8} ~
- 111 sub r4, r3, r4 @ (PHYS_OFFSET - PAGE_OFFSET) ~
- 112 add r8, r8, r4 @ PHYS_OFFSET ~
- 113 #else ~
- 114 ldr r8, =PLAT_PHYS_OFFSET @ always constant in this case ~
- 115 #endif ~
- 116 ~
- 117 /* ~
- 118 * r1 = machine no, r2 = atags or dtb, ~
- 119 * r8 = phys_offset, r9 = cpuid, r10 = procinfo ~
- 120 */ ~
- 121 bl __vet_atags ~
- 122 #ifdef CONFIG_SMP_ON_UP ~
- 123 bl __fixup_smp ~
- 124 #endif ~
- 125 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT ~
- 126 bl __fixup_pv_table ~
- 127 #endif ~
- 128 bl __create_page_tables
通过页表完成虚拟地址到物理地址的映射。页表基地址存储在cp15的c2寄存器中。
将 4G 空间分成 4096 个 1M 的段。虚拟地址的高 12bit 作为索引,在页表基地址的基础上找到对应的段描述符,从中取出段基址,再加上虚拟地址的低 20bit 段内偏移,得出物理地址(MMU 硬件自动完成该地址转换)。
开启 MMU 之前和开启 MMU 的瞬间,pc 指针由物理地址切换到虚拟地址,为保证这段代码能继续执行,需要使得这段代码的虚拟地址和物理地址一致(即恒等映射)。
- 152 /*
- 153 * Setup the initial page tables. We only setup the barest
- 154 * amount which are required to get the kernel running, which
- 155 * generally means mapping in the kernel code.
- 156 *
- 157 * r8 = phys_offset, r9 = cpuid, r10 = procinfo
- 158 *
- 159 * Returns:
- 160 * r0, r3, r5-r7 corrupted
- 161 * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
- 162 */
- 163 __create_page_tables:
- 164 pgtbl r4, r8 @ page table address 1.页表物理地址给r4
- 165
- 166 /*
- 167 * Clear the swapper page table 2.清空页表空间
- 168 */
- 169 mov r0, r4
- 170 mov r3, #0
- 171 add r6, r0, #PG_DIR_SIZE
- 172 1: str r3, [r0], #4
- 173 str r3, [r0], #4
- 174 str r3, [r0], #4
- 175 str r3, [r0], #4
- 176 teq r0, r6
- 177 bne 1b
- 178
- 179 #ifdef CONFIG_ARM_LPAE
- 180 /*
- 181 * Build the PGD table (first level) to point to the PMD table. A PGD
- 182 * entry is 64-bit wide.
- 183 */
- 184 mov r0, r4
- 185 add r3, r4, #0x1000 @ first PMD table address
- 186 orr r3, r3, #3 @ PGD block type
- 187 mov r6, #4 @ PTRS_PER_PGD
- 188 mov r7, #1 << (55 - 32) @ L_PGD_SWAPPER
- 189 1:
- 190 #ifdef CONFIG_CPU_ENDIAN_BE8
- 191 str r7, [r0], #4 @ set top PGD entry bits
- 192 str r3, [r0], #4 @ set bottom PGD entry bits
- 193 #else
- 194 str r3, [r0], #4 @ set bottom PGD entry bits
- 195 str r7, [r0], #4 @ set top PGD entry bits
- 196 #endif
- 197 add r3, r3, #0x1000 @ next PMD table
- 198 subs r6, r6, #1
- 199 bne 1b
- 200
- 201 add r4, r4, #0x1000 @ point to the PMD tables
- 202 #ifdef CONFIG_CPU_ENDIAN_BE8
- 203 add r4, r4, #4 @ we only write the bottom word
- 204 #endif
- 205 #endif
- 206
- 207 ldr r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags //将proc中mmuflag信息存到r10
- 208
- 209 /* 3.创建__turn_mmu_on的恒等映射,衔接mmu开启动作
- 210 * Create identity mapping to cater for __enable_mmu.
- 211 * This identity mapping will be removed by paging_init().
- 212 */
- 213 adr r0, __turn_mmu_on_loc
- 214 ldmia r0, {r3, r5, r6}
- 215 sub r0, r0, r3 @ virt->phys offset
- 216 add r5, r5, r0 @ phys __turn_mmu_on
- 217 add r6, r6, r0 @ phys __turn_mmu_on_end
- 218 mov r5, r5, lsr #SECTION_SHIFT
- 219 mov r6, r6, lsr #SECTION_SHIFT
- 220
- 221 1: orr r3, r7, r5, lsl #SECTION_SHIFT @ flags + kernel base
- 222 str r3, [r4, r5, lsl #PMD_ORDER] @ identity mapping
- 223 cmp r5, r6
- 224 addlo r5, r5, #1 @ next section
- 225 blo 1b
- 226
- 227 /* 4.内核映射
- 228 * Map our RAM from the start to the end of the kernel .bss section.
- 229 */
- 230 add r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
- 231 ldr r6, =(_end - 1)
- 232 orr r3, r8, r7
- 233 add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
- 234 1: str r3, [r0], #1 << PMD_ORDER
- 235 add r3, r3, #1 << SECTION_SHIFT
- 236 cmp r0, r6
- 237 bls 1b
- 238
- 239 #ifdef CONFIG_XIP_KERNEL
- 242 */
- 246 orr r3, r7, r3, lsl #SECTION_SHIFT
- 247 add r0, r4, #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
- 248 str r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
- 252 1: cmp r0, r6
- 253 add r3, r3, #1 << SECTION_SHIFT
- 251 add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
- 252 1: cmp r0, r6
- 253 add r3, r3, #1 << SECTION_SHIFT
- 254 strls r3, [r0], #1 << PMD_ORDER
- 255 bls 1b
- 256 #endif
- 257
- 258 /* 5.dtb映射页表
- 259 * Then map boot params address in r2 if specified.
- 260 * We map 2 sections in case the ATAGs/DTB crosses a section boundary.
- 261 */
- 262 mov r0, r2, lsr #SECTION_SHIFT
- 263 movs r0, r0, lsl #SECTION_SHIFT
- 264 subne r3, r0, r8
- 265 addne r3, r3, #PAGE_OFFSET
- 266 addne r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
- 267 orrne r6, r7, r0
- 268 strne r6, [r3], #1 << PMD_ORDER
- 269 addne r6, r6, #1 << SECTION_SHIFT
- 270 strne r6, [r3]
通过下面几个函数调用,开启MMU,也就是操作cp15协处理器,最终通过设置协处理器的c1的bit0为1开启MMU。之后跳转到__mmap_switched,准备开启第二阶段。
- 130 /*
- 131 * The following calls CPU specific code in a position independent
- 132 * manner. See arch/arm/mm/proc-*.S for details. r10 = base of
- 133 * xxx_proc_info structure selected by __lookup_processor_type
- 134 * above. On return, the CPU will be ready for the MMU to be
- 135 * turned on, and r0 will hold the CPU control register value.
- 136 */
- 137 ldr r13, =__mmap_switched @ address to jump to after
- 138 @ mmu has been enabled
- 139 adr lr, BSYM(1f) @ return (PIC) address
- 140 mov r8, r4 @ set TTBR1 to swapper_pg_dir
- 141 ldr r12, [r10, #PROCINFO_INITFUNC]
- 142 add r12, r12, r10
- 143 ret r12
- 144 1: b __enable_mmu
- 428 __enable_mmu:
- 429 #if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6
- 430 orr r0, r0, #CR_A
- 431 #else
- 432 bic r0, r0, #CR_A
- 433 #endif
- 434 #ifdef CONFIG_CPU_DCACHE_DISABLE
- 435 bic r0, r0, #CR_C
- 436 #endif
- 437 #ifdef CONFIG_CPU_BPREDICT_DISABLE
- 438 bic r0, r0, #CR_Z
- 439 #endif
- 440 #ifdef CONFIG_CPU_ICACHE_DISABLE
- 441 bic r0, r0, #CR_I
- 442 #endif
- 443 #ifndef CONFIG_ARM_LPAE
- 444 mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
- 445 domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
- 446 domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
- 447 domain_val(DOMAIN_IO, DOMAIN_CLIENT))
- 448 mcr p15, 0, r5, c3, c0, 0 @ load domain access register
- 449 mcr p15, 0, r4, c2, c0, 0 @ load page table pointer
- 450 #endif
- 451 b __turn_mmu_on
- 452 ENDPROC(__enable_mmu)
- 454 /*
- 455 * Enable the MMU. This completely changes the structure of the visible
- 456 * memory space. You will not be able to trace execution through this.
- 457 * If you have an enquiry about this, *please* check the linux-arm-kernel
- 458 * mailing list archives BEFORE sending another post to the list.
- 459 *
- 460 * r0 = cp#15 control register
- 461 * r1 = machine ID
- 462 * r2 = atags or dtb pointer
- 463 * r9 = processor ID
- 464 * r13 = *virtual* address to jump to upon completion
- 465 *
- 466 * other registers depend on the function called upon completion
- 467 */
- 468 .align 5
- 469 .pushsection .idmap.text, "ax"
- 470 ENTRY(__turn_mmu_on)
- 471 mov r0, r0
- 472 instr_sync
- 473 mcr p15, 0, r0, c1, c0, 0 @ write control reg
- 474 mrc p15, 0, r3, c0, c0, 0 @ read id reg
- 475 instr_sync
- 476 mov r3, r3
- 477 mov r3, r13
- 478 ret r3
- 479 __turn_mmu_on_end:
- 480 ENDPROC(__turn_mmu_on)
- 481 .popsection
映射好__turn_mmu_on、内核、设备树后,开启MMU,一切开始运行在虚拟地址中,MMU开始工作。之后可以进入到C函数进行常规初始化。head.S调用__mmap_switched,完成C环境初始化后,跳转start_kernel。
- 71 /*
- 72 * The following fragment of code is executed with the MMU on in MMU mode,
- 73 * and uses absolute addresses; this is not position independent.
- 74 *
- 75 * r0 = cp#15 control register
- 76 * r1 = machine ID
- 77 * r2 = atags/dtb pointer
- 78 * r9 = processor ID
- 79 */
- 80 __INIT
- 81 __mmap_switched:
- 82 adr r3, __mmap_switched_data
- 83
- 84 ldmia r3!, {r4, r5, r6, r7}
- 85 cmp r4, r5 @ Copy data segment if needed
- 86 1: cmpne r5, r6
- 87 ldrne fp, [r4], #4
- 88 strne fp, [r5], #4
- 89 bne 1b
- 90
- 91 mov fp, #0 @ Clear BSS (and zero fp)
- 92 1: cmp r6, r7
- 93 strcc fp, [r6],#4
- 94 bcc 1b
- 95
- 96 ARM( ldmia r3, {r4, r5, r6, r7, sp})
- 97 THUMB( ldmia r3, {r4, r5, r6, r7} )
- 98 THUMB( ldr sp, [r3, #16] )
- 99 str r9, [r4] @ Save processor ID
- 100 str r1, [r5] @ Save machine type
- 101 str r2, [r6] @ Save atags pointer
- 102 cmp r7, #0
- 103 strne r0, [r7] @ Save control register values
- 104 b start_kernel
- 105 ENDPROC(__mmap_switched)
start_kernel 中进行一大堆初始化,想必都很关键。下面逐项简要说明。
- asmlinkage __visible void __init start_kernel(void)
- {
- char *command_line;
- char *after_dashes;
-
- /*
- * Need to run as early as possible, to initialize the
- * lockdep hash:
- */
- lockdep_init();
- set_task_stack_end_magic(&init_task);
- smp_setup_processor_id();
- debug_objects_early_init();
-
- /*
- * Set up the the initial canary ASAP:
- */
- boot_init_stack_canary();
-
- cgroup_init_early();
-
- local_irq_disable();
- early_boot_irqs_disabled = true;
-
- /*
- * Interrupts are still disabled. Do necessary setups, then
- * enable them
- */
- boot_cpu_init();
- page_address_init();
- pr_notice("%s", linux_banner);
- setup_arch(&command_line);
- mm_init_cpumask(&init_mm);
- setup_command_line(command_line);
- setup_nr_cpu_ids();
- setup_per_cpu_areas();
- smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
-
- build_all_zonelists(NULL, NULL);
- page_alloc_init();
-
- pr_notice("Kernel command line: %s\n", boot_command_line);
- parse_early_param();
- after_dashes = parse_args("Booting kernel",
- static_command_line, __start___param,
- __stop___param - __start___param,
- -1, -1, &unknown_bootoption);
- if (!IS_ERR_OR_NULL(after_dashes))
- parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
- set_init_arg);
-
- jump_label_init();
-
- /*
- * These use large bootmem allocations and must precede
- * kmem_cache_init()
- */
- setup_log_buf(0);
- pidhash_init();
- vfs_caches_init_early();
- sort_main_extable();
- trap_init();
- mm_init();
-
- /*
- * Set up the scheduler prior starting any interrupts (such as the
- * timer interrupt). Full topology setup happens at smp_init()
- * time - but meanwhile we still have a functioning scheduler.
- */
- sched_init();
- /*
- * Disable preemption - early bootup scheduling is extremely
- * fragile until we cpu_idle() for the first time.
- */
- preempt_disable();
- if (WARN(!irqs_disabled(),
- "Interrupts were enabled *very* early, fixing it\n"))
- local_irq_disable();
- idr_init_cache();
- rcu_init();
-
- /* trace_printk() and trace points may be used after this */
- trace_init();
-
- context_tracking_init();
- radix_tree_init();
- /* init some links before init_ISA_irqs() */
- early_irq_init();
- init_IRQ();
- tick_init();
- rcu_init_nohz();
- init_timers();
- hrtimers_init();
- softirq_init();
- timekeeping_init();
- time_init();
- sched_clock_postinit();
- perf_event_init();
- profile_init();
- call_function_init();
- WARN(!irqs_disabled(), "Interrupts were enabled early\n");
- early_boot_irqs_disabled = false;
- local_irq_enable();
-
- kmem_cache_init_late();
-
- /*
- * HACK ALERT! This is early. We're enabling the console before
- * we've done PCI setups etc, and console_init() must be aware of
- * this. But we do want output early, in case something goes wrong.
- */
- console_init();
- if (panic_later)
- panic("Too many boot %s vars at `%s'", panic_later,
- panic_param);
-
- lockdep_info();
-
- /*
- * Need to run this when irqs are enabled, because it wants
- * to self-test [hard/soft]-irqs on/off lock inversion bugs
- * too:
- */
- locking_selftest();
-
- #ifdef CONFIG_BLK_DEV_INITRD
- if (initrd_start && !initrd_below_start_ok &&
- page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
- pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
- page_to_pfn(virt_to_page((void *)initrd_start)),
- min_low_pfn);
- initrd_start = 0;
- }
- #endif
- page_ext_init();
- debug_objects_mem_init();
- kmemleak_init();
- setup_per_cpu_pageset();
- numa_policy_init();
- if (late_time_init)
- late_time_init();
- sched_clock_init();
- calibrate_delay();
- pidmap_init();
- anon_vma_init();
- acpi_early_init();
- thread_info_cache_init();
- cred_init();
- fork_init();
- proc_caches_init();
- buffer_init();
- key_init();
- security_init();
- dbg_late_init();
- vfs_caches_init(totalram_pages);
- signals_init();
- /* rootfs populating might need page-writeback */
- page_writeback_init();
- proc_root_init();
- nsfs_init();
- cpuset_init();
- cgroup_init();
- taskstats_init_early();
- delayacct_init();
-
- check_bugs();
-
- acpi_subsystem_init();
- sfi_init_late();
-
- if (efi_enabled(EFI_RUNTIME_SERVICES)) {
- efi_late_init();
- efi_free_boot_services();
- }
-
- ftrace_init();
-
- /* Do the rest non-__init'ed, we're now alive */
- rest_init();
- }
初始化全局锁链表
在init_task栈底存入魔数0x57AC6E9D,用于栈溢出检测。
多核检测,暂时不管,单核使用
初始化obj_hash、obj_static_pool全局变量
control group(cgroup)早期初始化
关闭中断
获取cpuid,激活CPU
初始化高端内存,arm没有使用
处理器变量设置、获取设备树中bootargs,cmdline等初始化相关的参数、初始化init进程的内存描述符init_mm
mm_init_cpumask 初始化 init_mm 的 cpu mask
保存cmdline
为系统cpu变量申请per_cpu
设置引导CPU
内存管理,node,zone
内存页初始化
解析内核参数
进程调度器初始化
禁止抢占