目录
start_kernel()-》setup_arch()-》setup_machine_fdt()-》 early_init_dt_scan_memory()
通过fdt 获取存储信息,base_address和size,再通过early_init_dt_add_memory_arch(base, size); 加入memblock 子系统。
kernel-4.19/arch/arm64/mm/init.c
659#define MLK(b, t) b, t, ((t) - (b)) >> 10
660#define MLM(b, t) b, t, ((t) - (b)) >> 20
661#define MLG(b, t) b, t, ((t) - (b)) >> 30
662#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
663
664 pr_notice("Virtual kernel memory layout:\n");
665#ifdef CONFIG_KASAN
666 pr_notice(" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n",
667 MLG(KASAN_SHADOW_START, KASAN_SHADOW_END));
668#endif
669 pr_notice(" modules : 0x%16lx - 0x%16lx (%6ld MB)\n",
670 MLM(MODULES_VADDR, MODULES_END));
671 pr_notice(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n",
672 MLG(VMALLOC_START, VMALLOC_END));
673 pr_notice(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n",
674 MLK_ROUNDUP(_text, _etext));
675 pr_notice(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n",
676 MLK_ROUNDUP(__start_rodata, __init_begin));
677 pr_notice(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n",
678 MLK_ROUNDUP(__init_begin, __init_end));
679 pr_notice(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
680 MLK_ROUNDUP(_sdata, _edata));
681 pr_notice(" .bss : 0x%p" " - 0x%p" " (%6ld KB)\n",
682 MLK_ROUNDUP(__bss_start, __bss_stop));
683 pr_notice(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n",
684 MLK(FIXADDR_START, FIXADDR_TOP));
685 pr_notice(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n",
686 MLM(PCI_IO_START, PCI_IO_END));
687#ifdef CONFIG_SPARSEMEM_VMEMMAP
688 pr_notice(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n",
689 MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE));
690 pr_notice(" 0x%16lx - 0x%16lx (%6ld MB actual)\n",
691 MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
692 (unsigned long)virt_to_page(high_memory)));
693#endif
694 pr_notice(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n",
695 MLM(__phys_to_virt(memblock_start_of_DRAM()),
696 (unsigned long)high_memory));
kernel-4.19/arch/arm64/include/asm/pgtable.h
/*
27 * VMALLOC range.
28 *
29 * VMALLOC_START: beginning of the kernel vmalloc space
30 * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space
31 * and fixed mappings
32 */
33#define VMALLOC_START (MODULES_END)
34#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
35
36#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
37
38#define FIRST_USER_ADDRESS 0UL
这里VMALLOC_START 跟 KIMAGE_VADDR 一样
kernel-4.19/arch/arm64/kernel/head.S
/*
377 * Map the kernel image (starting with PHYS_OFFSET).
378 */
379 adrp x0, swapper_pg_dir
380 mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
381 add x5, x5, x23 // add KASLR displacement
382 mov x4, PTRS_PER_PGD
383 adrp x6, _end // runtime __pa(_end)
384 adrp x3, _text // runtime __pa(_text)
385 sub x6, x6, x3 // _end - _text
386 add x6, x6, x5 // runtime __va(_end)
map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
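map_memory 是将起始虚拟地址 x5 到结束虚拟地址 x6(x6 是运行时 __va(_end),是结束地址而不是长度)这一段范围,映射到从物理地址 x3 开始的连续物理内存;其中 x0 是页表地址,x1 是填入第一级页表项的地址(即下一级页表所在位置),通常 x1=x0+PAGE_SIZE;x4 表示对应页表等级有多少项(这里是 PTRS_PER_PGD)。这里是将整个内核镜像(_text 到 _end)从虚拟地址 KIMAGE_VADDR + TEXT_OFFSET + KASLR 偏移开始,映射到以 _text 的物理地址为起点的连续物理内存。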
vmlinux 对应的是编译链接地址;coredump 及内核堆栈对应的是运行时地址;
支持kaslr之前,kernel加载到system RAM的某个位置,它之前的内存kernel是无法管理的,所以一般将kernel加载到system RAM的 起始位置+TEXT_OFFSET(0x080000)处,因为kaslr修改成可以随意加载到system RAM的任何位置,只要满足对齐要求就可以;
支持kaslr之前,kernel image是映射到线性映射区域的(4.15 之前),因为kaslr才修改成映射到vmalloc区域;
为了支持kaslr,内核要编译成PIE(Position Independent Executable),才能重映射
这样 .text 起始位置跟 VMALLOC 区起始地址有一个偏移
add_link = addr_run - (VMALLOC_START - .text_start) + TEXT_OFFSET ???
add_link 是addr2line 使用,addr_run 是虚拟地址,运行时堆栈地址。 .text_start 是load物理地址???
MTK 平台:
static inline void show_kaslr(void) 55{ 56 u64 const kaslr_offset = aee_get_kimage_vaddr() - KIMAGE_VADDR; 57 58 pr_notice("Kernel Offset: 0x%llx from 0x%lx\n", 59 kaslr_offset, KIMAGE_VADDR); 60 pr_notice("PHYS_OFFSET: 0x%llx\n", PHYS_OFFSET); 61 aee_rr_rec_kaslr_offset(kaslr_offset); 62}
aee_get_kimage_vaddr 从coredump 里面读取kimage_vaddr 对应地址
9#if defined(KIMAGE_VADDR)
90 machdesc_p->kimage_vaddr = KIMAGE_VADDR;
91#endif
92#if defined(TEXT_OFFSET)
93 machdesc_p->kimage_vaddr += TEXT_OFFSET;
94#endif
kernel-4.19/arch/arm64/include/asm/memory.h
35#define PCI_IO_SIZE SZ_16M
36
37/*
38 * Log2 of the upper bound of the size of a struct page. Used for sizing
39 * the vmemmap region only, does not affect actual memory footprint.
40 * We don't use sizeof(struct page) directly since taking its size here
41 * requires its definition to be available at this point in the inclusion
42 * chain, and it may not be a power of 2 in the first place.
43 */
44#define STRUCT_PAGE_MAX_SHIFT 6
45
46/*
47 * VMEMMAP_SIZE - allows the whole linear region to be covered by
48 * a struct page array
49 */
50#define VMEMMAP_SIZE (UL(1) << (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT))
51
52/*
53 * PAGE_OFFSET - the virtual address of the start of the linear map (top
54 * (VA_BITS - 1))
55 * KIMAGE_VADDR - the virtual address of the start of the kernel image
56 * VA_BITS - the maximum number of bits for virtual addresses.
57 * VA_START - the first kernel virtual address.
58 */
59#define VA_BITS (CONFIG_ARM64_VA_BITS)
60#define VA_START (UL(0xffffffffffffffff) - \
61 (UL(1) << VA_BITS) + 1)
62#define PAGE_OFFSET (UL(0xffffffffffffffff) - \
63 (UL(1) << (VA_BITS - 1)) + 1)
64#define KIMAGE_VADDR (MODULES_END)
65#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
66#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE)
67#define MODULES_VSIZE (SZ_128M)
68#define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE)
69#define PCI_IO_END (VMEMMAP_START - SZ_2M)
70#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
71#define FIXADDR_TOP (PCI_IO_START - SZ_2M)
72
73#define KERNEL_START _text
74#define KERNEL_END _end
内核虚拟地址起点:VA_START = 0xffff_0000_0000_0000
PAGE_OFFSET = 0xffff_8000_0000_0000(VA_BITS = 48 时,即 0xffff_ffff_ffff_ffff - (1 << 47) + 1)
PAGE_OFFSET - the virtual address of the start of the linear map (top(VA_BITS - 1))
KIMAGE_VADDR - the virtual address of the start of the kernel image
这里MODULES_VSIZE = 128M = 0x8000000
kernel-4.19/arch/arm64/Makefile
90# The byte offset of the kernel image in RAM from the start of RAM.
91ifeq ($(CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET), y)
92TEXT_OFFSET := $(shell awk "BEGIN {srand(); printf \"0x%06x\n\", \
93 int(2 * 1024 * 1024 / (2 ^ $(CONFIG_ARM64_PAGE_SHIFT)) * \
94 rand()) * (2 ^ $(CONFIG_ARM64_PAGE_SHIFT))}")
95else
96TEXT_OFFSET := 0x00080000
这里 CONFIG_ARM64_PAGE_SHIFT = 12,即页大小为 4K(2^12 字节)
User space 地址mmu转换示例:
基本概念介绍:
task_struct->mm
如果是用户进程,指向当前的进程地址空间。
如果是内核线程,为空(内核线程没有进程地址空间)。
task_struct->active_mm
如果是用户进程,mm与active_mm相同,都指向进程的地址空间。
如果是内核线程,指向被借用的用户进程的地址空间(mm)。
user space 各个 process 保存自己独立的 pgd,存放在 task_struct->mm->pgd 里面,每次做 context switch 时,会把 next_task 的 pgd(物理地址)写入 TTBR0_EL1,从而实现不同 process 有不同的地址空间。
TTBR1_EL1 对应内核 pgd(如 swapper_pg_dir),内核地址空间由所有进程共享。
cr3寄存器的加载(x86 架构,作为对照)
cr3寄存器的加载是在进程调度的时候更新的,具体如下
schedule()->context_switch()->switch_mm()->load_cr3(next->pgd)
load_cr3 传入的是 mm_struct->pgd,即线性(虚拟)地址,而实际加载到 cr3 寄存器的是物理地址:write_cr3(__pa(pgdir));也就是说,在装载 cr3 寄存器时通过 __pa 把线性地址转换成了物理地址,所以 cr3 寄存器里装的是实实在在的物理地址,即当前正在使用的页目录的物理地址。
假设页表映射层级是4,即配置CONFIG_ARM64_PGTABLE_LEVELS=4。地址宽度是48,即配置CONFIG_ARM64_VA_BITS=48,页大小4K,每个页表项占 8字节

PGD [47,39] 512*512G=256T
PUD [38,30] 512*1G = 512G
PMD [29,21] 512*2M = 1G
PTE [20,12] 4K/8=512 项,512*4K = 2M
页内偏移 [11,0] PAGE_SHIFT = 12,页大小 4K
kernel-4.19/arch/arm64/include/asm/pgtable-hwdef.h
*/ 16#ifndef __ASM_PGTABLE_HWDEF_H 17#define __ASM_PGTABLE_HWDEF_H 18 19#include <asm/memory.h> 20 21/* 22 * Number of page-table levels required to address 'va_bits' wide 23 * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT) 24 * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence: 25 * 26 * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3)) 27 * 28 * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d)) 29 * 30 * We cannot include linux/kernel.h which defines DIV_ROUND_UP here 31 * due to build issues. So we open code DIV_ROUND_UP here: 32 * 33 * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3)) 34 * 35 * which gets simplified as : 36 */ 37#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3)) 38 39/* 40 * Size mapped by an entry at level n ( 0 <= n <= 3) 41 * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits 42 * in the final page. The maximum number of translation levels supported by 43 * the architecture is 4. Hence, starting at at level n, we have further 44 * ((4 - n) - 1) levels of translation excluding the offset within the page. 45 * So, the total number of bits mapped by an entry at level n is : 46 * 47 * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT 48 * 49 * Rearranging it a bit we get : 50 * (4 - n) * (PAGE_SHIFT - 3) + 3 51 */ 52#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3) 53 54#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3)) 55 56/* 57 * PMD_SHIFT determines the size a level 2 page table entry can map. 58 */ 59#if CONFIG_PGTABLE_LEVELS > 2 60#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) 61#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) 62#define PMD_MASK (~(PMD_SIZE-1)) 63#define PTRS_PER_PMD PTRS_PER_PTE 64#endif 65 66/* 67 * PUD_SHIFT determines the size a level 1 page table entry can map. 
68 */ 69#if CONFIG_PGTABLE_LEVELS > 3 70#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) 71#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) 72#define PUD_MASK (~(PUD_SIZE-1)) 73#define PTRS_PER_PUD PTRS_PER_PTE 74#endif 75 76/* 77 * PGDIR_SHIFT determines the size a top-level page table entry can map 78 * (depending on the configuration, this level can be 0, 1 or 2). 79 */ 80#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) 81#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) 82#define PGDIR_MASK (~(PGDIR_SIZE-1)) 83#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) 84 85/* 86 * Section address mask and size definitions. 87 */ 88#define SECTION_SHIFT PMD_SHIFT 89#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) 90#define SECTION_MASK (~(SECTION_SIZE-1))
当配置CONFIG_PGTABLE_LEVELS为4 ,则为4级页表
PGDIR_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(0) = 39 ,表示VA 中除了本级页表地址,还有39 位表示其它级地址
PTRS_PER_PGD = (1 << (VA_BITS - PGDIR_SHIFT)) = 1<<9
virt_to_phys和phys_to_virt
内核虚拟地址起点:VA_START = 0xffff_0000_0000_0000
PAGE_OFFSET = 0xffff_8000_0000_0000(VA_BITS = 48 时,即 0xffff_ffff_ffff_ffff - (1 << 47) + 1)
PAGE_OFFSET - the virtual address of the start of the linear map (top(VA_BITS - 1))
对于48位虚拟地址,从 PAGE_OFFSET 开始往高地址的区域是线性映射区域,虚拟地址和物理地址之间只差一个固定偏移(virt = phys - PHYS_OFFSET + PAGE_OFFSET);如果地址不在线性映射区域(例如内核镜像所在的区域),这个时候虚拟地址和物理地址之间的偏移是 kimage_voffset(phys = virt - kimage_voffset);
kimage_voffset 的获取:
__primary_switch 这里获取的是MMU 没有打开时的_text 链接的地址(相对VMALLOC有一个偏移),加载地址跟链接地址一样;
__primary_switched x4 获取是运行时_text 运行虚拟地址,这个时候运行地址跟加载地址不一样,从而x4-x0 就是运行虚拟地址跟加载地址的一个偏移存入kimage_voffset。
device tree 简介 - 灰信网(软件开发博客聚合)