• linux-内存


    目录

    加载解析fdt 存储信息

    kaslr和页表映射

    虚拟地址到物理地址转换

     参考


    加载解析fdt 存储信息

    start_kernel() -> setup_arch() -> setup_machine_fdt() -> early_init_dt_scan() -> early_init_dt_scan_nodes() -> early_init_dt_scan_memory()

    通过fdt 获取存储信息,base_address和size,再通过early_init_dt_add_memory_arch(base, size); 加入memblock 子系统。

    kaslr和页表映射

    kernel-4.19/arch/arm64/mm/init.c

    659#define MLK(b, t) b, t, ((t) - (b)) >> 10
    660#define MLM(b, t) b, t, ((t) - (b)) >> 20
    661#define MLG(b, t) b, t, ((t) - (b)) >> 30
    662#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
    663
    664    pr_notice("Virtual kernel memory layout:\n");
    665#ifdef CONFIG_KASAN
    666    pr_notice("    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n",
    667        MLG(KASAN_SHADOW_START, KASAN_SHADOW_END));
    668#endif
    669    pr_notice("    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n",
    670        MLM(MODULES_VADDR, MODULES_END));
    671    pr_notice("    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n",
    672        MLG(VMALLOC_START, VMALLOC_END));
    673    pr_notice("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n",
    674        MLK_ROUNDUP(_text, _etext));
    675    pr_notice("    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n",
    676        MLK_ROUNDUP(__start_rodata, __init_begin));
    677    pr_notice("      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n",
    678        MLK_ROUNDUP(__init_begin, __init_end));
    679    pr_notice("      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
    680        MLK_ROUNDUP(_sdata, _edata));
    681    pr_notice("       .bss : 0x%p" " - 0x%p" "   (%6ld KB)\n",
    682        MLK_ROUNDUP(__bss_start, __bss_stop));
    683    pr_notice("    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n",
    684        MLK(FIXADDR_START, FIXADDR_TOP));
    685    pr_notice("    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n",
    686        MLM(PCI_IO_START, PCI_IO_END));
    687#ifdef CONFIG_SPARSEMEM_VMEMMAP
    688    pr_notice("    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n",
    689        MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE));
    690    pr_notice("              0x%16lx - 0x%16lx   (%6ld MB actual)\n",
    691        MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
    692            (unsigned long)virt_to_page(high_memory)));
    693#endif
    694    pr_notice("    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
    695        MLM(__phys_to_virt(memblock_start_of_DRAM()),
    696            (unsigned long)high_memory));

    kernel-4.19/arch/arm64/include/asm/pgtable.h
    /*
    27 * VMALLOC range.
    28 *
    29 * VMALLOC_START: beginning of the kernel vmalloc space
    30 * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space
    31 *    and fixed mappings
    32 */
    33#define VMALLOC_START        (MODULES_END)
    34#define VMALLOC_END        (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
    35
    36#define vmemmap            ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
    37
    38#define FIRST_USER_ADDRESS    0UL

    这里VMALLOC_START 跟 KIMAGE_VADDR 一样

    kernel-4.19/arch/arm64/kernel/head.S
        /*
    377     * Map the kernel image (starting with PHYS_OFFSET).
    378     */
    379    adrp    x0, swapper_pg_dir
    380    mov_q    x5, KIMAGE_VADDR + TEXT_OFFSET    // compile time __va(_text)
    381    add    x5, x5, x23            // add KASLR displacement

    382    mov    x4, PTRS_PER_PGD
    383    adrp    x6, _end            // runtime __pa(_end)
    384    adrp    x3, _text            // runtime __pa(_text)
    385    sub    x6, x6, x3            // _end - _text
    386    add    x6, x6, x5            // runtime __va(_end)

    map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14

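    map_memory 是将虚拟地址区间 x5 ~ x6(x5 是起始虚拟地址,x6 是结束虚拟地址,即运行时的 __va(_end),而不是长度)映射到从物理地址 x3 开始的连续物理内存;其中 x0 是页表地址,x1 是下一级页表的地址,通常 x1=x0+PAGE_SIZE;x4 表示 PGD 这一级页表有多少项(PTRS_PER_PGD)。这里是将内核镜像(从代码段 .text 起始的 _text 到 _end)从虚拟地址 KIMAGE_VADDR + TEXT_OFFSET + KASLR 偏移 开始,映射到以 _text 的物理地址开始的连续物理内存。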

    vmlinux 对应的是编译链接地址;coredump 及内核堆栈对应的是运行时地址;

    支持kaslr之前,kernel加载到system RAM的某个位置,它之前的内存kernel是无法管理的,所以一般将kernel加载到system RAM的 起始位置+TEXT_OFFSET(0x080000)处,因为kaslr修改成可以随意加载到system RAM的任何位置,只要满足对齐要求就可以;
    支持kaslr之前,kernel image是映射到线性映射区域的(4.15 之前),因为kaslr才修改成映射到vmalloc区域;
    为了支持kaslr,内核要编译成PIE(Position Independent Executable),才能重映射

    这样 .text 的起始位置跟 VMALLOC 区的起始地址有一个偏移

    add_link = addr_run - (VMALLOC_START - .text_start) + TEXT_OFFSET   ???

    add_link  是addr2line 使用,addr_run 是虚拟地址,运行时堆栈地址。 .text_start 是load物理地址???

    MTK 平台:

    static inline void show_kaslr(void)
    55{
    56	u64 const kaslr_offset = aee_get_kimage_vaddr() - KIMAGE_VADDR;
    57
    58	pr_notice("Kernel Offset: 0x%llx from 0x%lx\n",
    59			kaslr_offset, KIMAGE_VADDR);
    60	pr_notice("PHYS_OFFSET: 0x%llx\n", PHYS_OFFSET);
    61	aee_rr_rec_kaslr_offset(kaslr_offset);
    62}

    aee_get_kimage_vaddr  从coredump 里面读取kimage_vaddr 对应地址

    89#if defined(KIMAGE_VADDR)
    90    machdesc_p->kimage_vaddr = KIMAGE_VADDR;
    91#endif
    92#if defined(TEXT_OFFSET)
    93    machdesc_p->kimage_vaddr += TEXT_OFFSET;
    94#endif


    kernel-4.19/arch/arm64/include/asm/memory.h

    35#define PCI_IO_SIZE        SZ_16M
    36
    37/*
    38 * Log2 of the upper bound of the size of a struct page. Used for sizing
    39 * the vmemmap region only, does not affect actual memory footprint.
    40 * We don't use sizeof(struct page) directly since taking its size here
    41 * requires its definition to be available at this point in the inclusion
    42 * chain, and it may not be a power of 2 in the first place.
    43 */
    44#define STRUCT_PAGE_MAX_SHIFT    6
    45
    46/*
    47 * VMEMMAP_SIZE - allows the whole linear region to be covered by
    48 *                a struct page array
    49 */
    50#define VMEMMAP_SIZE (UL(1) << (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT))
    51
    52/*
    53 * PAGE_OFFSET - the virtual address of the start of the linear map (top
    54 *         (VA_BITS - 1))
    55 * KIMAGE_VADDR - the virtual address of the start of the kernel image
    56 * VA_BITS - the maximum number of bits for virtual addresses.
    57 * VA_START - the first kernel virtual address.
    58 */
    59#define VA_BITS            (CONFIG_ARM64_VA_BITS)
    60#define VA_START        (UL(0xffffffffffffffff) - \
    61    (UL(1) << VA_BITS) + 1)
    62#define PAGE_OFFSET        (UL(0xffffffffffffffff) - \
    63    (UL(1) << (VA_BITS - 1)) + 1)
    64#define KIMAGE_VADDR        (MODULES_END)
    65#define MODULES_END        (MODULES_VADDR + MODULES_VSIZE)
    66#define MODULES_VADDR        (VA_START + KASAN_SHADOW_SIZE)
    67#define MODULES_VSIZE        (SZ_128M)
    68#define VMEMMAP_START        (PAGE_OFFSET - VMEMMAP_SIZE)
    69#define PCI_IO_END        (VMEMMAP_START - SZ_2M)
    70#define PCI_IO_START        (PCI_IO_END - PCI_IO_SIZE)
    71#define FIXADDR_TOP        (PCI_IO_START - SZ_2M)
    72
    73#define KERNEL_START      _text
    74#define KERNEL_END        _end

    内核虚拟地址起点:VA_START = 0xffff_0000_0000_0000

    PAGE_OFFSET  =0xffff_8000_0000_0000(VA_BITS=48 时,即 0xffffffffffffffff - (1 << 47) + 1)

     PAGE_OFFSET - the virtual address of the start of the linear map (top(VA_BITS - 1))

    KIMAGE_VADDR - the virtual address of the start of the kernel image

    这里MODULES_VSIZE = 128M = 0x8000000

    ​kernel-4.19/arch/arm64/Makefile
    90# The byte offset of the kernel image in RAM from the start of RAM.
    91ifeq ($(CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET), y)
    92TEXT_OFFSET := $(shell awk "BEGIN {srand(); printf \"0x%06x\n\", \
    93         int(2 * 1024 * 1024 / (2 ^ $(CONFIG_ARM64_PAGE_SHIFT)) * \
    94         rand()) * (2 ^ $(CONFIG_ARM64_PAGE_SHIFT))}")
    95else
    96TEXT_OFFSET := 0x00080000

    这里CONFIG_ARM64_PAGE_SHIFT 页大小 12 位

    User space 地址mmu转换示例:
    基本概念介绍:

    task_struct->mm
    如果是用户进程,指向当前的进程地址空间。

    如果是内核线程,为空(内核线程没有进程地址空间)。

    task_struct->active_mm
    如果是用户进程,mm与active_mm相同,都指向进程的地址空间。

    如果是内核线程,指向被借用的用户进程的地址空间(mm)。

    user space各个process 保存自己独立的pgd,存放在task_struct->mm->pgd里面,每次做context  switch时,会把next_task的pgd(物理地址)存放到TTBR0_EL1里面,从而实现不同process不同的地址空间。

    TTBR1_EL1 对应内核pgd(swapper_pg_dir)

    cr3寄存器的加载

    cr3寄存器(x86 架构上保存页目录物理地址的寄存器,作用类似 ARM64 的 TTBRx_EL1)的加载是在进程调度的时候更新的,具体如下

    schedule()->context_switch()->switch_mm()->load_cr3(next->pgd)

    load_cr3加载的是mm_struct->pgd,即线性地址,而实际上加载到cr3寄存器的是实际的物理地址write_cr3(__pa(pgdir));在装载cr3寄存器时将线性地址通过__pa转换成了物理地址,所以cr3寄存器里装的是实实在在的物理地址。正在使用的页目录的物理地址存在cr3控制寄存器中。

    ARM64 Kernel Image Mapping的变化

    假设页表映射层级是4,即配置CONFIG_ARM64_PGTABLE_LEVELS=4。地址宽度是48,即配置CONFIG_ARM64_VA_BITS=48,页大小4K,每个页表项占 8字节

    PGD  [47,39]          512*512G=256T

    PUD  [38,30]          512*1G = 512G

    PMD [29,21]          512*2M = 1G

    PTE  [20,12]          4K/8=512 项,512*4K = 2M

    PAGE_SHIFT   [11~0]

    kernel-4.19/arch/arm64/include/asm/pgtable-hwdef.h

     */
    16#ifndef __ASM_PGTABLE_HWDEF_H
    17#define __ASM_PGTABLE_HWDEF_H
    18
    19#include <asm/memory.h>
    20
    21/*
    22 * Number of page-table levels required to address 'va_bits' wide
    23 * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
    24 * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence:
    25 *
    26 *  levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3))
    27 *
    28 * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d))
    29 *
    30 * We cannot include linux/kernel.h which defines DIV_ROUND_UP here
    31 * due to build issues. So we open code DIV_ROUND_UP here:
    32 *
    33 *	((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3))
    34 *
    35 * which gets simplified as :
    36 */
    37#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
    38
    39/*
    40 * Size mapped by an entry at level n ( 0 <= n <= 3)
    41 * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
    42 * in the final page. The maximum number of translation levels supported by
    43 * the architecture is 4. Hence, starting at at level n, we have further
    44 * ((4 - n) - 1) levels of translation excluding the offset within the page.
    45 * So, the total number of bits mapped by an entry at level n is :
    46 *
    47 *  ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
    48 *
    49 * Rearranging it a bit we get :
    50 *   (4 - n) * (PAGE_SHIFT - 3) + 3
    51 */
    52#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n)	((PAGE_SHIFT - 3) * (4 - (n)) + 3)
    53
    54#define PTRS_PER_PTE		(1 << (PAGE_SHIFT - 3))
    55
    56/*
    57 * PMD_SHIFT determines the size a level 2 page table entry can map.
    58 */
    59#if CONFIG_PGTABLE_LEVELS > 2
    60#define PMD_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
    61#define PMD_SIZE		(_AC(1, UL) << PMD_SHIFT)
    62#define PMD_MASK		(~(PMD_SIZE-1))
    63#define PTRS_PER_PMD		PTRS_PER_PTE
    64#endif
    65
    66/*
    67 * PUD_SHIFT determines the size a level 1 page table entry can map.
    68 */
    69#if CONFIG_PGTABLE_LEVELS > 3
    70#define PUD_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
    71#define PUD_SIZE		(_AC(1, UL) << PUD_SHIFT)
    72#define PUD_MASK		(~(PUD_SIZE-1))
    73#define PTRS_PER_PUD		PTRS_PER_PTE
    74#endif
    75
    76/*
    77 * PGDIR_SHIFT determines the size a top-level page table entry can map
    78 * (depending on the configuration, this level can be 0, 1 or 2).
    79 */
    80#define PGDIR_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
    81#define PGDIR_SIZE		(_AC(1, UL) << PGDIR_SHIFT)
    82#define PGDIR_MASK		(~(PGDIR_SIZE-1))
    83#define PTRS_PER_PGD		(1 << (VA_BITS - PGDIR_SHIFT))
    84
    85/*
    86 * Section address mask and size definitions.
    87 */
    88#define SECTION_SHIFT		PMD_SHIFT
    89#define SECTION_SIZE		(_AC(1, UL) << SECTION_SHIFT)
    90#define SECTION_MASK		(~(SECTION_SIZE-1))

     当配置CONFIG_PGTABLE_LEVELS为4 ,则为4级页表

    PGDIR_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(0)   = 39 ,表示VA 中除了本级页表地址,还有39 位表示其它级地址

     PTRS_PER_PGD  = (1 << (VA_BITS - PGDIR_SHIFT))  = 1<<9 

    虚拟地址到物理地址转换

    virt_to_phys和phys_to_virt

    内核虚拟地址起点:VA_START = 0xffff_0000_0000_0000

    PAGE_OFFSET  =0xffff_8000_0000_0000(VA_BITS=48 时,即 0xffffffffffffffff - (1 << 47) + 1)

     PAGE_OFFSET - the virtual address of the start of the linear map (top(VA_BITS - 1))

    对于48位虚拟地址,从PAGE_OFFSET 开始的往大地址的区域是线性区域,跟物理地址就是一个PHYS_OFFSET 偏差;如果不是线性区域,这个时候是 kimage_voffset  偏移;

    1. #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
    2. #define __is_lm_address(addr) (!!((addr) & BIT(VA_BITS - 1)))
    3. #define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET)
    4. #define __kimg_to_phys(addr) ((addr) - kimage_voffset)

    1. #define __virt_to_phys_nodebug(x) ({ \
    2. phys_addr_t __x = (phys_addr_t)(x); \
    3. __is_lm_address(__x) ? __lm_to_phys(__x) : __kimg_to_phys(__x); })
    4. #define __virt_to_phys(x) __virt_to_phys_nodebug(x)
    5. static inline phys_addr_t virt_to_phys(const volatile void *x)
    6. {
    7. return __virt_to_phys((unsigned long)(x));
    8. }

    kimage_voffset 的获取:

    1. #define KERNEL_START _text
    2. #define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET)
    3. ENTRY(kimage_vaddr)
    4. .quad _text - TEXT_OFFSET
    5. /*
    6. * The following fragment of code is executed with the MMU enabled.
    7. *
    8. * x0 = __PHYS_OFFSET
    9. */
    10. __primary_switched:
    11. ldr_l x4, kimage_vaddr // Save the offset between /* 2 */
    12. sub x4, x4, x0 // the kernel virtual and /* 3 */
    13. str_l x4, kimage_voffset, x5 // physical mappings /* 4 */
    14. b start_kernel
    15. __primary_switch:
    16. bl __enable_mmu
    17. ldr x8, =__primary_switched
    18. adrp x0, __PHYS_OFFSET /* 1 */
    19. br x8

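    __primary_switch 位于恒等映射(idmap)区域,虽然 MMU 已经打开,但此时 PC 的值仍等于物理地址,所以 adrp x0, __PHYS_OFFSET 通过 PC 相对寻址得到的 x0 是 _text - TEXT_OFFSET 的加载(物理)地址,而不是链接地址;

    __primary_switched 运行在内核虚拟地址上,x4 从 kimage_vaddr 读到的是 _text - TEXT_OFFSET 的运行时虚拟地址(开启 KASLR 时应已包含随机偏移),这个时候运行地址跟加载地址不一样,从而 x4-x0 就是运行虚拟地址跟加载(物理)地址之间的偏移,存入 kimage_voffset。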

     参考

    device tree 简介 - 灰信网(软件开发博客聚合)

    ARM64 Kernel Image Mapping的变化

  • 相关阅读:
    c++基础(十五)——多态
    JVM 运行时数据区与JMM 内存模型详解
    “蔚来杯“2022牛客暑期多校训练营6 补题题解(A、B、G、J、M)
    【Qt】桌面应用开发教程——布局|按钮组|容器|常用控件|消息事件机制
    使用kubasz快速搭建Kubernetes集群
    【Java基础】类型转换与常用运算符
    HDFS High Availability(HA)高可用配置
    第七章《Java的异常处理》第4节:throw与throws关键字
    Docker启动失败报错Failed to start Docker Application Container Engine解决方案
    源码解析day06 (PriorityQueue)
  • 原文地址:https://blog.csdn.net/lei7143/article/details/125410184