• Linux static_key原理与应用


    背景

    内核中有很多判断条件在正常情况下的结果都是固定的,除非极其罕见的场景才会改变。通常单个这种判断的代价很低,可以忽略;但如果这种判断数量巨大且被频繁执行,就会带来性能损失。内核的static-key机制就是为了优化这种场景,其优化的结果是:对于大多数情况,对应的判断被优化为一条NOP指令;只有在非常罕见的场景下才会被改写成jump XXX一类的指令,使得对应的代码段得到执行。
    在这里插入图片描述

    1. static-key的使用方法

    1.1. static-key定义

    static_key 结构体的定义如下:

    #ifdef CONFIG_JUMP_LABEL
    
    /*
     * Static key layout used when CONFIG_JUMP_LABEL is defined: an atomic
     * `enabled` counter followed by an anonymous union that overlays `type`,
     * `entries` and `next` in the same storage.
     */
    struct static_key {
    	atomic_t enabled;
    /*
     * Note:
     *   To make anonymous unions work with old compilers, the static
     *   initialization of them requires brackets. This creates a dependency
     *   on the order of the struct with the initializers. If any fields
     *   are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need
     *   to be modified.
     *
     * bit 0 => 1 if key is initially true
     *	    0 if initially false
     * bit 1 => 1 if points to struct static_key_mod
     *	    0 if points to struct jump_entry
     */
    	union {
    		unsigned long type;
    		struct jump_entry *entries;
    		struct static_key_mod *next;
    	};
    };
    
    #else
    /* Fallback layout without CONFIG_JUMP_LABEL: only the atomic `enabled` counter. */
    struct static_key {
    	atomic_t enabled;
    };
    #endif	/* CONFIG_JUMP_LABEL */
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29

    如果没有定义CONFIG_JUMP_LABEL,则static_key 退化成atomic变量。

    1.2 初始化

    /*
     * Define a variable `name` of type struct static_key_true or
     * struct static_key_false, initialized with STATIC_KEY_TRUE_INIT or
     * STATIC_KEY_FALSE_INIT respectively.
     */
    #define DEFINE_STATIC_KEY_TRUE(name)	\
    	struct static_key_true name = STATIC_KEY_TRUE_INIT
    #define DEFINE_STATIC_KEY_FALSE(name)	\
    	struct static_key_false name = STATIC_KEY_FALSE_INIT
    
    • 1
    • 2
    • 3
    • 4
    /* Compound-literal initializers for the typed wrappers; each fills the .key member. */
    #define STATIC_KEY_TRUE_INIT  (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE,  }
    #define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, }
    
    /*
     * Initializers for struct static_key itself: `enabled` starts at 1 or 0 and
     * `entries` is set to JUMP_TYPE_TRUE or JUMP_TYPE_FALSE cast to a pointer.
     */
    #define STATIC_KEY_INIT_TRUE                    \
        { .enabled = { 1 },                    \
          .entries = (void *)JUMP_TYPE_TRUE }
    #define STATIC_KEY_INIT_FALSE                    \
        { .enabled = { 0 },                    \
          .entries = (void *)JUMP_TYPE_FALSE }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9

    false和true的区别在于初始值不同:enabled 分别为0和1,entries 中编码的初始类型分别为JUMP_TYPE_FALSE和JUMP_TYPE_TRUE。

    1.3 条件判断

    #ifdef CONFIG_JUMP_LABEL
    
    /*
     * Combine the right initial value (type) with the right branch order
     * to generate the desired result.
     *
     *
     * type\branch|	likely (1)	      |	unlikely (0)
     * -----------+-----------------------+------------------
     *            |                       |
     *  true (1)  |	   ...		      |	   ...
     *            |    NOP		      |	   JMP L
     *            |    <br-stmts>	      |	1: ...
     *            |	L: ...		      |
     *            |			      |
     *            |			      |	L: <br-stmts>
     *            |			      |	   jmp 1b
     *            |                       |
     * -----------+-----------------------+------------------
     *            |                       |
     *  false (0) |	   ...		      |	   ...
     *            |    JMP L	      |	   NOP
     *            |    <br-stmts>	      |	1: ...
     *            |	L: ...		      |
     *            |			      |
     *            |			      |	L: <br-stmts>
     *            |			      |	   jmp 1b
     *            |                       |
     * -----------+-----------------------+------------------
     *
     * The initial value is encoded in the LSB of static_key::entries,
     * type: 0 = false, 1 = true.
     *
     * The branch type is encoded in the LSB of jump_entry::key,
     * branch: 0 = unlikely, 1 = likely.
     *
     * This gives the following logic table:
     *
     *	enabled	type	branch	  instuction
     * -----------------------------+-----------
     *	0	0	0	| NOP
     *	0	0	1	| JMP
     *	0	1	0	| NOP
     *	0	1	1	| JMP
     *
     *	1	0	0	| JMP
     *	1	0	1	| NOP
     *	1	1	0	| JMP
     *	1	1	1	| NOP
     *
     * Which gives the following functions:
     *
     *   dynamic: instruction = enabled ^ branch
     *   static:  instruction = type ^ branch
     *
     * See jump_label_type() / jump_label_init_type().
     */
    
    /*
     * Test a static key on a path that is expected to be taken.
     *
     * The pointee type of x is checked at compile time:
     *   - struct static_key_true  -> !arch_static_branch(&(x)->key, true)
     *   - struct static_key_false -> !arch_static_branch_jump(&(x)->key, true)
     *   - anything else           -> ____wrong_branch_error()
     * The resulting bool is wrapped in likely() and is the value of the
     * statement expression.
     */
    #define static_branch_likely(x)							\
    ({										\
    	bool branch;								\
    	if (__builtin_types_compatible_p(typeof(*x), struct static_key_true))	\
    		branch = !arch_static_branch(&(x)->key, true);			\
    	else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
    		branch = !arch_static_branch_jump(&(x)->key, true);		\
    	else									\
    		branch = ____wrong_branch_error();				\
    	likely(branch);								\
    })
    
    /*
     * Test a static key on a path that is expected not to be taken.
     *
     * The pointee type of x is checked at compile time:
     *   - struct static_key_true  -> arch_static_branch_jump(&(x)->key, false)
     *   - struct static_key_false -> arch_static_branch(&(x)->key, false)
     *   - anything else           -> ____wrong_branch_error()
     * Unlike static_branch_likely(), the arch helper's result is not negated.
     * The resulting bool is wrapped in unlikely().
     */
    #define static_branch_unlikely(x)						\
    ({										\
    	bool branch;								\
    	if (__builtin_types_compatible_p(typeof(*x), struct static_key_true))	\
    		branch = arch_static_branch_jump(&(x)->key, false);		\
    	else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
    		branch = arch_static_branch(&(x)->key, false);			\
    	else									\
    		branch = ____wrong_branch_error();				\
    	unlikely(branch);							\
    })
    
    #else /* !CONFIG_JUMP_LABEL */
    
    /*
     * Without CONFIG_JUMP_LABEL both macros reduce to an ordinary test of
     * static_key_enabled(&(x)->key) wrapped in likely() / unlikely().
     */
    #define static_branch_likely(x)		likely(static_key_enabled(&(x)->key))
    #define static_branch_unlikely(x)	unlikely(static_key_enabled(&(x)->key))
    
    #endif /* CONFIG_JUMP_LABEL */
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88

    可见同样依赖CONFIG_JUMP_LABEL。如果没有定义的话,直接退化成likely和unlikely。
    static_branch_unlikely 和 static_branch_likely 只是填充指令的方式不同(可以参考上面的代码注释), 当static_key为false时,都会进入else逻辑语句中。

    if (static_branch_unlikely((&static_key)))
        do unlikely work;
    else
        do likely work;
    
    • 1
    • 2
    • 3
    • 4

    1.4 修改判断条件

    使用static_branch_enable和static_branch_disable可以改变static_key 状态

    /* Thin wrappers that forward the embedded .key member to static_key_enable() / static_key_disable(). */
    #define static_branch_enable(x)        static_key_enable(&(x)->key)
    #define static_branch_disable(x)    static_key_disable(&(x)->key)
    
    • 1
    • 2

    底层是调用static_key_slow_inc, static_key_slow_dec来改变key->enabled计数。

    /*
     * Turn the key on if it is currently off.
     *
     * Reads the current count with static_key_count(), warns once if it is
     * outside the range 0..1, and calls static_key_slow_inc() only when the
     * count is 0, so calling it on an already-enabled key does not increment
     * the count again.
     */
    static inline void static_key_enable(struct static_key *key)
    {
        int count = static_key_count(key);
    
        WARN_ON_ONCE(count < 0 || count > 1);
        
        if (!count)
            static_key_slow_inc(key);
    }
    /*
     * Turn the key off if it is currently on.
     *
     * Reads the current count with static_key_count(), warns once if it is
     * outside the range 0..1, and calls static_key_slow_dec() only when the
     * count is non-zero.
     */
    static inline void static_key_disable(struct static_key *key)
    {
        int count = static_key_count(key);
    
        WARN_ON_ONCE(count < 0 || count > 1);
        
        if (count)
            static_key_slow_dec(key);
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    /*
     * Run STATIC_KEY_CHECK_USE() on the key, then atomically increment
     * key->enabled.
     *
     * NOTE(review): this variant only touches the counter; it looks like the
     * fallback used without CONFIG_JUMP_LABEL - confirm against the kernel
     * version being described.
     */
    static inline void static_key_slow_inc(struct static_key *key)
    {
    	STATIC_KEY_CHECK_USE(key);
    	atomic_inc(&key->enabled);
    }
    
    /*
     * Run STATIC_KEY_CHECK_USE() on the key, then atomically decrement
     * key->enabled.
     *
     * NOTE(review): this variant only touches the counter; it looks like the
     * fallback used without CONFIG_JUMP_LABEL - confirm against the kernel
     * version being described.
     */
    static inline void static_key_slow_dec(struct static_key *key)
    {
    	STATIC_KEY_CHECK_USE(key);
    	atomic_dec(&key->enabled);
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    2、示例代码

    下面我们用一段代码来分析static-key对程序分支跳转硬编码的影响。

    #include <linux/init.h>
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/jump_label.h>
    
    DEFINE_STATIC_KEY_FALSE(key);
    
    /*
     * Print one of two messages depending on static_branch_unlikely(&key).
     *
     * The parameter `a` is not read in this version of the function.
     */
    void func(int a){
        if (static_branch_unlikely(&key)) {  
            /* Taken when static_branch_unlikely(&key) evaluates to true. */
            printk("my_module: Feature is enabled\n");
        } else {
            printk("my_module: Feature is disabled\n");
        }
    }
    
    /*
     * Module entry point: exercises func() with the static key in both states.
     *
     * Logs a load message, calls func() while `key` still has its initial
     * state, enables the key with static_branch_enable(), then calls func()
     * again.
     *
     * Return: always 0.
     */
    static int __init my_module_init(void) {
        /* Declared before the first statement: kernel builds warn on mixed declarations and code. */
        int a = 1;

        pr_info("my_module: Module loaded\n");
        func(a);
        static_branch_enable(&key);
        func(a);
        return 0;
    }
    
    /* Module exit handler: only logs that the module is being unloaded. */
    static void __exit my_module_exit(void) {
        pr_info("my_module: Module unloaded\n");
    }
    
    module_init(my_module_init);
    module_exit(my_module_exit);
    
    MODULE_LICENSE("GPL");
    MODULE_AUTHOR("Your Name");
    MODULE_DESCRIPTION("Sample Kernel Module with Static Key");
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34

    func汇编代码如下:

    0000000000000000 <func>:
       0:   a9bf7bfd        stp     x29, x30, [sp, #-16]!
       4:   910003fd        mov     x29, sp
       8:   d503201f        nop
       c:   90000000        adrp    x0, 0 <func>
      10:   91000000        add     x0, x0, #0x0
      14:   94000000        bl      0 <printk>
      18:   a8c17bfd        ldp     x29, x30, [sp], #16
      1c:   d65f03c0        ret
      20:   90000000        adrp    x0, 0 <func>
      24:   91000000        add     x0, x0, #0x0
      28:   94000000        bl      0 <printk>
      2c:   17fffffb        b       18 <func+0x18>
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13

    func中不使用static-key时,源码和汇编代码如下:

    /*
     * Comparison version of func without a static key: the branch is chosen
     * by testing the runtime value of `a`.
     */
    void func(int a){
        if (a) {  
            printk("my_module: Feature is enabled\n");
        } else {
            printk("my_module: Feature is disabled\n");
        }
    }
    
    0000000000000000 <func>:
       0:   a9bf7bfd        stp     x29, x30, [sp, #-16]!
       4:   910003fd        mov     x29, sp
       8:   340000a0        cbz     w0, 1c <func+0x1c>
       c:   90000000        adrp    x0, 0 <func>
      10:   91000000        add     x0, x0, #0x0
      14:   94000000        bl      0 <printk>
      18:   14000004        b       28 <func+0x28>
      1c:   90000000        adrp    x0, 0 <func>
      20:   91000000        add     x0, x0, #0x0
      24:   94000000        bl      0 <printk>
      28:   a8c17bfd        ldp     x29, x30, [sp], #16
      2c:   d65f03c0        ret
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21

    对比可以发现,在0x8地址处,使用static-key后编译器生成的是nop指令而不是cbz指令,省去了运行时的条件比较;当key被使能时,内核会在运行时把这条nop改写为跳转指令,从而执行另一分支的代码。

    参考链接

    1. Linux内核中的static-key机制
    2. Linux内核jump label与static key的原理与示例
    3. static-keys.html | 静态键
    4. Linux Jump Label/static-key机制详解
  • 相关阅读:
    MySQL——DQL语法 练习笔记
    Java设计模式之状态模式
    摆闸机的应用领域和性能特点
    浅谈 AOP 什么是 AOP ?
    (六)admin-boot项目之全局处理预防xss攻击
    小知识:SQL Monitor Report的使用
    微前端qiankun接入Vue和React项目
    HACK ME PLEASE: 1实战演练
    【HTML——旋转火焰】(效果+代码)
    VUE响应式
  • 原文地址:https://blog.csdn.net/SGchi/article/details/132859345