内核中有很多判断条件在正常情况下的结果都是固定的,除非极其罕见的场景才会改变,通常单个的这种判断的代价很低可以忽略,但是如果这种判断数量巨大且被频繁执行,那就会带来性能损失了。内核的static-key机制就是为了优化这种场景,其优化的结果是:对于大多数情况,对应的判断被优化为一个NOP指令,在非常有场景的时候就变成jump XXX一类的指令,使得对应的代码段得到执行。
static_key 结构体的定义如下:
#ifdef CONFIG_JUMP_LABEL
struct static_key {
atomic_t enabled;
/*
* Note:
* To make anonymous unions work with old compilers, the static
* initialization of them requires brackets. This creates a dependency
* on the order of the struct with the initializers. If any fields
* are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need
* to be modified.
*
* bit 0 => 1 if key is initially true
* 0 if initially false
* bit 1 => 1 if points to struct static_key_mod
* 0 if points to struct jump_entry
*/
union {
unsigned long type;
struct jump_entry *entries;
struct static_key_mod *next;
};
};
#else
struct static_key {
atomic_t enabled;
};
#endif /* CONFIG_JUMP_LABEL */
如果没有定义CONFIG_JUMP_LABEL
,则static_key
退化成atomic
变量。
#define DEFINE_STATIC_KEY_TRUE(name) \
struct static_key_true name = STATIC_KEY_TRUE_INIT
#define DEFINE_STATIC_KEY_FALSE(name) \
struct static_key_false name = STATIC_KEY_FALSE_INIT
#define STATIC_KEY_TRUE_INIT (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE, }
#define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, }
#define STATIC_KEY_INIT_TRUE \
{ .enabled = { 1 }, \
.entries = (void *)JUMP_TYPE_TRUE }
#define STATIC_KEY_INIT_FALSE \
{ .enabled = { 0 }, \
.entries = (void *)JUMP_TYPE_FALSE }
false和true的主要区别就是enabled 是否为1.
#ifdef CONFIG_JUMP_LABEL
/*
* Combine the right initial value (type) with the right branch order
* to generate the desired result.
*
*
* type\branch| likely (1) | unlikely (0)
* -----------+-----------------------+------------------
* | |
* true (1) | ... | ...
* | NOP | JMP L
* | | 1: ...
* | L: ... |
* | |
* | | L:
* | | jmp 1b
* | |
* -----------+-----------------------+------------------
* | |
* false (0) | ... | ...
* | JMP L | NOP
* | | 1: ...
* | L: ... |
* | |
* | | L:
* | | jmp 1b
* | |
* -----------+-----------------------+------------------
*
* The initial value is encoded in the LSB of static_key::entries,
* type: 0 = false, 1 = true.
*
* The branch type is encoded in the LSB of jump_entry::key,
* branch: 0 = unlikely, 1 = likely.
*
* This gives the following logic table:
*
* enabled type branch instuction
* -----------------------------+-----------
* 0 0 0 | NOP
* 0 0 1 | JMP
* 0 1 0 | NOP
* 0 1 1 | JMP
*
* 1 0 0 | JMP
* 1 0 1 | NOP
* 1 1 0 | JMP
* 1 1 1 | NOP
*
* Which gives the following functions:
*
* dynamic: instruction = enabled ^ branch
* static: instruction = type ^ branch
*
* See jump_label_type() / jump_label_init_type().
*/
#define static_branch_likely(x) \
({ \
bool branch; \
if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \
branch = !arch_static_branch(&(x)->key, true); \
else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
branch = !arch_static_branch_jump(&(x)->key, true); \
else \
branch = ____wrong_branch_error(); \
likely(branch); \
})
#define static_branch_unlikely(x) \
({ \
bool branch; \
if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \
branch = arch_static_branch_jump(&(x)->key, false); \
else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
branch = arch_static_branch(&(x)->key, false); \
else \
branch = ____wrong_branch_error(); \
unlikely(branch); \
})
#else /* !CONFIG_JUMP_LABEL */
#define static_branch_likely(x) likely(static_key_enabled(&(x)->key))
#define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key))
#endif /* CONFIG_JUMP_LABEL */
可见同样依赖HAVE_JUMP_LABEL
。如果没有定义的话,直接退化成likely和unlikely
static_branch_unlikely
和 static_branch_likely
只是填充指令的方式不同(可以参考上面的代码注释), 当static_key
为false时,都会进入else逻辑语句中。
if (static_branch_unlikely((&static_key)))
do likely work;
else
do unlikely work
使用static_branch_enable
和 static_branch_disable
可以改变static_key 状态
#define static_branch_enable(x) static_key_enable(&(x)->key)
#define static_branch_disable(x) static_key_disable(&(x)->key)
底层是调用static_key_slow_dec
, static_key_slow_dec
来改变key->enabled计数。
static inline void static_key_enable(struct static_key *key)
{
int count = static_key_count(key);
WARN_ON_ONCE(count < 0 || count > 1);
if (!count)
static_key_slow_inc(key);
}
static inline void static_key_disable(struct static_key *key)
{
int count = static_key_count(key);
WARN_ON_ONCE(count < 0 || count > 1);
if (count)
static_key_slow_dec(key);
}
static inline void static_key_slow_inc(struct static_key *key)
{
STATIC_KEY_CHECK_USE(key);
atomic_inc(&key->enabled);
}
static inline void static_key_slow_dec(struct static_key *key)
{
STATIC_KEY_CHECK_USE(key);
atomic_dec(&key->enabled);
}
下面我们用一段代码来分析static-key对程序分支跳转硬编码的影响。
#include
#include
#include
#include
DEFINE_STATIC_KEY_FALSE(key);
void func(int a){
if (static_branch_unlikely(&key)) {
printk("my_module: Feature is enabled\n");
} else {
printk("my_module: Feature is disabled\n");
}
}
static int __init my_module_init(void) {
pr_info("my_module: Module loaded\n");
int a = 1;
func(a);
static_branch_enable(&key);
func(a);
return 0;
}
static void __exit my_module_exit(void) {
pr_info("my_module: Module unloaded\n");
}
module_init(my_module_init);
module_exit(my_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Your Name");
MODULE_DESCRIPTION("Sample Kernel Module with Static Key");
func汇编代码如下:
0000000000000000 <func>:
0: a9bf7bfd stp x29, x30, [sp, #-16]!
4: 910003fd mov x29, sp
8: d503201f nop
c: 90000000 adrp x0, 0 <func>
10: 91000000 add x0, x0, #0x0
14: 94000000 bl 0 <printk>
18: a8c17bfd ldp x29, x30, [sp], #16
1c: d65f03c0 ret
20: 90000000 adrp x0, 0 <func>
24: 91000000 add x0, x0, #0x0
28: 94000000 bl 0 <printk>
2c: 17fffffb b 18 <func+0x18>
func中不适用static-key时,汇编代码如下:
void func(int a){
if (a) {
printk("my_module: Feature is enabled\n");
} else {
printk("my_module: Feature is disabled\n");
}
}
0000000000000000 <func>:
0: a9bf7bfd stp x29, x30, [sp, #-16]!
4: 910003fd mov x29, sp
8: 340000a0 cbz w0, 1c <func+0x1c>
c: 90000000 adrp x0, 0 <func>
10: 91000000 add x0, x0, #0x0
14: 94000000 bl 0 <printk>
18: 14000004 b 28 <func+0x28>
1c: 90000000 adrp x0, 0 <func>
20: 91000000 add x0, x0, #0x0
24: 94000000 bl 0 <printk>
28: a8c17bfd ldp x29, x30, [sp], #16
2c: d65f03c0 ret
对比可以发现,在0x8地址处,使用static-key编码在编译时将cbz指令替换为了nop指令,减少了程序运行时对比次数。