• linux 内核中的pid和前缀树


    前言:

    写这个文章的初衷是因为今天手写了一个字典树,然后写字典树以后忽然想到了之前看的技术文章,linux kernel 之前的pid 申请方式已经从 bitmap 变成了 基数树,所以打算写文章再回顾一下这种数据结构算法

    一、内核中pid的申请

    旧的内核中pid的申请就不多说了,是使用的是bitmap去实现的,新版的换为了基数树。申请pid的内核代码在:

    1. struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
    2. size_t set_tid_size)
    3. {
    4. .......
    5. }

    继续看代码,申请函数为:

    1. if (tid) {
    2. nr = idr_alloc(&tmp->idr, NULL, tid,
    3. tid + 1, GFP_ATOMIC);
    4. /*
    5. * If ENOSPC is returned it means that the PID is
    6. * alreay in use. Return EEXIST in that case.
    7. */
    8. if (nr == -ENOSPC)
    9. nr = -EEXIST;
    10. } else {
    11. int pid_min = 1;
    12. /*
    13. * init really needs pid 1, but after reaching the
    14. * maximum wrap back to RESERVED_PIDS
    15. */
    16. if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS)
    17. pid_min = RESERVED_PIDS;
    18. /*
    19. * Store a null pointer so find_pid_ns does not find
    20. * a partially initialized PID (see below).
    21. */
    22. nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
    23. pid_max, GFP_ATOMIC);
    24. }

    进程最大的pid号为:

    1. int pid_max = PID_MAX_DEFAULT;
    2. /*
    3. * This controls the default maximum pid allocated to a process
    4. */
    5. #define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)

    这里我发现了一个问题,linux 最大的pid 是 0x8000 而不是 0xFFFF,这是为了和老的内核兼容,我们可以通过

    cat /proc/sys/kernel/pid_max

    查询当前内核的最大pid。

    申请pid 函数我们聚焦在

    idr_alloc 和 idr_alloc_cyclic

    再进去看 idr_alloc_cyclic 的实现

    1. int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp)
    2. {
    3. u32 id = idr->idr_next;
    4. int err, max = end > 0 ? end - 1 : INT_MAX;
    5. if ((int)id < start)
    6. id = start;
    7. err = idr_alloc_u32(idr, ptr, &id, max, gfp);
    8. if ((err == -ENOSPC) && (id > start)) {
    9. id = start;
    10. err = idr_alloc_u32(idr, ptr, &id, max, gfp);
    11. }
    12. if (err)
    13. return err;
    14. idr->idr_next = id + 1;
    15. return id;
    16. }

    具体实现pid 申请的函数在:

    1. int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid,
    2. unsigned long max, gfp_t gfp)
    3. {
    4. struct radix_tree_iter iter;
    5. void __rcu **slot;
    6. unsigned int base = idr->idr_base;
    7. unsigned int id = *nextid;
    8. if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR)))
    9. idr->idr_rt.xa_flags |= IDR_RT_MARKER;
    10. id = (id < base) ? 0 : id - base;
    11. radix_tree_iter_init(&iter, id);
    12. slot = idr_get_free(&idr->idr_rt, &iter, gfp, max - base);
    13. if (IS_ERR(slot))
    14. return PTR_ERR(slot);
    15. *nextid = iter.index + base;
    16. /* there is a memory barrier inside radix_tree_iter_replace() */
    17. radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr);
    18. radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE);
    19. return 0;
    20. }

    我们需要重点分析这个函数,重要的数据结构radix_tree_iter,我们看一下这个数据结构:

    这是一个基数树的迭代器

    1. /**
    2. * struct radix_tree_iter - radix tree iterator state
    3. *
    4. * @index: index of current slot
    5. * @next_index: one beyond the last index for this chunk
    6. * @tags: bit-mask for tag-iterating
    7. * @node: node that contains current slot
    8. *
    9. * This radix tree iterator works in terms of "chunks" of slots. A chunk is a
    10. * subinterval of slots contained within one radix tree leaf node. It is
    11. * described by a pointer to its first slot and a struct radix_tree_iter
    12. * which holds the chunk's position in the tree and its size. For tagged
    13. * iteration radix_tree_iter also holds the slots' bit-mask for one chosen
    14. * radix tree tag.
    15. */
    16. struct radix_tree_iter {
    17. unsigned long index;
    18. unsigned long next_index;
    19. unsigned long tags;
    20. struct radix_tree_node *node;
    21. };

    迭代器中有重要的数据结构

    #define radix_tree_node		xa_node

    也就是替换后 变为了

    struct xa_node*

    我们再继续看 xa_node* 这个数据结构,这是基数树的节点

    1. /*
    2. * @count is the count of every non-NULL element in the ->slots array
    3. * whether that is a value entry, a retry entry, a user pointer,
    4. * a sibling entry or a pointer to the next level of the tree.
    5. * @nr_values is the count of every element in ->slots which is
    6. * either a value entry or a sibling of a value entry.
    7. */
    8. struct xa_node {
    9. unsigned char shift; /* Bits remaining in each slot */
    10. unsigned char offset; /* Slot offset in parent */
    11. unsigned char count; /* Total entry count */
    12. unsigned char nr_values; /* Value entry count */
    13. struct xa_node __rcu *parent; /* NULL at top of tree */
    14. struct xarray *array; /* The array we belong to */
    15. union {
    16. struct list_head private_list; /* For tree user */
    17. struct rcu_head rcu_head; /* Used when freeing node */
    18. };
    19. void __rcu *slots[XA_CHUNK_SIZE];
    20. union {
    21. unsigned long tags[XA_MAX_MARKS][XA_MARK_LONGS];
    22. unsigned long marks[XA_MAX_MARKS][XA_MARK_LONGS];
    23. };
    24. };

    这些宏:

    1. #ifndef XA_CHUNK_SHIFT
    2. #define XA_CHUNK_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
    3. #endif
    4. #define XA_CHUNK_SIZE (1UL << XA_CHUNK_SHIFT)

    XA_CHUNK_SIZE 是 64 或者16.

    #define XA_MAX_MARKS		3

    对应的CHUNK是6个位或者 4个位

    然后我们看到pid最大是0x8000, 4 *4 为 16 个 槽,这意味着基数树高最大为3((16 / 6 ) + 1)

    1. #if defined __x86_64__ && !defined __ILP32__
    2. # define __WORDSIZE 64
    3. #else
    4. # define __WORDSIZE 32
    5. #define __WORDSIZE32_SIZE_ULONG 0
    6. #define __WORDSIZE32_PTRDIFF_LONG 0
    7. #endif
    8. #ifndef BITS_PER_LONG
    9. # define BITS_PER_LONG __WORDSIZE
    10. #endif

    BITS_PER_LONG 为64 或者32

    1. #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
    2. /*
    3. * The IDR API does not expose the tagging functionality of the radix tree
    4. * to users. Use tag 0 to track whether a node has free space below it.
    5. */
    6. #define IDR_FREE 0

    带入计算就是:

    (64 + 64 -1) / 64 = 1

    对应的宏

    1. #define XA_MAX_MARKS 3
    2. #define XA_MARK_LONGS DIV_ROUND_UP(XA_CHUNK_SIZE, BITS_PER_LONG)

    初始化基数树的迭代器:

    1. /**
    2. * radix_tree_iter_init - initialize radix tree iterator
    3. *
    4. * @iter: pointer to iterator state
    5. * @start: iteration starting index
    6. * Returns: NULL
    7. */
    8. static __always_inline void __rcu **
    9. radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
    10. {
    11. /*
    12. * Leave iter->tags uninitialized. radix_tree_next_chunk() will fill it
    13. * in the case of a successful tagged chunk lookup. If the lookup was
    14. * unsuccessful or non-tagged then nobody cares about ->tags.
    15. *
    16. * Set index to zero to bypass next_index overflow protection.
    17. * See the comment in radix_tree_next_chunk() for details.
    18. */
    19. iter->index = 0;
    20. iter->next_index = start;
    21. return NULL;
    22. }

    主要看idr_get_free,这个函数要看懂

    shift -= RADIX_TREE_MAP_SHIFT;

    shift 最大是16, 每次我们移动是6位,对应的是CHUNKSIZE

    1. if (!tag_get(node, IDR_FREE, offset)) {
    2. offset = radix_tree_find_next_bit(node, IDR_FREE,
    3. offset + 1);
    4. start = next_index(start, node, offset);
    5. if (start > max || start == 0)
    6. return ERR_PTR(-ENOSPC);
    7. while (offset == RADIX_TREE_MAP_SIZE) {
    8. offset = node->offset + 1;
    9. node = node->parent;
    10. if (!node)
    11. goto grow;
    12. shift = node->shift;
    13. }
    14. child = rcu_dereference_raw(node->slots[offset]);
    15. }

    每次循环如果循环到tag不是IDR_FREE,就会继续找下一个tag,直到找到是IDR_FREE的solt。

    在这里退出循环

    1. else if (!radix_tree_is_internal_node(child))
    2. break;

    最后返回空闲槽

    1. iter->index = start;
    2. if (node)
    3. iter->next_index = 1 + min(max, (start | node_maxindex(node)));
    4. else
    5. iter->next_index = 1;
    6. iter->node = node;
    7. set_iter_tags(iter, node, offset, IDR_FREE);
    8. return slot;

    上面的start 就应该是要申请的pid,我们最后看next_index

    1. /*
    2. * The maximum index which can be stored in a radix tree
    3. */
    4. static inline unsigned long shift_maxindex(unsigned int shift)
    5. {
    6. return (RADIX_TREE_MAP_SIZE << shift) - 1;
    7. }
    8. static inline unsigned long node_maxindex(const struct radix_tree_node *node)
    9. {
    10. return shift_maxindex(node->shift);
    11. }
    12. static unsigned long next_index(unsigned long index,
    13. const struct radix_tree_node *node,
    14. unsigned long offset)
    15. {
    16. return (index & ~node_maxindex(node)) + (offset << node->shift);
    17. }

    所以start 就是要申请的pid。

  • 相关阅读:
    云原生周刊:Linkerd 发布 v2.14 | 2023.9.4
    AMD 显卡Radeon Super Resolution(Radeon显卡超分辨率) 功能,你开启了么?
    cesium for ue左下角图标隐藏
    JVM 对 Java 的 原 生 锁 做 了 哪 些 优 化 ?
    Js逆向教程18-l参数分解
    20220803NOI模拟赛--考后总结
    【JavaSE】抽象类与接口(下篇)
    【微信小程序-初级实战】商品/表单编辑
    2024年软考重大改革
    持续集成部署-k8s-配置与存储-配置管理:ConfigMap 的热更新
  • 原文地址:https://blog.csdn.net/qq_32783703/article/details/133847341