• QEMU 结构体对齐产生的问题


    背景

    最近在鲲鹏 aarch64 服务器上,发现了一个 QEMU 的异常:启动虚拟机时加载 UEFI firmware 的时间很长,表现为需要几分钟才能出现虚拟机的 grub 界面。这个问题并不能稳定复现,通过进一步研究发现,问题出在 QEMU 的 vhost 部分。

    修复的 patch 在这里。 这里涉及到 3 个 patch,主要内容如下:

    1. | diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
    2. | index 07179bb74f..45997cbf27 100644
    3. | --- a/hw/virtio/vhost.c
    4. | +++ b/hw/virtio/vhost.c
    5. | @@ -451,8 +451,13 @@ static void vhost_commit(MemoryListener *listener)
    6. | changed = true;
    7. | } else {
    8. | /* Same size, lets check the contents */
    9. | - changed = n_old_sections && memcmp(dev->mem_sections, old_sections,
    10. | - n_old_sections * sizeof(old_sections[0])) != 0;
    11. | + for (i = 0; i < n_old_sections; i++) {
    12. | + if (!MemoryRegionSection_eq(&old_sections[i],
    13. | + &dev->mem_sections[i])) {
    14. | + changed = true;
    15. | + break;
    16. | + }
    17. | + }
    18. | }
    19. |
    20. | trace_vhost_commit(dev->started, changed);
    21. | diff --git a/include/exec/memory.h b/include/exec/memory.h
    22. | index 32fb294308..a0eeff13e3 100644
    23. | --- a/include/exec/memory.h
    24. | +++ b/include/exec/memory.h
    25. | @@ -391,6 +391,17 @@ struct MemoryRegionSection {
    26. | bool readonly;
    27. | };
    28. |
    29. | +static inline bool MemoryRegionSection_eq(MemoryRegionSection *a,
    30. | + MemoryRegionSection *b)
    31. | +{
    32. | + return a->mr == b->mr &&
    33. | + a->fv == b->fv &&
    34. | + a->offset_within_region == b->offset_within_region &&
    35. | + a->offset_within_address_space == b->offset_within_address_space &&
    36. | + int128_eq(a->size, b->size) &&
    37. | + a->readonly == b->readonly;
    38. | +}
    39. | +

    可以看到,核心改动就是不再用 memcmp 直接比较两个结构体,而是手动地逐个字段进行比较。这两种做法的差别是什么呢?答案是对齐填充。

    原因

    memcmp 比较的类型如下所示,这个类型的长度为 56 字节。

    1. struct Int128 {
    2. uint64_t lo;
    3. int64_t hi;
    4. };
    5. struct MemoryRegionSection {
    6. struct Int128 size;
    7. void *mr;
    8. void *fv;
    9. uint64_t offset_within_region;
    10. uint64_t offset_within_address_space;
    11. bool readonly;
    12. };

    学习地址: Dpdk/网络协议栈/vpp/OvS/DDos/NFV/虚拟化/高性能专家-学习视频教程-腾讯课堂
    更多DPDK相关学习资料有需要的可以自行报名学习,免费订阅,永久学习,或点击这里加群免费
    领取,关注我持续更新哦! ! 

    POC 代码如下:

    1. struct MemoryRegionSection {
    2. struct Int128 size;
    3. void *mr;
    4. void *fv;
    5. uint64_t offset_within_region;
    6. uint64_t offset_within_address_space;
    7. bool readonly;
    8. };
    9. void hexdump(void *ptr, size_t size) {
    10. size_t i;
    11. unsigned char *p = (unsigned char*)ptr;
    12. for (i = 0; i < size; i++) {
    13. printf("%02x ", p[i]);
    14. }
    15. printf("\n");
    16. }
    17. int main() {
    18. struct MemoryRegionSection section1, section2;
    19. printf("sizeof(struct MemoryRegionSection) = %lu\n",
    20. sizeof(struct MemoryRegionSection));
    21. printf("offsetof(struct MemoryRegionSection, readonly) = %lu\n",
    22. offsetof(struct MemoryRegionSection, readonly));
    23. section2.size.hi = section1.size.hi = 0xabcdef;
    24. section2.size.lo = section1.size.lo = 0xabcdef;
    25. section2.mr = section1.mr = (void*)0xabcdef;
    26. section2.fv = section1.fv = (void*)0xabcdef;
    27. section2.offset_within_region = section1.offset_within_region = 0xabcdef;
    28. section2.offset_within_address_space = section1.offset_within_address_space = 0xabcdef;
    29. section2.readonly = section1.readonly = true;
    30. printf("memcmp(section1, section2, sizeof(section1)) = %d\n",
    31. memcmp(&section1, &section2, sizeof(section1)));
    32. printf("memcmp(section1, section2, 48) = %d\n",
    33. memcmp(&section1, &section2, 48));
    34. printf("hexdump(section1, sizeof(section1))\n");
    35. hexdump(&section1, sizeof(section1));
    36. printf("hexdump(section2, sizeof(section2))\n");
    37. hexdump(&section2, sizeof(section2));
    38. return 0;
    39. }

    输出:

    1. sizeof(struct MemoryRegionSection) = 56
    2. offsetof(struct MemoryRegionSection, readonly) = 48
    3. memcmp(section1, section2, sizeof(section1)) = -64
    4. memcmp(section1, section2, 48) = 0
    5. hexdump(section1, sizeof(section1))
    6. ef cd ab 00 00 00 00 00 ef cd ab 00 00 00 00 00 ef cd ab 00 00 00 00 00 ef cd ab 00 00 00 00 00 ef cd ab 00 00 00 00 00 ef cd ab 00 00 00 00 00 01 <0a> 73 23 ff 7e 00 00
    7. hexdump(section2, sizeof(section2))
    8. ef cd ab 00 00 00 00 00 ef cd ab 00 00 00 00 00 ef cd ab 00 00 00 00 00 ef cd ab 00 00 00 00 00 ef cd ab 00 00 00 00 00 ef cd ab 00 00 00 00 00 01 <4a> 72 23 ff 7e 00 00

    很明显,在 64 位系统上,结构体的大小会自动对齐到 8 字节(其成员的最大对齐要求),所以最后 1 字节的 bool 后面会自动填充 7 个字节。这 7 个填充字节是未初始化的内存,内容不确定,因此直接对整个结构体进行 memcmp 是非常危险的,比较结果很可能跟期望的不一样。

    结语

    QEMU 出现这样的低级错误,有些出乎意料。使用 memcmp 时需要小心,要考虑结构体的内存布局(包括填充字节)以及内存是否经过初始化,否则比较结果就可能跟期望的不一致。

    原文链接:https://zhuanlan.zhihu.com/p/558121669

  • 相关阅读:
    【深基16.例1】淘汰赛(下)
    微信小程序八(自定义tabBar)
    创建运行nnunet的docker镜像,并且使用nnunet训练自己的2D数据
    docker 更换Docker Root Dir
    基于知识问答的上下文学习中的代码风格11.20
    GnuTLS recv error (-54): Error in the pull function.
    Spring Cloud Sentinel整合Nacos实现配置持久化
    1011 World Cup Betting
    markdown 如何书写 arg min max公式
    【python爬虫】—图片爬取
  • 原文地址:https://blog.csdn.net/lingshengxiyou/article/details/127773497