• dpdk Vhost 库


    1、 怎么实现vhost_dev的VhostOps的vhost_set_vring_kick和vhost_set_vring_call; vhost_net  kernel方式的vhost_set_vring_kick和vhost_set_vring_call依赖于/dev/vhost_net的ioctl

    有两种实现方式: 1、guest是server,dpdk vhost user是client  2、 guest是client,dpdk vhost user是server

    VHOST_SET_VRING_CALL  和 VHOST_SET_VRING_KICK的实现有两种:

      qemu vhost user: VhostOps user_ops

    dpdk : vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX]

    2、vhost-user怎么实现vhost_dev的const VhostOps *vhost_ops;

     hw/virtio/vhost-backend.c:294:static const VhostOps kernel_ops = {
    hw/virtio/vhost-user.c:2357:const VhostOps user_ops = {
    include/hw/virtio/vhost-backend.h:175:extern const VhostOps user_ops;

    3、 vhost-user怎么实现vhost_dev的struct vhost_virtqueue

     4、 vhost_kernel_ioctl处理VHOST_SET_VRING_CALL和VHOST_SET_VRING_KICK

     5、发送和接收

    /* Forward count packets from the host to the guest. */
    uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
    struct rte_mbuf **pkts, uint16_t count)
    /* Receive count packets from the guest and store them in pkts. */
    uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
    struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)

    /*
     * Per-device vhost state: memory-listener hooks, the virtqueue array,
     * negotiated feature bits, dirty-log bookkeeping and the backend
     * operations table (vhost_ops).
     * NOTE(review): this looks like an excerpt of QEMU's struct vhost_dev;
     * confirm against the QEMU tree version being discussed.
     */
    struct vhost_dev {
        MemoryListener memory_listener;  /* MemoryListener is the set of callbacks for physical-memory operations */
        struct vhost_memory *mem;
        int n_mem_sections;
        MemoryRegionSection *mem_sections;
        struct vhost_virtqueue *vqs;  /* array of vhost_virtqueue entries; nvqs below is its length */
        int nvqs;
        /* the first virtqueue which would be used by this vhost dev */
        int vq_index;
        unsigned long long features;  /* features supported by the vhost device */
        unsigned long long acked_features;  /* features acked by the guest */
        unsigned long long backend_features;  /* features supported by the backend, e.g. a tap device */
        bool started;
        bool log_enabled;
        vhost_log_chunk_t *log;
        unsigned long long log_size;
        Error *migration_blocker;
        bool force;
        bool memory_changed;
        hwaddr mem_changed_start_addr;
        hwaddr mem_changed_end_addr;
        const VhostOps *vhost_ops; /* VhostOps is implemented differently for kernel and user vhost; the kernel implementation ultimately goes through ioctl */
        void *opaque;
    };
    /*
     * Handler table with VHOST_USER_MAX slots, indexed by vhost-user request
     * code: each designated slot names the function that handles that
     * message. VHOST_USER_NONE is explicitly NULL (no handler); any request
     * code not listed here is implicitly NULL as well.
     */
    static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = {
            [VHOST_USER_NONE] = NULL,
            [VHOST_USER_GET_FEATURES] = vhost_user_get_features,
            [VHOST_USER_SET_FEATURES] = vhost_user_set_features,
            [VHOST_USER_SET_OWNER] = vhost_user_set_owner,
            [VHOST_USER_RESET_OWNER] = vhost_user_reset_owner,
            [VHOST_USER_SET_MEM_TABLE] = vhost_user_set_mem_table,
            [VHOST_USER_SET_LOG_BASE] = vhost_user_set_log_base,
            [VHOST_USER_SET_LOG_FD] = vhost_user_set_log_fd,
            [VHOST_USER_SET_VRING_NUM] = vhost_user_set_vring_num,
            [VHOST_USER_SET_VRING_ADDR] = vhost_user_set_vring_addr,
            [VHOST_USER_SET_VRING_BASE] = vhost_user_set_vring_base,
            [VHOST_USER_GET_VRING_BASE] = vhost_user_get_vring_base,
            /* The two requests the article focuses on: kick and call eventfd setup. */
            [VHOST_USER_SET_VRING_KICK] = vhost_user_set_vring_kick,
            [VHOST_USER_SET_VRING_CALL] = vhost_user_set_vring_call,
            [VHOST_USER_SET_VRING_ERR] = vhost_user_set_vring_err,
            [VHOST_USER_GET_PROTOCOL_FEATURES] = vhost_user_get_protocol_features,
            [VHOST_USER_SET_PROTOCOL_FEATURES] = vhost_user_set_protocol_features,
            [VHOST_USER_GET_QUEUE_NUM] = vhost_user_get_queue_num,
            [VHOST_USER_SET_VRING_ENABLE] = vhost_user_set_vring_enable,
            [VHOST_USER_SEND_RARP] = vhost_user_send_rarp,
            [VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
            [VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
            [VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
            [VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
            [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
            [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
            [VHOST_USER_GET_INFLIGHT_FD] = vhost_user_get_inflight_fd,
            [VHOST_USER_SET_INFLIGHT_FD] = vhost_user_set_inflight_fd,
    };

    /*
     * VhostOps table for the vhost-user backend (backend_type is
     * VHOST_BACKEND_TYPE_USER). Every callback slot is bound to the
     * corresponding vhost_user_* function, including vhost_set_vring_kick
     * and vhost_set_vring_call.
     */
    const VhostOps user_ops = {
            .backend_type = VHOST_BACKEND_TYPE_USER,
            .vhost_backend_init = vhost_user_backend_init,
            .vhost_backend_cleanup = vhost_user_backend_cleanup,
            .vhost_backend_memslots_limit = vhost_user_memslots_limit,
            .vhost_set_log_base = vhost_user_set_log_base,
            .vhost_set_mem_table = vhost_user_set_mem_table,
            .vhost_set_vring_addr = vhost_user_set_vring_addr,
            .vhost_set_vring_endian = vhost_user_set_vring_endian,
            .vhost_set_vring_num = vhost_user_set_vring_num,
            .vhost_set_vring_base = vhost_user_set_vring_base,
            .vhost_get_vring_base = vhost_user_get_vring_base,
            .vhost_set_vring_kick = vhost_user_set_vring_kick,
            .vhost_set_vring_call = vhost_user_set_vring_call,
            .vhost_set_features = vhost_user_set_features,
            .vhost_get_features = vhost_user_get_features,
            .vhost_set_owner = vhost_user_set_owner,
            .vhost_reset_device = vhost_user_reset_device,
            .vhost_get_vq_index = vhost_user_get_vq_index,
            .vhost_set_vring_enable = vhost_user_set_vring_enable,
            .vhost_requires_shm_log = vhost_user_requires_shm_log,
            .vhost_migration_done = vhost_user_migration_done,
            .vhost_backend_can_merge = vhost_user_can_merge,
            .vhost_net_set_mtu = vhost_user_net_set_mtu,
            .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
            .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
            .vhost_get_config = vhost_user_get_config,
            .vhost_set_config = vhost_user_set_config,
            .vhost_crypto_create_session = vhost_user_crypto_create_session,
            .vhost_crypto_close_session = vhost_user_crypto_close_session,
            .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
            .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
            .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
    };

    Vhost-user Overview

    The goal of vhost-user is to implement such a Virtio transport, staying as close as possible to the vhost paradigm of using shared memory, ioeventfds and irqfds. A UNIX domain socket based mechanism allows to set up the resources used by a number of Vrings shared between two userspace processes, which will be placed in shared memory. The mechanism also configures the necessary eventfds to signal when a Vring gets a kick event from either side.

    Vhost-user has been implemented in QEMU via a set of patches, giving the option to pass any virtio_net Vrings directly to another userspace process, implementing a virtio_net backend outside QEMU. This way, direct Snabbswitch to a QEMU guest virtio_net communication can be realized.

    QEMU already implements the vhost interface for a fast zero-copy guest to host kernel data path. Configuration of this interface relies on a series of ioctls that define the control plane. In this scenario, the QEMU network backend invoked is the “tap” netdev. A usual way to run it is:

    $ qemu -netdev type=tap,script=/etc/kvm/kvm-ifup,id=net0,vhost=on \
                                                -device virtio-net-pci,netdev=net0
    

    The purpose of the vhost-user patches for QEMU is to provide the infrastructure and implementation of a user space vhost interface. The fundamental additions of this implementation are:

     Added an option to -mem-path to allocate guest RAM as memory that can be shared with another process.

     Use a Unix domain socket to communicate between QEMU and the user space vhost implementation.

     The user space application will receive file descriptors for the pre-allocated shared guest RAM. It will directly access the related vrings in the guest's memory space.

    Overall architecture of vhost-user

    In the target implementation the vhost client is in QEMU. The target backend is Snabbswitch.

    学习地址: Dpdk/网络协议栈/vpp/OvS/DDos/NFV/虚拟化/高性能专家-学习视频教程-腾讯课堂
    更多DPDK相关学习资料有需要的可以自行报名学习,免费订阅,久学习,或点击这里加qun免费
    领取,关注我持续更新哦! ! 

    Compilation and Usage

    QEMU Compilation

    A version of QEMU patched with the latest vhost-user patches can be retrieved from the Virtual Open Systems repository at GitHub - virtualopensystems/qemu: A standard QEMU tree with KVM for ARM patches, branch vhost-user-v5.

    To clone it:

    $ git clone -b vhost-user-v5 https://github.com/virtualopensystems/qemu.git
    

    Compilation is straightforward:

    $ mkdir qemu/obj
    $ cd qemu/obj/
    $ ../configure --target-list=x86_64-softmmu
    $ make -j
    

    This will build QEMU as qemu/obj/x86_64-softmmu/qemu-system-x86_64.

    Using QEMU with Vhost-user

    To run QEMU with the vhost-user backend, one has to provide the named UNIX domain socket, which needs to be already opened by the backend:

    $ qemu -m 1024 -mem-path /hugetlbfs,prealloc=on,share=on \
    -netdev type=vhost-user,id=net0,file=/path/to/socket \
    -device virtio-net-pci,netdev=net0

    Vhost 库

    Vhost库实现了一个用户空间virtio网络服务器,允许用户直接操作virtio。 换句话说,它允许用户通过VM virtio网络设备获取/发送数据包。 为了达到这个功能,一个vhost库需要实现:

    • 访问guest内存:

      对于QEMU,这是通过使用 -object memory-backend-file,share=on,... 选项实现的。 这意味着QEMU将创建一个文件作为guest RAM。 选项 share=on 允许另一个进程映射该文件,这意味着该进程可以访问这个guest RAM。

    • 知道关于vring所有必要的信息:

      诸如可用环形存储链表的存储空间。Vhost定义了一些消息(通过Unix套接字传递)来告诉后端所有需要知道如何操作vring的信息。

    27.1. Vhost API 概述

    以下是一些关键的Vhost API函数概述:

    • rte_vhost_driver_register(path, flags)

      此函数将vhost驱动程序注册到系统中。path 指定Unix套接字的文件路径。

      当前支持的flags包括:

      • RTE_VHOST_USER_CLIENT

        当使用该flag时,DPDK vhost-user 作为客户端。 请参阅以下说明。

      • RTE_VHOST_USER_NO_RECONNECT

        当 DPDK vhost-user 作为客户端时,它将不断尝试连接到服务端(QEMU),直到成功。 这在以下两个情况中是非常有用的:

        • 当 QEMU 还没启动时
        • 当 QEMU 重启时(如guest OS 重启)

        这个重新连接选项是默认启用的,但是,可以通过设置这个标志来关闭它。

      • RTE_VHOST_USER_DEQUEUE_ZERO_COPY

        设置此flag时将启用出队零拷贝。默认情况下是禁用的。

        在设置此标志时,需要知道以下原则:

        • 零拷贝对于小数据包(小于512)是不好的。

        • 零拷贝对VM2VM情况比较好。对于两个虚拟机之间的iperf,提升性能可能高达70%(当TSO使能时).

        • 对于VM2NIC情况,nb_tx_desc 必须足够小:如果未启动virtio间接特性则 <=64,否则 <= 128。

          这是因为,当启用出队零拷贝时,只有当相应的mbuf被释放时,guest TX的used vring才会被更新。 因此,nb_tx_desc必须足够小,以便PMD驱动程序将耗尽可用的TX描述符,并及时释放mbufs。 否则,guest TX vring将无mbuf使用。

        • Guest的内存应该使用huge page支持以获得更好的性能。最好使用1G大小的页面。

          当启用出队零拷贝时,必须建立guest 物理地址和host物理地址之间的映射。 使用non-huge page则意味着更多的页面细分。 为了简单起见,DPDK vhost对这些段进行了线性搜索,因此,段越少,我们得到的映射就越快。 注意:将来我们可能使用树搜索来提升速度。

    • rte_vhost_driver_set_features(path, features)

      此函数设置vhost-user驱动支持的功能位。 vhost-user驱动可以是vhost-user net,但也可以是其他的,例如vhost-user SCSI。

  • 相关阅读:
    Windows系统怎么加密文件夹?
    齐博x1二次开发实例分享:模块安装文件讲解
    高性价比MOS推荐:惠海HC090N10L,HC025N10L,100V高耐压,12V/24V加湿器和3.7V打火机专用MOS
    pycharm debug调试点击结束断点报错KeyboardInterrupt
    基于SSM的校园二手物品交易市场设计与实现
    NVIDIA 7th SkyHackathon(七)Tao 目标检测模型可视化推理与导出
    Educational Codeforces Round 135 (构造、优先队列、区间DP)
    怎样在小程序中直播
    基于springboot实现漫画网站管理系统项目【项目源码+论文说明】
    Pytorch:Torch数据类型学习整理与记录
  • 原文地址:https://blog.csdn.net/lingshengxiyou/article/details/127870249