• RDMA virtual-to-physical address translation


    Reference: [SPDK/NVMe Storage Technology Analysis] 015 - Understanding Memory Registration - 程序员大本营
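
    The call chain below shows how a virtual address handed in by userspace ends
    up as DMA/physical addresses: ib_umem_get() pins the user pages and collects
    them into a scatter/gather table, ib_dma_map_sgtable_attrs() and
    dma_map_sgtable() map that table for the device, and dma_direct_map_page()
    finally converts each page's physical address with phys_to_dma(). As a
    starting point, here is a minimal userspace sketch (an illustration only; it
    assumes libibverbs and at least one RDMA device are available) whose
    ibv_reg_mr() call is what drives the kernel into ib_umem_get():

    /* build with: gcc reg_mr.c -libverbs (illustrative) */
    #include <stdio.h>
    #include <stdlib.h>
    #include <infiniband/verbs.h>

    int main(void)
    {
        struct ibv_device **dev_list = ibv_get_device_list(NULL);
        if (!dev_list || !dev_list[0])
            return 1;

        struct ibv_context *ctx = ibv_open_device(dev_list[0]);
        struct ibv_pd *pd = ctx ? ibv_alloc_pd(ctx) : NULL;
        if (!pd)
            return 1;

        /* [buf, buf + len) is the virtual range that ib_umem_get() will pin */
        size_t len = 4096;
        void *buf = malloc(len);

        struct ibv_mr *mr = ibv_reg_mr(pd, buf, len,
                                       IBV_ACCESS_LOCAL_WRITE |
                                       IBV_ACCESS_REMOTE_READ |
                                       IBV_ACCESS_REMOTE_WRITE);
        if (!mr) {
            perror("ibv_reg_mr");
            return 1;
        }
        printf("registered %zu bytes, lkey=0x%x rkey=0x%x\n",
               len, mr->lkey, mr->rkey);

        ibv_dereg_mr(mr);
        free(buf);
        ibv_dealloc_pd(pd);
        ibv_close_device(ctx);
        ibv_free_device_list(dev_list);
        return 0;
    }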

    drivers/infiniband/core/umem.c

    /**
     * ib_umem_get - Pin and DMA map userspace memory.
     *
     * @device: IB device to connect UMEM
     * @addr: userspace virtual address to start at
     * @size: length of region to pin
     * @access: IB_ACCESS_xxx flags for memory being pinned
     */
    struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
                                size_t size, int access)
    {
        struct ib_umem *umem;
        struct page **page_list;
        unsigned long lock_limit;
        unsigned long new_pinned;
        unsigned long cur_base;
        unsigned long dma_attr = 0;
        struct mm_struct *mm;
        unsigned long npages;
        int pinned, ret;
        unsigned int gup_flags = FOLL_WRITE;

        /*
         * If the combination of the addr and size requested for this memory
         * region causes an integer overflow, return error.
         */
        if (((addr + size) < addr) ||
            PAGE_ALIGN(addr + size) < (addr + size))
            return ERR_PTR(-EINVAL);

        if (!can_do_mlock())
            return ERR_PTR(-EPERM);

        if (access & IB_ACCESS_ON_DEMAND)
            return ERR_PTR(-EOPNOTSUPP);

        umem = kzalloc(sizeof(*umem), GFP_KERNEL);
        if (!umem)
            return ERR_PTR(-ENOMEM);
        umem->ibdev = device;
        umem->length = size;
        umem->address = addr;
        /*
         * Drivers should call ib_umem_find_best_pgsz() to set the iova
         * correctly.
         */
        umem->iova = addr;
        umem->writable = ib_access_writable(access);
        umem->owning_mm = mm = current->mm;
        mmgrab(mm);

        page_list = (struct page **) __get_free_page(GFP_KERNEL);
        if (!page_list) {
            ret = -ENOMEM;
            goto umem_kfree;
        }

        npages = ib_umem_num_pages(umem);
        if (npages == 0 || npages > UINT_MAX) {
            ret = -EINVAL;
            goto out;
        }

        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

        new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
        if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
            atomic64_sub(npages, &mm->pinned_vm);
            ret = -ENOMEM;
            goto out;
        }

        cur_base = addr & PAGE_MASK;

        if (!umem->writable)
            gup_flags |= FOLL_FORCE;

        while (npages) {
            cond_resched();
            pinned = pin_user_pages_fast(cur_base,
                                         min_t(unsigned long, npages,
                                               PAGE_SIZE /
                                               sizeof(struct page *)),
                                         gup_flags | FOLL_LONGTERM, page_list);
            if (pinned < 0) {
                ret = pinned;
                goto umem_release;
            }

            cur_base += pinned * PAGE_SIZE;
            npages -= pinned;
            ret = sg_alloc_append_table_from_pages(
                &umem->sgt_append, page_list, pinned, 0,
                pinned << PAGE_SHIFT, ib_dma_max_seg_size(device),
                npages, GFP_KERNEL);
            if (ret) {
                unpin_user_pages_dirty_lock(page_list, pinned, 0);
                goto umem_release;
            }
        }

        if (access & IB_ACCESS_RELAXED_ORDERING)
            dma_attr |= DMA_ATTR_WEAK_ORDERING;

        ret = ib_dma_map_sgtable_attrs(device, &umem->sgt_append.sgt,
                                       DMA_BIDIRECTIONAL, dma_attr);
        if (ret)
            goto umem_release;
        goto out;

    umem_release:
        __ib_umem_release(device, umem, 0);
        atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
    out:
        free_page((unsigned long) page_list);
    umem_kfree:
        if (ret) {
            mmdrop(umem->owning_mm);
            kfree(umem);
        }
        return ret ? ERR_PTR(ret) : umem;
    }
    EXPORT_SYMBOL(ib_umem_get);
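
    Once ib_umem_get() returns, the provider driver owns a pinned, DMA-mapped
    umem. A typical next step is to pick the largest page size the hardware
    supports and walk the mapped blocks to program the device's page table
    (MTT). The sketch below is illustrative only; my_reg_user_mr() and the
    supported-page-size bitmap are assumptions, not code from any real driver:

    #include <linux/err.h>
    #include <linux/sizes.h>
    #include <rdma/ib_umem.h>
    #include <rdma/ib_verbs.h>

    static int my_reg_user_mr(struct ib_device *ibdev, u64 start, u64 length,
                              u64 virt_addr, int access)
    {
        struct ib_umem *umem;
        struct ib_block_iter biter;
        unsigned long pgsz;

        umem = ib_umem_get(ibdev, start, length, access);
        if (IS_ERR(umem))
            return PTR_ERR(umem);

        /* largest page size out of the (assumed) HW-supported set */
        pgsz = ib_umem_find_best_pgsz(umem, SZ_4K | SZ_2M | SZ_1G, virt_addr);
        if (!pgsz) {
            ib_umem_release(umem);
            return -EINVAL;
        }

        /* the DMA addresses below are what the HCA will actually use */
        rdma_umem_for_each_dma_block(umem, &biter, pgsz) {
            dma_addr_t dma = rdma_block_iter_dma_address(&biter);

            /* write 'dma' into the device's MTT / page table here */
            (void)dma;
        }

        /* a real driver keeps the umem until dereg_mr; released here only
         * to keep the sketch self-contained */
        ib_umem_release(umem);
        return 0;
    }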

    include/rdma/ib_verbs.h

    /**
     * ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses
     * @dev: The device for which the DMA addresses are to be created
     * @sg: The sg_table object describing the buffer
     * @direction: The direction of the DMA
     * @attrs: Optional DMA attributes for the map operation
     */
    static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev,
                                               struct sg_table *sgt,
                                               enum dma_data_direction direction,
                                               unsigned long dma_attrs)
    {
        int nents;

        if (ib_uses_virt_dma(dev)) {
            nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents);
            if (!nents)
                return -EIO;
            sgt->nents = nents;
            return 0;
        }
        return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs);
    }

    drivers/infiniband/core/device.c

    #ifdef CONFIG_INFINIBAND_VIRT_DMA
    int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
    {
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nents, i) {
            sg_dma_address(s) = (uintptr_t)sg_virt(s);
            sg_dma_len(s) = s->length;
        }
        return nents;
    }
    EXPORT_SYMBOL(ib_dma_virt_map_sg);
    #endif /* CONFIG_INFINIBAND_VIRT_DMA */
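
    For software RDMA providers (e.g. rxe or siw) built with
    CONFIG_INFINIBAND_VIRT_DMA, no device performs DMA at all: the CPU does the
    copy, so the "DMA address" stored above is simply the page's kernel virtual
    address (sg_virt()). A hedged sketch of how such a provider might consume
    it later; my_sw_copy_from_sge() is an illustrative name, not real driver
    code:

    #include <linux/minmax.h>
    #include <linux/scatterlist.h>
    #include <linux/string.h>

    static void my_sw_copy_from_sge(struct scatterlist *s, void *dst, size_t len)
    {
        /* set to sg_virt(s) by ib_dma_virt_map_sg(), so it is directly usable */
        void *src = (void *)(uintptr_t)sg_dma_address(s);

        memcpy(dst, src, min_t(size_t, len, sg_dma_len(s)));
    }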

    kernel/dma/mapping.c

    /**
     * dma_map_sgtable - Map the given buffer for DMA
     * @dev:    The device for which to perform the DMA operation
     * @sgt:    The sg_table object describing the buffer
     * @dir:    DMA direction
     * @attrs:  Optional DMA attributes for the map operation
     *
     * Maps a buffer described by a scatterlist stored in the given sg_table
     * object for the @dir DMA operation by the @dev device. After success, the
     * ownership for the buffer is transferred to the DMA domain. One has to
     * call dma_sync_sgtable_for_cpu() or dma_unmap_sgtable() to move the
     * ownership of the buffer back to the CPU domain before touching the
     * buffer by the CPU.
     *
     * Returns 0 on success or a negative error code on error. The following
     * error codes are supported with the given meaning:
     *
     *   -EINVAL   An invalid argument, unaligned access or other error
     *             in usage. Will not succeed if retried.
     *   -ENOMEM   Insufficient resources (like memory or IOVA space) to
     *             complete the mapping. Should succeed if retried later.
     *   -EIO      Legacy error code with an unknown meaning. eg. this is
     *             returned if a lower level call returned DMA_MAPPING_ERROR.
     */
    int dma_map_sgtable(struct device *dev, struct sg_table *sgt,
                        enum dma_data_direction dir, unsigned long attrs)
    {
        int nents;

        nents = __dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs);
        if (nents < 0)
            return nents;
        sgt->nents = nents;
        return 0;
    }
    EXPORT_SYMBOL_GPL(dma_map_sgtable);
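
    Before following the call into __dma_map_sg_attrs(), it is worth seeing the
    caller-side contract spelled out in the comment above: map, hand the
    resulting nents DMA segments to the device, then sync or unmap to give the
    buffer back to the CPU. A minimal generic sketch, assuming a hypothetical
    my_dev device that already has a populated sg_table:

    #include <linux/dma-mapping.h>
    #include <linux/scatterlist.h>

    static int my_map_and_describe(struct device *my_dev, struct sg_table *sgt)
    {
        struct scatterlist *sg;
        int i, ret;

        ret = dma_map_sgtable(my_dev, sgt, DMA_BIDIRECTIONAL, 0);
        if (ret)
            return ret;    /* -EINVAL, -ENOMEM or -EIO, as documented above */

        /* iterate the DMA-mapped entries: nents of them, not orig_nents */
        for_each_sgtable_dma_sg(sgt, sg, i) {
            dma_addr_t addr = sg_dma_address(sg);
            unsigned int len = sg_dma_len(sg);

            /* hand (addr, len) to the device, e.g. fill a descriptor ring */
            (void)addr;
            (void)len;
        }

        dma_unmap_sgtable(my_dev, sgt, DMA_BIDIRECTIONAL, 0);
        return 0;
    }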

    static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
                                  int nents, enum dma_data_direction dir,
                                  unsigned long attrs)
    {
        const struct dma_map_ops *ops = get_dma_ops(dev);
        int ents;

        BUG_ON(!valid_dma_direction(dir));

        if (WARN_ON_ONCE(!dev->dma_mask))
            return 0;

        if (dma_map_direct(dev, ops) ||
            arch_dma_map_sg_direct(dev, sg, nents))
            ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
        else
            ents = ops->map_sg(dev, sg, nents, dir, attrs);

        if (ents > 0)
            debug_dma_map_sg(dev, sg, nents, ents, dir, attrs);
        else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM &&
                              ents != -EIO))
            return -EIO;

        return ents;
    }

    kernel/dma/direct.c

    int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
                          enum dma_data_direction dir, unsigned long attrs)
    {
        int i;
        struct scatterlist *sg;

        for_each_sg(sgl, sg, nents, i) {
            sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
                    sg->offset, sg->length, dir, attrs);
            if (sg->dma_address == DMA_MAPPING_ERROR)
                goto out_unmap;
            sg_dma_len(sg) = sg->length;
        }

        return nents;

    out_unmap:
        dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
        return -EIO;
    }

    kernel/dma/direct.h

    static inline dma_addr_t dma_direct_map_page(struct device *dev,
            struct page *page, unsigned long offset, size_t size,
            enum dma_data_direction dir, unsigned long attrs)
    {
        phys_addr_t phys = page_to_phys(page) + offset;
        dma_addr_t dma_addr = phys_to_dma(dev, phys);

        if (is_swiotlb_force_bounce(dev))
            return swiotlb_map(dev, phys, size, dir, attrs);

        if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
            if (is_swiotlb_active(dev))
                return swiotlb_map(dev, phys, size, dir, attrs);

            dev_WARN_ONCE(dev, 1,
                          "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
                          &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
            return DMA_MAPPING_ERROR;
        }

        if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
            arch_sync_dma_for_device(phys, size, dir);
        return dma_addr;
    }
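
    dma_direct_map_page() is where the virtual-to-physical translation done by
    pin_user_pages_fast() finally becomes a bus address: page_to_phys() yields
    the page's physical address and phys_to_dma() applies any bus offset from
    the device's dma-ranges (plus memory-encryption bits on some platforms).
    Ignoring SWIOTLB bouncing and encryption, the core arithmetic is roughly
    the simplified sketch below (an approximation, not the kernel's exact
    implementation):

    #include <linux/device.h>
    #include <linux/dma-direct.h>

    static inline dma_addr_t simplified_phys_to_dma(struct device *dev,
                                                    phys_addr_t paddr)
    {
        /* apply the bus offset described by the device's dma-ranges, if any */
        if (dev->dma_range_map)
            return translate_phys_to_dma(dev, paddr);

        /* no offset: the DMA (bus) address equals the physical address */
        return (dma_addr_t)paddr;
    }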

  • Original article: https://blog.csdn.net/baidu_38316985/article/details/126247318