[SPDK/NVMe存储技术分析]015 - 理解内存注册(Memory Registration) - 程序员大本营
drivers/infiniband/core/umem.c
/**
* ib_umem_get - Pin and DMA map userspace memory.
*
* @device: IB device to connect UMEM
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
*/
struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
size_t size, int access)
{
struct ib_umem *umem;
struct page **page_list;
unsigned long lock_limit;
unsigned long new_pinned;
unsigned long cur_base;
unsigned long dma_attr = 0;
struct mm_struct *mm;
unsigned long npages;
int pinned, ret;
unsigned int gup_flags = FOLL_WRITE;
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
*/
if (((addr + size) < addr) ||
PAGE_ALIGN(addr + size) < (addr + size))
return ERR_PTR(-EINVAL);
if (!can_do_mlock())
return ERR_PTR(-EPERM);
if (access & IB_ACCESS_ON_DEMAND)
return ERR_PTR(-EOPNOTSUPP);
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
umem->ibdev = device;
umem->length = size;
umem->address = addr;
/*
* Drivers should call ib_umem_find_best_pgsz() to set the iova
* correctly.
*/
umem->iova = addr;
umem->writable = ib_access_writable(access);
umem->owning_mm = mm = current->mm;
mmgrab(mm);
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
ret = -ENOMEM;
goto umem_kfree;
}
npages = ib_umem_num_pages(umem);
if (npages == 0 || npages > UINT_MAX) {
ret = -EINVAL;
goto out;
}
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
atomic64_sub(npages, &mm->pinned_vm);
ret = -ENOMEM;
goto out;
}
cur_base = addr & PAGE_MASK;
if (!umem->writable)
gup_flags |= FOLL_FORCE;
while (npages) {
cond_resched();
pinned = pin_user_pages_fast(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE /
sizeof(struct page *)),
gup_flags | FOLL_LONGTERM, page_list);
if (pinned < 0) {
ret = pinned;
goto umem_release;
}
cur_base += pinned * PAGE_SIZE;
npages -= pinned;
ret = sg_alloc_append_table_from_pages(
&umem->sgt_append, page_list, pinned, 0,
pinned << PAGE_SHIFT, ib_dma_max_seg_size(device),
npages, GFP_KERNEL);
if (ret) {
unpin_user_pages_dirty_lock(page_list, pinned, 0);
goto umem_release;
}
}
if (access & IB_ACCESS_RELAXED_ORDERING)
dma_attr |= DMA_ATTR_WEAK_ORDERING;
ret = ib_dma_map_sgtable_attrs(device, &umem->sgt_append.sgt,
DMA_BIDIRECTIONAL, dma_attr);
if (ret)
goto umem_release;
goto out;
umem_release:
__ib_umem_release(device, umem, 0);
atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
free_page((unsigned long) page_list);
umem_kfree:
if (ret) {
mmdrop(umem->owning_mm);
kfree(umem);
}
return ret ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
include/rdma/ibverbs.h
/**
* ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses
* @dev: The device for which the DMA addresses are to be created
* @sg: The sg_table object describing the buffer
* @direction: The direction of the DMA
* @attrs: Optional DMA attributes for the map operation
*/
static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev,
struct sg_table *sgt,
enum dma_data_direction direction,
unsigned long dma_attrs)
{
int nents;
if (ib_uses_virt_dma(dev)) {
nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents);
if (!nents)
return -EIO;
sgt->nents = nents;
return 0;
}
return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs);
}
infiniband/core/device.c
#ifdef CONFIG_INFINIBAND_VIRT_DMA
int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
{
struct scatterlist *s;
int i;
for_each_sg(sg, s, nents, i) {
sg_dma_address(s) = (uintptr_t)sg_virt(s);
sg_dma_len(s) = s->length;
}
return nents;
}
EXPORT_SYMBOL(ib_dma_virt_map_sg);
#endif /* CONFIG_INFINIBAND_VIRT_DMA */
kernel/dma/mapping.c
/**
* dma_map_sgtable - Map the given buffer for DMA
* @dev: The device for which to perform the DMA operation
* @sgt: The sg_table object describing the buffer
* @dir: DMA direction
* @attrs: Optional DMA attributes for the map operation
*
* Maps a buffer described by a scatterlist stored in the given sg_table
* object for the @dir DMA operation by the @dev device. After success, the
* ownership for the buffer is transferred to the DMA domain. One has to
* call dma_sync_sgtable_for_cpu() or dma_unmap_sgtable() to move the
* ownership of the buffer back to the CPU domain before touching the
* buffer by the CPU.
*
* Returns 0 on success or a negative error code on error. The following
* error codes are supported with the given meaning:
*
* -EINVAL An invalid argument, unaligned access or other error
* in usage. Will not succeed if retried.
* -ENOMEM Insufficient resources (like memory or IOVA space) to
* complete the mapping. Should succeed if retried later.
* -EIO Legacy error code with an unknown meaning. eg. this is
* returned if a lower level call returned DMA_MAPPING_ERROR.
*/
int dma_map_sgtable(struct device *dev, struct sg_table *sgt,
enum dma_data_direction dir, unsigned long attrs)
{
int nents;
nents = __dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs);
if (nents < 0)
return nents;
sgt->nents = nents;
return 0;
}
EXPORT_SYMBOL_GPL(dma_map_sgtable);
static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir, unsigned long attrs)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
int ents;
BUG_ON(!valid_dma_direction(dir));
if (WARN_ON_ONCE(!dev->dma_mask))
return 0;
if (dma_map_direct(dev, ops) ||
arch_dma_map_sg_direct(dev, sg, nents))
ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
else
ents = ops->map_sg(dev, sg, nents, dir, attrs);
if (ents > 0)
debug_dma_map_sg(dev, sg, nents, ents, dir, attrs);
else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM &&
ents != -EIO))
return -EIO;
return ents;
}
kernel/dma/direct.c
int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir, unsigned long attrs)
{
int i;
struct scatterlist *sg;
for_each_sg(sgl, sg, nents, i) {
sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
sg->offset, sg->length, dir, attrs);
if (sg->dma_address == DMA_MAPPING_ERROR)
goto out_unmap;
sg_dma_len(sg) = sg->length;
}
return nents;
out_unmap:
dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
return -EIO;
}
kernel/dma/direct.h
static inline dma_addr_t dma_direct_map_page(struct device *dev,
struct page *page, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
phys_addr_t phys = page_to_phys(page) + offset;
dma_addr_t dma_addr = phys_to_dma(dev, phys);
if (is_swiotlb_force_bounce(dev))
return swiotlb_map(dev, phys, size, dir, attrs);
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
if (is_swiotlb_active(dev))
return swiotlb_map(dev, phys, size, dir, attrs);
dev_WARN_ONCE(dev, 1,
"DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
return DMA_MAPPING_ERROR;
}
if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
arch_sync_dma_for_device(phys, size, dir);
return dma_addr;
}