mirror of https://github.com/torvalds/linux.git
synced 2025-11-04 10:40:15 +02:00
	RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs
Combine contiguous regions of PAGE_SIZE pages into single scatter list
entries while building the scatter table for a umem. This minimizes the
number of entries in the scatter list and reduces the DMA mapping
overhead, particularly with the IOMMU.

Set default max_seg_size in core for IB devices to 2G and do not combine
if we exceed this limit.

Also, purge npages in struct ib_umem as we now DMA map the umem SGL with
sg_nents, and the npages computation is no longer needed. Drivers should
now be using ib_umem_num_pages(), so fix the last stragglers.

Move npages tracking to ib_umem_odp as ODP drivers still need it.

Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Acked-by: Adit Ranadive <aditr@vmware.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Tested-by: Gal Pressman <galpress@amazon.com>
Tested-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
This commit is contained in:
parent c7252a6532
commit d10bcf947a
 7 changed files with 95 additions and 29 deletions
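To illustrate the coalescing the commit message describes: a minimal userspace sketch (not kernel code; struct seg, coalesce() and the pfn values are invented for the example) that folds runs of physically contiguous pages into (start, length) entries, starting a new entry whenever the run breaks or a segment-size cap would be exceeded:

#include <stdio.h>
#include <stddef.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* One coalesced entry: start pfn plus byte length, the information an
 * SGE carries for this purpose. */
struct seg {
	unsigned long first_pfn;
	unsigned long length;		/* bytes, multiple of PAGE_SIZE */
};

/* Fold runs of physically contiguous pages into segments, opening a new
 * segment when the run breaks or the cap would be exceeded. */
static size_t coalesce(const unsigned long *pfns, size_t npages,
		       unsigned long max_seg_sz, struct seg *out)
{
	size_t nents = 0;

	for (size_t i = 0; i < npages; i++) {
		struct seg *cur = nents ? &out[nents - 1] : NULL;

		if (cur &&
		    cur->first_pfn + (cur->length >> PAGE_SHIFT) == pfns[i] &&
		    cur->length + PAGE_SIZE <= max_seg_sz) {
			cur->length += PAGE_SIZE;	/* extend current entry */
			continue;
		}
		out[nents].first_pfn = pfns[i];		/* open a new entry */
		out[nents].length = PAGE_SIZE;
		nents++;
	}
	return nents;
}

int main(void)
{
	/* Three contiguous pages, a gap, then two more contiguous pages. */
	const unsigned long pfns[] = { 100, 101, 102, 500, 501 };
	struct seg segs[5];
	size_t n = coalesce(pfns, 5, 1UL << 31 /* 2G cap */, segs);

	for (size_t i = 0; i < n; i++)	/* prints 2 entries: 12K and 8K */
		printf("entry %zu: pfn %lu, %lu bytes\n",
		       i, segs[i].first_pfn, segs[i].length);
	return 0;
}

The kernel function added below, ib_umem_add_sg_table(), performs the same fold over the page batches returned by get_user_pages(), writing into a scatterlist instead of an array.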
drivers/infiniband/core/device.c
@@ -1089,6 +1089,9 @@ static void setup_dma_device(struct ib_device *device)
 		WARN_ON_ONCE(!parent);
 		device->dma_device = parent;
 	}
+
+	/* Setup default max segment size for all IB devices */
+	dma_set_max_seg_size(device->dma_device, SZ_2G);
 }
 
 /*
drivers/infiniband/core/umem.c
@@ -39,25 +39,22 @@
 #include <linux/export.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 #include <rdma/ib_umem_odp.h>
 
 #include "uverbs.h"
 
-
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
-	struct scatterlist *sg;
+	struct sg_page_iter sg_iter;
 	struct page *page;
-	int i;
 
 	if (umem->nmap > 0)
-		ib_dma_unmap_sg(dev, umem->sg_head.sgl,
-				umem->npages,
+		ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
 				DMA_BIDIRECTIONAL);
 
-	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
-
-		page = sg_page(sg);
+	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
+		page = sg_page_iter_page(&sg_iter);
 		if (!PageDirty(page) && umem->writable && dirty)
 			set_page_dirty_lock(page);
 		put_page(page);
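Once an entry can span many pages, per-page cleanup (dirtying and releasing each page) must walk pages within an entry, which is why the release path above switches from for_each_sg() to for_each_sg_page(). A userspace analogue of that iteration (struct seg and visit_pages() are invented for the sketch):

#include <stdio.h>
#include <stddef.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

struct seg {
	unsigned long first_pfn;
	unsigned long length;	/* bytes */
};

/* Visit every page backing a coalesced table, the way for_each_sg_page()
 * walks pages rather than scatterlist entries. */
static void visit_pages(const struct seg *segs, size_t nents)
{
	for (size_t i = 0; i < nents; i++)
		for (unsigned long off = 0; off < segs[i].length; off += PAGE_SIZE)
			printf("pfn %lu\n", segs[i].first_pfn + (off >> PAGE_SHIFT));
}

int main(void)
{
	const struct seg segs[] = { { 100, 3 * PAGE_SIZE }, { 500, 2 * PAGE_SIZE } };

	visit_pages(segs, 2);	/* visits pfns 100, 101, 102, 500, 501 */
	return 0;
}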
@@ -66,6 +63,69 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 	sg_free_table(&umem->sg_head);
 }
 
+/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
+ *
+ * sg: current scatterlist entry
+ * page_list: array of npage struct page pointers
+ * npages: number of pages in page_list
+ * max_seg_sz: maximum segment size in bytes
+ * nents: [out] number of entries in the scatterlist
+ *
+ * Return new end of scatterlist
+ */
+static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
+						struct page **page_list,
+						unsigned long npages,
+						unsigned int max_seg_sz,
+						int *nents)
+{
+	unsigned long first_pfn;
+	unsigned long i = 0;
+	bool update_cur_sg = false;
+	bool first = !sg_page(sg);
+
+	/* Check if new page_list is contiguous with end of previous page_list.
+	 * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
+	 */
+	if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
+		       page_to_pfn(page_list[0])))
+		update_cur_sg = true;
+
+	while (i != npages) {
+		unsigned long len;
+		struct page *first_page = page_list[i];
+
+		first_pfn = page_to_pfn(first_page);
+
+		/* Compute the number of contiguous pages we have starting
+		 * at i
+		 */
+		for (len = 0; i != npages &&
+			      first_pfn + len == page_to_pfn(page_list[i]);
+		     len++)
+			i++;
+
+		/* Squash N contiguous pages from page_list into current sge */
+		if (update_cur_sg &&
+		    ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT))) {
+			sg_set_page(sg, sg_page(sg),
+				    sg->length + (len << PAGE_SHIFT), 0);
+			update_cur_sg = false;
+			continue;
+		}
+
+		/* Squash N contiguous pages into next sge or first sge */
+		if (!first)
+			sg = sg_next(sg);
+
+		(*nents)++;
+		sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
+		first = false;
+	}
+
+	return sg;
+}
+
 /**
  * ib_umem_get - Pin and DMA map userspace memory.
  *
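The update_cur_sg path above handles batching: ib_umem_get() pins pages in chunks, and when a new chunk starts exactly where the table's last entry ends (and the cap allows), that entry is extended rather than a new one opened. A compact userspace model of that behavior (add_batch() is invented; it repeats the fold from the earlier sketch, driven batch by batch):

#include <stdio.h>
#include <stddef.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

struct seg {
	unsigned long first_pfn;
	unsigned long length;	/* bytes */
};

/* Append one batch of pinned pages to the table; if the batch begins where
 * the tail entry ends and the cap allows, grow the tail instead of opening
 * a new entry. */
static size_t add_batch(struct seg *tbl, size_t nents,
			const unsigned long *pfns, size_t n,
			unsigned long max_seg_sz)
{
	for (size_t i = 0; i < n; i++) {
		struct seg *tail = nents ? &tbl[nents - 1] : NULL;

		if (tail &&
		    tail->first_pfn + (tail->length >> PAGE_SHIFT) == pfns[i] &&
		    tail->length + PAGE_SIZE <= max_seg_sz) {
			tail->length += PAGE_SIZE;
			continue;
		}
		tbl[nents].first_pfn = pfns[i];
		tbl[nents].length = PAGE_SIZE;
		nents++;
	}
	return nents;
}

int main(void)
{
	const unsigned long batch1[] = { 100, 101 }, batch2[] = { 102, 103 };
	struct seg tbl[4];
	size_t n;

	n = add_batch(tbl, 0, batch1, 2, 1UL << 31);
	n = add_batch(tbl, n, batch2, 2, 1UL << 31);	/* merges into the tail */
	printf("%zu entry of %lu bytes\n", n, tbl[0].length);	/* 1 entry, 16384 bytes */
	return 0;
}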
@@ -93,7 +153,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	int ret;
 	int i;
 	unsigned long dma_attrs = 0;
-	struct scatterlist *sg, *sg_list_start;
+	struct scatterlist *sg;
 	unsigned int gup_flags = FOLL_WRITE;
 
 	if (!udata)
@@ -190,7 +250,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	if (!umem->writable)
 		gup_flags |= FOLL_FORCE;
 
-	sg_list_start = umem->sg_head.sgl;
+	sg = umem->sg_head.sgl;
 
 	while (npages) {
 		down_read(&mm->mmap_sem);
@@ -203,28 +263,29 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 			goto umem_release;
 		}
 
-		umem->npages += ret;
 		cur_base += ret * PAGE_SIZE;
 		npages   -= ret;
 
+		sg = ib_umem_add_sg_table(sg, page_list, ret,
+			dma_get_max_seg_size(context->device->dma_device),
+			&umem->sg_nents);
+
 		/* Continue to hold the mmap_sem as vma_list access
 		 * needs to be protected.
 		 */
-		for_each_sg(sg_list_start, sg, ret, i) {
+		for (i = 0; i < ret && umem->hugetlb; i++) {
 			if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
 				umem->hugetlb = 0;
-
-			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
 		}
-		up_read(&mm->mmap_sem);
 
-		/* preparing for next loop */
-		sg_list_start = sg;
+		up_read(&mm->mmap_sem);
 	}
 
+	sg_mark_end(sg);
+
 	umem->nmap = ib_dma_map_sg_attrs(context->device,
 				  umem->sg_head.sgl,
-				  umem->npages,
+				  umem->sg_nents,
 				  DMA_BIDIRECTIONAL,
 				  dma_attrs);
 
@@ -320,8 +381,8 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
 		return -EINVAL;
 	}
 
-	ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->npages, dst, length,
-				 offset + ib_umem_offset(umem));
+	ret = sg_pcopy_to_buffer(umem->sg_head.sgl, ib_umem_num_pages(umem),
+				 dst, length, offset + ib_umem_offset(umem));
 
 	if (ret < 0)
 		return ret;
drivers/infiniband/core/umem_odp.c
@@ -526,7 +526,7 @@ static int ib_umem_odp_map_dma_single_page(
 		}
 		umem_odp->dma_list[page_index] = dma_addr | access_mask;
 		umem_odp->page_list[page_index] = page;
-		umem->npages++;
+		umem_odp->npages++;
 	} else if (umem_odp->page_list[page_index] == page) {
 		umem_odp->dma_list[page_index] |= access_mask;
 	} else {
@@ -752,7 +752,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 			}
 			umem_odp->page_list[idx] = NULL;
 			umem_odp->dma_list[idx] = 0;
-			umem->npages--;
+			umem_odp->npages--;
 		}
 	}
 	mutex_unlock(&umem_odp->umem_mutex);
drivers/infiniband/hw/mlx5/odp.c
@@ -288,7 +288,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 
 	ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
 
-	if (unlikely(!umem->npages && mr->parent &&
+	if (unlikely(!umem_odp->npages && mr->parent &&
 		     !umem_odp->dying)) {
 		WRITE_ONCE(umem_odp->dying, 1);
 		atomic_inc(&mr->parent->num_leaf_free);
drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -119,7 +119,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	union pvrdma_cmd_resp rsp;
 	struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
 	struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
-	int ret;
+	int ret, npages;
 
 	if (length == 0 || length > dev->dsr->caps.max_mr_size) {
 		dev_warn(&dev->pdev->dev, "invalid mem region length\n");
@@ -133,9 +133,10 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		return ERR_CAST(umem);
 	}
 
-	if (umem->npages < 0 || umem->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
+	npages = ib_umem_num_pages(umem);
+	if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
 		dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
-			 umem->npages);
+			 npages);
 		ret = -EINVAL;
 		goto err_umem;
 	}
@@ -150,7 +151,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	mr->mmr.size = length;
 	mr->umem = umem;
 
-	ret = pvrdma_page_dir_init(dev, &mr->pdir, umem->npages, false);
+	ret = pvrdma_page_dir_init(dev, &mr->pdir, npages, false);
 	if (ret) {
 		dev_warn(&dev->pdev->dev,
 			 "could not allocate page directory\n");
@@ -167,7 +168,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	cmd->length = length;
 	cmd->pd_handle = to_vpd(pd)->pd_handle;
 	cmd->access_flags = access_flags;
-	cmd->nchunks = umem->npages;
+	cmd->nchunks = npages;
 	cmd->pdir_dma = mr->pdir.dir_dma;
 
 	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
include/rdma/ib_umem.h
@@ -53,7 +53,7 @@ struct ib_umem {
 	struct work_struct	work;
 	struct sg_table sg_head;
 	int             nmap;
-	int             npages;
+	unsigned int    sg_nents;
 };
 
 /* Returns the offset of the umem start relative to the first page. */
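With npages gone from struct ib_umem, drivers derive the page count from the mapping's address and length via ib_umem_num_pages(). A sketch of that computation (num_pages() here is an illustrative re-derivation, not the kernel helper itself):

#include <stdio.h>
#include <stddef.h>

#define PAGE_SHIFT 12

/* Count the pages spanned by [addr, addr + length): the index of the last
 * touched page minus the index of the first, plus one. */
static unsigned long num_pages(unsigned long addr, size_t length)
{
	if (!length)
		return 0;
	return ((addr + length - 1) >> PAGE_SHIFT) - (addr >> PAGE_SHIFT) + 1;
}

int main(void)
{
	/* 6000 bytes starting 100 bytes into a 4K page span two pages. */
	printf("%lu\n", num_pages((1UL << 20) + 100, 6000));	/* prints 2 */
	return 0;
}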
include/rdma/ib_umem_odp.h
@@ -69,6 +69,7 @@ struct ib_umem_odp {
 
 	int notifiers_seq;
 	int notifiers_count;
+	int npages;
 
 	/* Tree tracking */
 	struct umem_odp_node	interval_tree;