Mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 10:40:15 +02:00)
RDMA/core: Convert UMEM ODP DMA mapping to caching IOVA and page linkage

Reuse newly added DMA API to cache IOVA and only link/unlink pages
in fast path for UMEM ODP flow.

Tested-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
parent eedd5b1276
commit 1efe8c0670

6 changed files with 74 additions and 116 deletions
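At a glance, the change drops the two parallel per-page arrays (pfn_list and dma_list) and the per-page ib_dma_map_page()/ib_dma_unmap_page() calls, and instead hangs a single struct hmm_dma_map off the umem: the IOVA space is allocated once at init time, and the fast paths only link or unlink individual pages within it. The sketch below condenses that calling pattern from the hunks that follow; the odp_* wrapper names are purely illustrative (not part of the patch), and locking, dirtying, and most error handling are trimmed.

/*
 * Condensed sketch of the new UMEM ODP DMA flow (illustrative wrappers,
 * not part of the patch). The hmm_dma_* calls and their arguments are
 * taken from the diff below.
 */
#include <linux/hmm-dma.h>
#include <linux/pci-p2pdma.h>
#include <rdma/ib_umem_odp.h>

/* Init: allocate the pfn list and cached IOVA space once per umem. */
static int odp_map_init(struct ib_umem_odp *umem_odp, u64 start, u64 end)
{
        struct ib_device *dev = umem_odp->umem.ibdev;

        return hmm_dma_map_alloc(dev->dma_device, &umem_odp->map,
                                 (end - start) >> PAGE_SHIFT,
                                 1 << umem_odp->page_shift);
}

/* Page-fault path: link one faulted pfn into the cached IOVA space. */
static dma_addr_t odp_link_pfn(struct ib_umem_odp *umem_odp, size_t idx,
                               struct pci_p2pdma_map_state *p2pdma_state)
{
        struct ib_device *dev = umem_odp->umem.ibdev;

        return hmm_dma_map_pfn(dev->dma_device, &umem_odp->map, idx,
                               p2pdma_state);
}

/* Invalidation path: unlink one pfn; the IOVA itself stays cached. */
static void odp_unlink_pfn(struct ib_umem_odp *umem_odp, size_t idx)
{
        struct ib_device *dev = umem_odp->umem.ibdev;

        if (hmm_dma_unmap_pfn(dev->dma_device, &umem_odp->map, idx))
                umem_odp->npages--;
}

/* Release: free the map together with the umem. */
static void odp_map_fini(struct ib_umem_odp *umem_odp)
{
        struct ib_device *dev = umem_odp->umem.ibdev;

        hmm_dma_map_free(dev->dma_device, &umem_odp->map);
}

In the mlx5 fault path the dma_addr returned by hmm_dma_map_pfn() is checked with ib_dma_mapping_error(), OR'ed with the MTT access bits, and written into the XLT, as populate_mtt() in the diff shows.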
				
			
drivers/infiniband/core/umem_odp.c

@@ -41,6 +41,7 @@
 #include <linux/hugetlb.h>
 #include <linux/interval_tree.h>
 #include <linux/hmm.h>
+#include <linux/hmm-dma.h>
 #include <linux/pagemap.h>

 #include <rdma/ib_umem_odp.h>
@@ -50,6 +51,7 @@
 static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
                                    const struct mmu_interval_notifier_ops *ops)
 {
+        struct ib_device *dev = umem_odp->umem.ibdev;
         int ret;

         umem_odp->umem.is_odp = 1;
@@ -59,7 +61,6 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
                 size_t page_size = 1UL << umem_odp->page_shift;
                 unsigned long start;
                 unsigned long end;
-                size_t ndmas, npfns;

                 start = ALIGN_DOWN(umem_odp->umem.address, page_size);
                 if (check_add_overflow(umem_odp->umem.address,
@@ -70,36 +71,23 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
                 if (unlikely(end < page_size))
                         return -EOVERFLOW;

-                ndmas = (end - start) >> umem_odp->page_shift;
-                if (!ndmas)
-                        return -EINVAL;
-
-                npfns = (end - start) >> PAGE_SHIFT;
-                umem_odp->pfn_list = kvcalloc(
-                        npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL);
-                if (!umem_odp->pfn_list)
-                        return -ENOMEM;
-
-                umem_odp->dma_list = kvcalloc(
-                        ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL);
-                if (!umem_odp->dma_list) {
-                        ret = -ENOMEM;
-                        goto out_pfn_list;
-                }
+                ret = hmm_dma_map_alloc(dev->dma_device, &umem_odp->map,
+                                        (end - start) >> PAGE_SHIFT,
+                                        1 << umem_odp->page_shift);
+                if (ret)
+                        return ret;

                 ret = mmu_interval_notifier_insert(&umem_odp->notifier,
                                                    umem_odp->umem.owning_mm,
                                                    start, end - start, ops);
                 if (ret)
-                        goto out_dma_list;
+                        goto out_free_map;
         }

         return 0;

-out_dma_list:
-        kvfree(umem_odp->dma_list);
-out_pfn_list:
-        kvfree(umem_odp->pfn_list);
+out_free_map:
+        hmm_dma_map_free(dev->dma_device, &umem_odp->map);
         return ret;
 }

@@ -262,6 +250,8 @@ EXPORT_SYMBOL(ib_umem_odp_get);

 void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 {
+        struct ib_device *dev = umem_odp->umem.ibdev;
+
         /*
          * Ensure that no more pages are mapped in the umem.
          *
@@ -274,48 +264,17 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
                                             ib_umem_end(umem_odp));
                 mutex_unlock(&umem_odp->umem_mutex);
                 mmu_interval_notifier_remove(&umem_odp->notifier);
-                kvfree(umem_odp->dma_list);
-                kvfree(umem_odp->pfn_list);
+                hmm_dma_map_free(dev->dma_device, &umem_odp->map);
         }
         put_pid(umem_odp->tgid);
         kfree(umem_odp);
 }
 EXPORT_SYMBOL(ib_umem_odp_release);

-/*
- * Map for DMA and insert a single page into the on-demand paging page tables.
- *
- * @umem: the umem to insert the page to.
- * @dma_index: index in the umem to add the dma to.
- * @page: the page struct to map and add.
- * @access_mask: access permissions needed for this page.
- *
- * The function returns -EFAULT if the DMA mapping operation fails.
- *
- */
-static int ib_umem_odp_map_dma_single_page(
-                struct ib_umem_odp *umem_odp,
-                unsigned int dma_index,
-                struct page *page)
-{
-        struct ib_device *dev = umem_odp->umem.ibdev;
-        dma_addr_t *dma_addr = &umem_odp->dma_list[dma_index];
-
-        *dma_addr = ib_dma_map_page(dev, page, 0, 1 << umem_odp->page_shift,
-                                    DMA_BIDIRECTIONAL);
-        if (ib_dma_mapping_error(dev, *dma_addr)) {
-                *dma_addr = 0;
-                return -EFAULT;
-        }
-        umem_odp->npages++;
-        return 0;
-}
-
 /**
  * ib_umem_odp_map_dma_and_lock - DMA map userspace memory in an ODP MR and lock it.
  *
  * Maps the range passed in the argument to DMA addresses.
- * The DMA addresses of the mapped pages is updated in umem_odp->dma_list.
  * Upon success the ODP MR will be locked to let caller complete its device
  * page table update.
  *
@@ -372,7 +331,7 @@ int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
                         range.default_flags |= HMM_PFN_REQ_WRITE;
         }

-        range.hmm_pfns = &(umem_odp->pfn_list[pfn_start_idx]);
+        range.hmm_pfns = &(umem_odp->map.pfn_list[pfn_start_idx]);
         timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);

 retry:
@@ -423,16 +382,6 @@ int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
                                   __func__, hmm_order, page_shift);
                         break;
                 }
-
-                ret = ib_umem_odp_map_dma_single_page(
-                        umem_odp, dma_index,
-                        hmm_pfn_to_page(range.hmm_pfns[pfn_index]));
-                if (ret < 0) {
-                        ibdev_dbg(umem_odp->umem.ibdev,
-                                  "ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
-                        break;
-                }
-                range.hmm_pfns[pfn_index] |= HMM_PFN_DMA_MAPPED;
         }
         /* upon success lock should stay on hold for the callee */
         if (!ret)
@@ -452,32 +401,23 @@ EXPORT_SYMBOL(ib_umem_odp_map_dma_and_lock);
 void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
                                  u64 bound)
 {
-        dma_addr_t dma;
-        int idx;
-        u64 addr;
         struct ib_device *dev = umem_odp->umem.ibdev;
+        u64 addr;

         lockdep_assert_held(&umem_odp->umem_mutex);

         virt = max_t(u64, virt, ib_umem_start(umem_odp));
         bound = min_t(u64, bound, ib_umem_end(umem_odp));
         for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
-                unsigned long pfn_idx = (addr - ib_umem_start(umem_odp)) >>
-                                        PAGE_SHIFT;
-                struct page *page =
-                        hmm_pfn_to_page(umem_odp->pfn_list[pfn_idx]);
+                u64 offset = addr - ib_umem_start(umem_odp);
+                size_t idx = offset >> umem_odp->page_shift;
+                unsigned long pfn = umem_odp->map.pfn_list[idx];

-                idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
-                dma = umem_odp->dma_list[idx];
-
-                if (!(umem_odp->pfn_list[pfn_idx] & HMM_PFN_VALID))
-                        goto clear;
-                if (!(umem_odp->pfn_list[pfn_idx] & HMM_PFN_DMA_MAPPED))
+                if (!hmm_dma_unmap_pfn(dev->dma_device, &umem_odp->map, idx))
                         goto clear;

-                ib_dma_unmap_page(dev, dma, BIT(umem_odp->page_shift),
-                                  DMA_BIDIRECTIONAL);
-                if (umem_odp->pfn_list[pfn_idx] & HMM_PFN_WRITE) {
+                if (pfn & HMM_PFN_WRITE) {
+                        struct page *page = hmm_pfn_to_page(pfn);
                         struct page *head_page = compound_head(page);
                         /*
                          * set_page_dirty prefers being called with
@@ -492,7 +432,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
                 }
                 umem_odp->npages--;
 clear:
-                umem_odp->pfn_list[pfn_idx] &= ~HMM_PFN_FLAGS;
+                umem_odp->map.pfn_list[idx] &= ~HMM_PFN_FLAGS;
         }
 }
 EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
drivers/infiniband/hw/mlx5/mlx5_ib.h

@@ -1474,7 +1474,7 @@ void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
 int __init mlx5_ib_odp_init(void);
 void mlx5_ib_odp_cleanup(void);
 int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev);
-void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
                           struct mlx5_ib_mr *mr, int flags);

 int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
@@ -1496,8 +1496,11 @@ static inline int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
 {
         return 0;
 }
-static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
-                                         struct mlx5_ib_mr *mr, int flags) {}
+static inline int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+                                        struct mlx5_ib_mr *mr, int flags)
+{
+        return -EOPNOTSUPP;
+}

 static inline int
 mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
drivers/infiniband/hw/mlx5/odp.c

@@ -35,6 +35,8 @@
 #include <linux/dma-buf.h>
 #include <linux/dma-resv.h>
 #include <linux/hmm.h>
+#include <linux/hmm-dma.h>
+#include <linux/pci-p2pdma.h>

 #include "mlx5_ib.h"
 #include "cmd.h"
@@ -159,40 +161,50 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
         }
 }

-static void populate_mtt(__be64 *pas, size_t idx, size_t nentries,
+static int populate_mtt(__be64 *pas, size_t start, size_t nentries,
                         struct mlx5_ib_mr *mr, int flags)
 {
         struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
         bool downgrade = flags & MLX5_IB_UPD_XLT_DOWNGRADE;
-        unsigned long pfn;
-        dma_addr_t pa;
+        struct pci_p2pdma_map_state p2pdma_state = {};
+        struct ib_device *dev = odp->umem.ibdev;
         size_t i;

         if (flags & MLX5_IB_UPD_XLT_ZAP)
-                return;
+                return 0;

         for (i = 0; i < nentries; i++) {
-                pfn = odp->pfn_list[idx + i];
+                unsigned long pfn = odp->map.pfn_list[start + i];
+                dma_addr_t dma_addr;
+
+                pfn = odp->map.pfn_list[start + i];
                 if (!(pfn & HMM_PFN_VALID))
                         /* ODP initialization */
                         continue;

-                pa = odp->dma_list[idx + i];
-                pa |= MLX5_IB_MTT_READ;
-                if ((pfn & HMM_PFN_WRITE) && !downgrade)
-                        pa |= MLX5_IB_MTT_WRITE;
+                dma_addr = hmm_dma_map_pfn(dev->dma_device, &odp->map,
+                                           start + i, &p2pdma_state);
+                if (ib_dma_mapping_error(dev, dma_addr))
+                        return -EFAULT;

-                pas[i] = cpu_to_be64(pa);
+                dma_addr |= MLX5_IB_MTT_READ;
+                if ((pfn & HMM_PFN_WRITE) && !downgrade)
+                        dma_addr |= MLX5_IB_MTT_WRITE;
+
+                pas[i] = cpu_to_be64(dma_addr);
+                odp->npages++;
         }
+        return 0;
 }

-void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
                           struct mlx5_ib_mr *mr, int flags)
 {
         if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
                 populate_klm(xlt, idx, nentries, mr, flags);
+                return 0;
         } else {
-                populate_mtt(xlt, idx, nentries, mr, flags);
+                return populate_mtt(xlt, idx, nentries, mr, flags);
         }
 }

@@ -303,7 +315,7 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
                  * estimate the cost of another UMR vs. the cost of bigger
                  * UMR.
                  */
-                if (umem_odp->pfn_list[idx] & HMM_PFN_VALID) {
+                if (umem_odp->map.pfn_list[idx] & HMM_PFN_VALID) {
                         if (!in_block) {
                                 blk_start_idx = idx;
                                 in_block = 1;
drivers/infiniband/hw/mlx5/umr.c

@@ -840,7 +840,17 @@ int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
                 size_to_map = npages * desc_size;
                 dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
                                         DMA_TO_DEVICE);
-                mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
+                /*
+                 * npages is the maximum number of pages to map, but we
+                 * can't guarantee that all pages are actually mapped.
+                 *
+                 * For example, if page is p2p of type which is not supported
+                 * for mapping, the number of pages mapped will be less than
+                 * requested.
+                 */
+                err = mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
+                if (err)
+                        return err;
                 dma_sync_single_for_device(ddev, sg.addr, sg.length,
                                            DMA_TO_DEVICE);
                 sg.length = ALIGN(size_to_map, MLX5_UMR_FLEX_ALIGNMENT);
drivers/infiniband/sw/rxe/rxe_odp.c

@@ -205,7 +205,7 @@ static int __rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
         while (length > 0) {
                 u8 *src, *dest;

-                page = hmm_pfn_to_page(umem_odp->pfn_list[idx]);
+                page = hmm_pfn_to_page(umem_odp->map.pfn_list[idx]);
                 user_va = kmap_local_page(page);
                 if (!user_va)
                         return -EFAULT;
@@ -289,7 +289,7 @@ static enum resp_states rxe_odp_do_atomic_op(struct rxe_mr *mr, u64 iova,

         idx = rxe_odp_iova_to_index(umem_odp, iova);
         page_offset = rxe_odp_iova_to_page_offset(umem_odp, iova);
-        page = hmm_pfn_to_page(umem_odp->pfn_list[idx]);
+        page = hmm_pfn_to_page(umem_odp->map.pfn_list[idx]);
         if (!page)
                 return RESPST_ERR_RKEY_VIOLATION;

@@ -355,7 +355,7 @@ int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova,
                 index = rxe_odp_iova_to_index(umem_odp, iova);
                 page_offset = rxe_odp_iova_to_page_offset(umem_odp, iova);

-                page = hmm_pfn_to_page(umem_odp->pfn_list[index]);
+                page = hmm_pfn_to_page(umem_odp->map.pfn_list[index]);
                 if (!page) {
                         mutex_unlock(&umem_odp->umem_mutex);
                         return -EFAULT;
@@ -401,7 +401,7 @@ enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)

         page_offset = rxe_odp_iova_to_page_offset(umem_odp, iova);
         index = rxe_odp_iova_to_index(umem_odp, iova);
-        page = hmm_pfn_to_page(umem_odp->pfn_list[index]);
+        page = hmm_pfn_to_page(umem_odp->map.pfn_list[index]);
         if (!page) {
                 mutex_unlock(&umem_odp->umem_mutex);
                 return RESPST_ERR_RKEY_VIOLATION;
include/rdma/ib_umem_odp.h

@@ -8,24 +8,17 @@

 #include <rdma/ib_umem.h>
 #include <rdma/ib_verbs.h>
-#include <linux/hmm.h>
+#include <linux/hmm-dma.h>

 struct ib_umem_odp {
         struct ib_umem umem;
         struct mmu_interval_notifier notifier;
         struct pid *tgid;

-        /* An array of the pfns included in the on-demand paging umem. */
-        unsigned long *pfn_list;
+        struct hmm_dma_map map;

         /*
-         * An array with DMA addresses mapped for pfns in pfn_list.
-         * The lower two bits designate access permissions.
-         * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT.
-         */
-        dma_addr_t              *dma_list;
-        /*
-         * The umem_mutex protects the page_list and dma_list fields of an ODP
+         * The umem_mutex protects the page_list field of an ODP
          * umem, allowing only a single thread to map/unmap pages. The mutex
          * also protects access to the mmu notifier counters.
          */