	io_uring: get rid of remap_pfn_range() for mapping rings/sqes
Rather than use remap_pfn_range() for this and manually free later, switch
to using vm_insert_pages() and have it Just Work.

If possible, allocate a single compound page that covers the range that is
needed. If that works, then we can just use page_address() on that page. If
we fail to get a compound page, allocate single pages and use vmap() to map
them into the kernel virtual address space.

This just covers the rings/sqes; the other remaining user of the mmap
remap_pfn_range() path will be converted separately. Once that is done, we
can kill the old alloc/free code.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
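For context, and not part of the change itself: the IORING_OFF_SQ_RING/IORING_OFF_CQ_RING and IORING_OFF_SQES regions touched here are the ones userspace obtains by calling mmap() on the ring fd after io_uring_setup(2). A minimal, illustrative userspace sketch of those two mappings follows (error handling mostly omitted; the fixed ring size of 8 and skipped feature checks are simplifications):

	/*
	 * Illustrative sketch only, not from this commit: the two mmap()
	 * calls that the converted IORING_OFF_SQ_RING/IORING_OFF_CQ_RING
	 * and IORING_OFF_SQES cases serve on the kernel side.
	 */
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <linux/io_uring.h>

	int main(void)
	{
		struct io_uring_params p;
		void *sq_ring, *sqes;
		int fd;

		memset(&p, 0, sizeof(p));
		fd = syscall(__NR_io_uring_setup, 8, &p);
		if (fd < 0)
			return 1;

		/* SQ ring; the CQ ring shares this mapping when IORING_FEAT_SINGLE_MMAP is set */
		sq_ring = mmap(NULL, p.sq_off.array + p.sq_entries * sizeof(__u32),
			       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			       fd, IORING_OFF_SQ_RING);

		/* SQE array, mapped at the IORING_OFF_SQES offset */
		sqes = mmap(NULL, p.sq_entries * sizeof(struct io_uring_sqe),
			    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			    fd, IORING_OFF_SQES);

		if (sq_ring == MAP_FAILED || sqes == MAP_FAILED)
			return 1;

		printf("sq ring %p, sqes %p\n", sq_ring, sqes);
		return 0;
	}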
parent 62346c6cb2
commit 3ab1db3c60

2 changed files with 133 additions and 8 deletions
io_uring/io_uring.c

@@ -2599,6 +2599,36 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
 }
 
+static void io_pages_unmap(void *ptr, struct page ***pages,
+			   unsigned short *npages)
+{
+	bool do_vunmap = false;
+
+	if (!ptr)
+		return;
+
+	if (*npages) {
+		struct page **to_free = *pages;
+		int i;
+
+		/*
+		 * Only did vmap for the non-compound multiple page case.
+		 * For the compound page, we just need to put the head.
+		 */
+		if (PageCompound(to_free[0]))
+			*npages = 1;
+		else if (*npages > 1)
+			do_vunmap = true;
+		for (i = 0; i < *npages; i++)
+			put_page(to_free[i]);
+	}
+	if (do_vunmap)
+		vunmap(ptr);
+	kvfree(*pages);
+	*pages = NULL;
+	*npages = 0;
+}
+
 void io_mem_free(void *ptr)
 {
 	if (!ptr)
@@ -2699,8 +2729,8 @@ static void *io_sqes_map(struct io_ring_ctx *ctx, unsigned long uaddr,
 static void io_rings_free(struct io_ring_ctx *ctx)
 {
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
-		io_mem_free(ctx->rings);
-		io_mem_free(ctx->sq_sqes);
+		io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages);
+		io_pages_unmap(ctx->sq_sqes, &ctx->sqe_pages, &ctx->n_sqe_pages);
 	} else {
 		io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
 		ctx->n_ring_pages = 0;
@@ -2712,6 +2742,80 @@ static void io_rings_free(struct io_ring_ctx *ctx)
 	ctx->sq_sqes = NULL;
 }
 
+static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
+				   size_t size, gfp_t gfp)
+{
+	struct page *page;
+	int i, order;
+
+	order = get_order(size);
+	if (order > MAX_PAGE_ORDER)
+		return ERR_PTR(-ENOMEM);
+	else if (order)
+		gfp |= __GFP_COMP;
+
+	page = alloc_pages(gfp, order);
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < nr_pages; i++)
+		pages[i] = page + i;
+
+	return page_address(page);
+}
+
+static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size,
+				 gfp_t gfp)
+{
+	void *ret;
+	int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		pages[i] = alloc_page(gfp);
+		if (!pages[i])
+			goto err;
+	}
+
+	ret = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (ret)
+		return ret;
+err:
+	while (i--)
+		put_page(pages[i]);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void *io_pages_map(struct page ***out_pages, unsigned short *npages,
+			  size_t size)
+{
+	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
+	struct page **pages;
+	int nr_pages;
+	void *ret;
+
+	nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	pages = kvmalloc_array(nr_pages, sizeof(struct page *), gfp);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
+
+	ret = io_mem_alloc_compound(pages, nr_pages, size, gfp);
+	if (!IS_ERR(ret))
+		goto done;
+
+	ret = io_mem_alloc_single(pages, nr_pages, size, gfp);
+	if (!IS_ERR(ret)) {
+done:
+		*out_pages = pages;
+		*npages = nr_pages;
+		return ret;
+	}
+
+	kvfree(pages);
+	*out_pages = NULL;
+	*npages = 0;
+	return ret;
+}
+
 void *io_mem_alloc(size_t size)
 {
 	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
@@ -3298,14 +3402,12 @@ static void *io_uring_validate_mmap_request(struct file *file,
 		/* Don't allow mmap if the ring was setup without it */
 		if (ctx->flags & IORING_SETUP_NO_MMAP)
 			return ERR_PTR(-EINVAL);
-		ptr = ctx->rings;
-		break;
+		return ctx->rings;
 	case IORING_OFF_SQES:
 		/* Don't allow mmap if the ring was setup without it */
 		if (ctx->flags & IORING_SETUP_NO_MMAP)
 			return ERR_PTR(-EINVAL);
-		ptr = ctx->sq_sqes;
-		break;
+		return ctx->sq_sqes;
 	case IORING_OFF_PBUF_RING: {
 		struct io_buffer_list *bl;
 		unsigned int bgid;
@@ -3329,11 +3431,22 @@ static void *io_uring_validate_mmap_request(struct file *file,
 	return ptr;
 }
 
+int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
+			struct page **pages, int npages)
+{
+	unsigned long nr_pages = npages;
+
+	vm_flags_set(vma, VM_DONTEXPAND);
+	return vm_insert_pages(vma, vma->vm_start, pages, &nr_pages);
+}
+
 #ifdef CONFIG_MMU
 
 static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	struct io_ring_ctx *ctx = file->private_data;
 	size_t sz = vma->vm_end - vma->vm_start;
+	long offset = vma->vm_pgoff << PAGE_SHIFT;
 	unsigned long pfn;
 	void *ptr;
 
@@ -3341,6 +3454,16 @@ static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 	if (IS_ERR(ptr))
 		return PTR_ERR(ptr);
 
+	switch (offset & IORING_OFF_MMAP_MASK) {
+	case IORING_OFF_SQ_RING:
+	case IORING_OFF_CQ_RING:
+		return io_uring_mmap_pages(ctx, vma, ctx->ring_pages,
+						ctx->n_ring_pages);
+	case IORING_OFF_SQES:
+		return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
+						ctx->n_sqe_pages);
+	}
+
 	pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
 	return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
 }
@@ -3630,7 +3753,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 		return -EOVERFLOW;
 
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP))
-		rings = io_mem_alloc(size);
+		rings = io_pages_map(&ctx->ring_pages, &ctx->n_ring_pages, size);
 	else
 		rings = io_rings_map(ctx, p->cq_off.user_addr, size);
 
@@ -3655,7 +3778,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 	}
 
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP))
-		ptr = io_mem_alloc(size);
+		ptr = io_pages_map(&ctx->sqe_pages, &ctx->n_sqe_pages, size);
 	else
 		ptr = io_sqes_map(ctx, p->sq_off.user_addr, size);
 

io_uring/io_uring.h

@@ -70,6 +70,8 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags);
 void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
 
 struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
+int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
+			struct page **pages, int npages);
 
 struct file *io_file_get_normal(struct io_kiocb *req, int fd);
 struct file *io_file_get_fixed(struct io_kiocb *req, int fd,