Mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 10:40:15 +02:00
	io_uring/kbuf: use vm_insert_pages() for mmap'ed pbuf ring
Rather than use remap_pfn_range() for this and manually free later, switch to using vm_insert_page() and have it Just Work.

This requires a bit of effort on the mmap lookup side, as the ctx uring_lock isn't held, which otherwise protects buffer_lists from being torn down, and it's not safe to grab it from mmap context, as that would introduce an ABBA deadlock between the mmap lock and the ctx uring_lock. Instead, look up the buffer_list under RCU, as the list is RCU freed already. Use the existing reference count to determine whether it's possible to safely grab a reference to it (i.e. if it's not already zero), and drop that reference when done with the mapping. If the mmap reference is the last one, the buffer_list and the associated memory can go away, since the vma insertion holds references to the inserted pages at that point.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
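The description above boils down to two pieces: a lock-free lookup that pins the buffer_list via its reference count inside an RCU read section, and a page-based mmap so the vma holds its own page references. Below is a minimal, self-contained sketch of that pattern, not the actual io_uring code; the names example_buf_list, example_get_bl and example_pbuf_mmap are illustrative stand-ins for io_buffer_list, io_pbuf_get_bl() and io_pbuf_mmap().

/* Illustrative sketch only -- not the io_uring implementation itself. */
#include <linux/atomic.h>
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <linux/xarray.h>

/* Hypothetical stand-in for a refcounted, RCU-freed buffer list. */
struct example_buf_list {
	atomic_t	refs;		/* existing reference count */
	struct page	**buf_pages;	/* pages backing the mapped ring */
	unsigned long	buf_nr_pages;
};

/*
 * Lookup without holding the ctx lock: pin the entry via its refcount
 * inside an RCU read section, mirroring the scheme described above.
 */
static struct example_buf_list *example_get_bl(struct xarray *xa,
					       unsigned int bgid)
{
	struct example_buf_list *bl;

	rcu_read_lock();
	bl = xa_load(xa, bgid);
	if (bl && !atomic_inc_not_zero(&bl->refs))
		bl = NULL;	/* refcount already zero: being torn down */
	rcu_read_unlock();
	return bl;
}

/*
 * Map the ring pages with vm_insert_pages(); the inserted pages hold
 * their own references, so the mapping stays valid even if the caller's
 * buffer-list reference turns out to be the last one.
 */
static int example_pbuf_mmap(struct vm_area_struct *vma,
			     struct example_buf_list *bl)
{
	unsigned long nr = bl->buf_nr_pages;

	return vm_insert_pages(vma, vma->vm_start, bl->buf_pages, &nr);
}

After vm_insert_pages() returns, the caller drops its buffer_list reference (io_put_bl() in the real code); even if that was the last reference, the mapping remains valid because the vma now references the inserted pages itself.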
This commit is contained in:

parent e270bfd22a
commit 87585b0575

5 changed files with 46 additions and 156 deletions
include/linux/io_uring_types.h

@@ -372,9 +372,6 @@ struct io_ring_ctx {
 
 	struct list_head	io_buffers_cache;
 
-	/* deferred free list, protected by ->uring_lock */
-	struct hlist_head	io_buf_list;
-
 	/* Keep this last, we don't need it for the fast path */
 	struct wait_queue_head		poll_wq;
 	struct io_restriction		restrictions;
io_uring/io_uring.c

@@ -303,7 +303,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->sqd_list);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	INIT_LIST_HEAD(&ctx->io_buffers_cache);
-	INIT_HLIST_HEAD(&ctx->io_buf_list);
 	ret = io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
 			    sizeof(struct io_rsrc_node));
 	ret |= io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX,
@@ -2598,15 +2597,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
 }
 
-static void io_pages_unmap(void *ptr, struct page ***pages,
-			   unsigned short *npages)
+void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
+		    bool put_pages)
 {
 	bool do_vunmap = false;
 
 	if (!ptr)
 		return;
 
-	if (*npages) {
+	if (put_pages && *npages) {
 		struct page **to_free = *pages;
 		int i;
 
@@ -2628,14 +2627,6 @@ static void io_pages_unmap(void *ptr, struct page ***pages,
 	*npages = 0;
 }
 
-void io_mem_free(void *ptr)
-{
-	if (!ptr)
-		return;
-
-	folio_put(virt_to_folio(ptr));
-}
-
 static void io_pages_free(struct page ***pages, int npages)
 {
 	struct page **page_array = *pages;
@@ -2730,8 +2721,10 @@ static void *io_sqes_map(struct io_ring_ctx *ctx, unsigned long uaddr,
 static void io_rings_free(struct io_ring_ctx *ctx)
 {
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
-		io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages);
-		io_pages_unmap(ctx->sq_sqes, &ctx->sqe_pages, &ctx->n_sqe_pages);
+		io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages,
+				true);
+		io_pages_unmap(ctx->sq_sqes, &ctx->sqe_pages, &ctx->n_sqe_pages,
+				true);
 	} else {
 		io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
 		ctx->n_ring_pages = 0;
@@ -2788,8 +2781,8 @@ static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size,
 	return ERR_PTR(-ENOMEM);
 }
 
-static void *io_pages_map(struct page ***out_pages, unsigned short *npages,
-			  size_t size)
+void *io_pages_map(struct page ***out_pages, unsigned short *npages,
+		   size_t size)
 {
 	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
 	struct page **pages;
@@ -2819,17 +2812,6 @@ static void *io_pages_map(struct page ***out_pages, unsigned short *npages,
 	return ret;
 }
 
-void *io_mem_alloc(size_t size)
-{
-	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
-	void *ret;
-
-	ret = (void *) __get_free_pages(gfp, get_order(size));
-	if (ret)
-		return ret;
-	return ERR_PTR(-ENOMEM);
-}
-
 static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries,
 				unsigned int cq_entries, size_t *sq_offset)
 {
@@ -2926,7 +2908,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		ctx->mm_account = NULL;
 	}
 	io_rings_free(ctx);
-	io_kbuf_mmap_list_free(ctx);
 
 	percpu_ref_exit(&ctx->refs);
 	free_uid(ctx->user);
@@ -3396,10 +3377,8 @@ static void *io_uring_validate_mmap_request(struct file *file,
 {
 	struct io_ring_ctx *ctx = file->private_data;
 	loff_t offset = pgoff << PAGE_SHIFT;
-	struct page *page;
-	void *ptr;
 
-	switch (offset & IORING_OFF_MMAP_MASK) {
+	switch ((pgoff << PAGE_SHIFT) & IORING_OFF_MMAP_MASK) {
 	case IORING_OFF_SQ_RING:
 	case IORING_OFF_CQ_RING:
 		/* Don't allow mmap if the ring was setup without it */
@@ -3414,6 +3393,7 @@ static void *io_uring_validate_mmap_request(struct file *file,
 	case IORING_OFF_PBUF_RING: {
 		struct io_buffer_list *bl;
 		unsigned int bgid;
+		void *ptr;
 
 		bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
 		bl = io_pbuf_get_bl(ctx, bgid);
@@ -3421,17 +3401,11 @@ static void *io_uring_validate_mmap_request(struct file *file,
 			return bl;
 		ptr = bl->buf_ring;
 		io_put_bl(ctx, bl);
-		break;
+		return ptr;
 		}
-	default:
-		return ERR_PTR(-EINVAL);
 	}
 
-	page = virt_to_head_page(ptr);
-	if (sz > page_size(page))
-		return ERR_PTR(-EINVAL);
-
-	return ptr;
+	return ERR_PTR(-EINVAL);
 }
 
 int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
@@ -3450,7 +3424,6 @@ static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 	struct io_ring_ctx *ctx = file->private_data;
 	size_t sz = vma->vm_end - vma->vm_start;
 	long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned long pfn;
 	void *ptr;
 
 	ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
@@ -3465,10 +3438,11 @@ static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 	case IORING_OFF_SQES:
 		return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
 						ctx->n_sqe_pages);
+	case IORING_OFF_PBUF_RING:
+		return io_pbuf_mmap(file, vma);
 	}
 
-	pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
-	return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
+	return -EINVAL;
 }
 
 static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
io_uring/io_uring.h

@@ -109,8 +109,10 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
 bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
 			bool cancel_all);
 
-void *io_mem_alloc(size_t size);
-void io_mem_free(void *ptr);
+void *io_pages_map(struct page ***out_pages, unsigned short *npages,
+		   size_t size);
+void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
+		    bool put_pages);
 
 enum {
 	IO_EVENTFD_OP_SIGNAL_BIT,

io_uring/kbuf.c: 132 additions and deletions
@@ -32,25 +32,12 @@ struct io_provide_buf {
 	__u16				bid;
 };
 
-struct io_buf_free {
-	struct hlist_node		list;
-	void				*mem;
-	size_t				size;
-	int				inuse;
-};
-
-static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
-						  unsigned int bgid)
-{
-	return xa_load(&ctx->io_bl_xa, bgid);
-}
-
 static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
 							unsigned int bgid)
 {
 	lockdep_assert_held(&ctx->uring_lock);
 
-	return __io_buffer_get_list(ctx, bgid);
+	return xa_load(&ctx->io_bl_xa, bgid);
 }
 
 static int io_buffer_add_list(struct io_ring_ctx *ctx,
@@ -191,24 +178,6 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
 	return ret;
 }
 
-/*
- * Mark the given mapped range as free for reuse
- */
-static void io_kbuf_mark_free(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
-{
-	struct io_buf_free *ibf;
-
-	hlist_for_each_entry(ibf, &ctx->io_buf_list, list) {
-		if (bl->buf_ring == ibf->mem) {
-			ibf->inuse = 0;
-			return;
-		}
-	}
-
-	/* can't happen... */
-	WARN_ON_ONCE(1);
-}
-
 static int __io_remove_buffers(struct io_ring_ctx *ctx,
 			       struct io_buffer_list *bl, unsigned nbufs)
 {
@@ -220,23 +189,16 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
 
 	if (bl->is_buf_ring) {
 		i = bl->buf_ring->tail - bl->head;
-		if (bl->is_mmap) {
-			/*
-			 * io_kbuf_list_free() will free the page(s) at
-			 * ->release() time.
-			 */
-			io_kbuf_mark_free(ctx, bl);
-			bl->buf_ring = NULL;
-			bl->is_mmap = 0;
-		} else if (bl->buf_nr_pages) {
+		if (bl->buf_nr_pages) {
 			int j;
 
-			for (j = 0; j < bl->buf_nr_pages; j++)
-				unpin_user_page(bl->buf_pages[j]);
-			kvfree(bl->buf_pages);
-			vunmap(bl->buf_ring);
-			bl->buf_pages = NULL;
-			bl->buf_nr_pages = 0;
+			if (!bl->is_mmap) {
+				for (j = 0; j < bl->buf_nr_pages; j++)
+					unpin_user_page(bl->buf_pages[j]);
+			}
+			io_pages_unmap(bl->buf_ring, &bl->buf_pages,
+					&bl->buf_nr_pages, bl->is_mmap);
+			bl->is_mmap = 0;
 		}
 		/* make sure it's seen as empty */
 		INIT_LIST_HEAD(&bl->buf_list);
@@ -537,63 +499,18 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
 	return ret;
 }
 
-/*
- * See if we have a suitable region that we can reuse, rather than allocate
- * both a new io_buf_free and mem region again. We leave it on the list as
- * even a reused entry will need freeing at ring release.
- */
-static struct io_buf_free *io_lookup_buf_free_entry(struct io_ring_ctx *ctx,
-						    size_t ring_size)
-{
-	struct io_buf_free *ibf, *best = NULL;
-	size_t best_dist;
-
-	hlist_for_each_entry(ibf, &ctx->io_buf_list, list) {
-		size_t dist;
-
-		if (ibf->inuse || ibf->size < ring_size)
-			continue;
-		dist = ibf->size - ring_size;
-		if (!best || dist < best_dist) {
-			best = ibf;
-			if (!dist)
-				break;
-			best_dist = dist;
-		}
-	}
-
-	return best;
-}
-
 static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx,
 			      struct io_uring_buf_reg *reg,
 			      struct io_buffer_list *bl)
 {
-	struct io_buf_free *ibf;
 	size_t ring_size;
-	void *ptr;
 
 	ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
 
-	/* Reuse existing entry, if we can */
-	ibf = io_lookup_buf_free_entry(ctx, ring_size);
-	if (!ibf) {
-		ptr = io_mem_alloc(ring_size);
-		if (IS_ERR(ptr))
-			return PTR_ERR(ptr);
-
-		/* Allocate and store deferred free entry */
-		ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT);
-		if (!ibf) {
-			io_mem_free(ptr);
-			return -ENOMEM;
-		}
-		ibf->mem = ptr;
-		ibf->size = ring_size;
-		hlist_add_head(&ibf->list, &ctx->io_buf_list);
-	}
-	ibf->inuse = 1;
-	bl->buf_ring = ibf->mem;
+	bl->buf_ring = io_pages_map(&bl->buf_pages, &bl->buf_nr_pages, ring_size);
+	if (!bl->buf_ring)
+		return -ENOMEM;
+
 	bl->is_buf_ring = 1;
 	bl->is_mmap = 1;
 	return 0;
@@ -741,18 +658,19 @@ struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
 	return ERR_PTR(-EINVAL);
 }
 
-/*
- * Called at or after ->release(), free the mmap'ed buffers that we used
- * for memory mapped provided buffer rings.
- */
-void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx)
+int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct io_buf_free *ibf;
-	struct hlist_node *tmp;
+	struct io_ring_ctx *ctx = file->private_data;
+	loff_t pgoff = vma->vm_pgoff << PAGE_SHIFT;
+	struct io_buffer_list *bl;
+	int bgid, ret;
 
-	hlist_for_each_entry_safe(ibf, tmp, &ctx->io_buf_list, list) {
-		hlist_del(&ibf->list);
-		io_mem_free(ibf->mem);
-		kfree(ibf);
-	}
+	bgid = (pgoff & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
+	bl = io_pbuf_get_bl(ctx, bgid);
+	if (IS_ERR(bl))
+		return PTR_ERR(bl);
+
+	ret = io_uring_mmap_pages(ctx, vma, bl->buf_pages, bl->buf_nr_pages);
+	io_put_bl(ctx, bl);
+	return ret;
 }
io_uring/kbuf.h

@@ -55,8 +55,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
 int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
 
-void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx);
-
 void __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
 
 bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
@@ -64,6 +62,7 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
 void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
 struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
 				      unsigned long bgid);
+int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma);
 
 static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
 {