	io_uring/kbuf: defer release of mapped buffer rings
If a provided buffer ring is set up with IOU_PBUF_RING_MMAP, then the
kernel allocates the memory for it and the application is expected to
mmap(2) this memory. However, io_uring uses remap_pfn_range() for this
operation, so we cannot rely on the normal munmap/release path to free
that memory for us.
Stash an io_buf_free entry away for each of these rings, if any, and
provide a helper to free them after ->release() has run.
Cc: stable@vger.kernel.org
Fixes: c56e022c0a ("io_uring: add support for user mapped provided buffer ring")
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
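
For context, a minimal userspace sketch of the flow the message describes: register a buffer ring with IOU_PBUF_RING_MMAP so the kernel allocates the memory, then mmap(2) it through the ring fd at the per-group offset. This is illustrative only; the helper name, the raw-syscall style and the omitted error handling are assumptions, not part of this patch.

#define _GNU_SOURCE
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

/* Hypothetical helper: register a kernel-allocated (IOU_PBUF_RING_MMAP)
 * provided buffer ring for buffer group 'bgid' and map it into userspace. */
static struct io_uring_buf_ring *setup_mmap_buf_ring(int ring_fd,
						     unsigned int entries,
						     unsigned int bgid)
{
	struct io_uring_buf_reg reg;
	void *ring;

	memset(&reg, 0, sizeof(reg));
	reg.ring_entries = entries;
	reg.bgid = bgid;
	reg.flags = IOU_PBUF_RING_MMAP;	/* kernel allocates the ring memory */

	if (syscall(__NR_io_uring_register, ring_fd, IORING_REGISTER_PBUF_RING,
		    &reg, 1) < 0)
		return NULL;

	/* The ring is exposed at a per-bgid mmap offset on the ring fd. */
	ring = mmap(NULL, entries * sizeof(struct io_uring_buf),
		    PROT_READ | PROT_WRITE, MAP_SHARED, ring_fd,
		    IORING_OFF_PBUF_RING |
		    ((unsigned long long)bgid << IORING_OFF_PBUF_SHIFT));
	if (ring == MAP_FAILED)
		return NULL;

	return ring;
}

Because the pages backing this mapping are inserted with remap_pfn_range(), the mapping itself does not pin them, which is exactly why the kernel side below has to keep them alive until ->release().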
			
			
commit c392cbecd8, parent edecf16897
4 changed files with 46 additions and 5 deletions
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -340,6 +340,9 @@ struct io_ring_ctx {
 
 	struct list_head	io_buffers_cache;
 
+	/* deferred free list, protected by ->uring_lock */
+	struct hlist_head	io_buf_list;
+
 	/* Keep this last, we don't need it for the fast path */
 	struct wait_queue_head		poll_wq;
 	struct io_restriction		restrictions;
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -325,6 +325,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->sqd_list);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	INIT_LIST_HEAD(&ctx->io_buffers_cache);
+	INIT_HLIST_HEAD(&ctx->io_buf_list);
 	io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
 			    sizeof(struct io_rsrc_node));
 	io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX,
@@ -2950,6 +2951,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		ctx->mm_account = NULL;
 	}
 	io_rings_free(ctx);
+	io_kbuf_mmap_list_free(ctx);
 
 	percpu_ref_exit(&ctx->refs);
 	free_uid(ctx->user);
 | 
				
			||||||
	__u16				bid;
 | 
						__u16				bid;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct io_buf_free {
 | 
				
			||||||
 | 
						struct hlist_node		list;
 | 
				
			||||||
 | 
						void				*mem;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
 | 
					static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
 | 
				
			||||||
							unsigned int bgid)
 | 
												unsigned int bgid)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -223,7 +228,10 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
 | 
				
			||||||
	if (bl->is_mapped) {
 | 
						if (bl->is_mapped) {
 | 
				
			||||||
		i = bl->buf_ring->tail - bl->head;
 | 
							i = bl->buf_ring->tail - bl->head;
 | 
				
			||||||
		if (bl->is_mmap) {
 | 
							if (bl->is_mmap) {
 | 
				
			||||||
			folio_put(virt_to_folio(bl->buf_ring));
 | 
								/*
 | 
				
			||||||
 | 
								 * io_kbuf_list_free() will free the page(s) at
 | 
				
			||||||
 | 
								 * ->release() time.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
			bl->buf_ring = NULL;
 | 
								bl->buf_ring = NULL;
 | 
				
			||||||
			bl->is_mmap = 0;
 | 
								bl->is_mmap = 0;
 | 
				
			||||||
		} else if (bl->buf_nr_pages) {
 | 
							} else if (bl->buf_nr_pages) {
 | 
				
			||||||
| 
						 | 
					@ -531,18 +539,28 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
 | 
				
			||||||
	return -EINVAL;
 | 
						return -EINVAL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,
 | 
					static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx,
 | 
				
			||||||
 | 
								      struct io_uring_buf_reg *reg,
 | 
				
			||||||
			      struct io_buffer_list *bl)
 | 
								      struct io_buffer_list *bl)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
 | 
						struct io_buf_free *ibf;
 | 
				
			||||||
	size_t ring_size;
 | 
						size_t ring_size;
 | 
				
			||||||
	void *ptr;
 | 
						void *ptr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
 | 
						ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
 | 
				
			||||||
	ptr = (void *) __get_free_pages(gfp, get_order(ring_size));
 | 
						ptr = io_mem_alloc(ring_size);
 | 
				
			||||||
	if (!ptr)
 | 
						if (!ptr)
 | 
				
			||||||
		return -ENOMEM;
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Allocate and store deferred free entry */
 | 
				
			||||||
 | 
						ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT);
 | 
				
			||||||
 | 
						if (!ibf) {
 | 
				
			||||||
 | 
							io_mem_free(ptr);
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						ibf->mem = ptr;
 | 
				
			||||||
 | 
						hlist_add_head(&ibf->list, &ctx->io_buf_list);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	bl->buf_ring = ptr;
 | 
						bl->buf_ring = ptr;
 | 
				
			||||||
	bl->is_mapped = 1;
 | 
						bl->is_mapped = 1;
 | 
				
			||||||
	bl->is_mmap = 1;
 | 
						bl->is_mmap = 1;
 | 
				
			||||||
| 
						 | 
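A note on the allocation switch in the hunk above: io_alloc_pbuf_ring() now goes through io_mem_alloc()/io_mem_free(), the ring-memory helpers that the parent commit (edecf16897) exposes to the rest of io_uring, so the deferred-free path can release the memory with the same routine. As a rough, non-authoritative sketch of what those helpers boil down to (inferred from the gfp flags and the folio_put() call used by the old open-coded path, not copied from the kernel source):

/* Rough sketch only; kernel-internal helpers, need <linux/gfp.h> and <linux/mm.h>. */
void *io_mem_alloc(size_t size)
{
	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;

	return (void *) __get_free_pages(gfp, get_order(size));
}

void io_mem_free(void *ptr)
{
	if (!ptr)
		return;

	/* drop the compound allocation taken in io_mem_alloc() */
	folio_put(virt_to_folio(ptr));
}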
@@ -599,7 +617,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	if (!(reg.flags & IOU_PBUF_RING_MMAP))
 		ret = io_pin_pbuf_ring(&reg, bl);
 	else
-		ret = io_alloc_pbuf_ring(&reg, bl);
+		ret = io_alloc_pbuf_ring(ctx, &reg, bl);
 
 	if (!ret) {
 		bl->nr_entries = reg.ring_entries;
@@ -649,3 +667,19 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
 
 	return bl->buf_ring;
 }
+
+/*
+ * Called at or after ->release(), free the mmap'ed buffers that we used
+ * for memory mapped provided buffer rings.
+ */
+void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx)
+{
+	struct io_buf_free *ibf;
+	struct hlist_node *tmp;
+
+	hlist_for_each_entry_safe(ibf, tmp, &ctx->io_buf_list, list) {
+		hlist_del(&ibf->list);
+		io_mem_free(ibf->mem);
+		kfree(ibf);
+	}
+}
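The user-visible consequence of the __io_remove_buffers change above: unregistering a mapped ring no longer frees its pages on the spot; they stay on ctx->io_buf_list until io_kbuf_mmap_list_free() runs at context teardown, after ->release(). A hedged sketch of the corresponding userspace teardown, pairing with the setup sketch near the top (hypothetical helper, error handling omitted):

#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

/* Hypothetical teardown path for the ring set up in the earlier sketch. */
static void teardown_mmap_buf_ring(int ring_fd, struct io_uring_buf_ring *ring,
				   unsigned int entries, unsigned int bgid)
{
	struct io_uring_buf_reg unreg = { .bgid = bgid };

	/* Unregister the group. Before this patch the ring pages were released
	 * here, even though this process could still have them mapped
	 * (remap_pfn_range() mappings do not pin the pages). */
	syscall(__NR_io_uring_register, ring_fd, IORING_UNREGISTER_PBUF_RING,
		&unreg, 1);

	/* With this patch the pages sit on ctx->io_buf_list and are only freed
	 * once the ring file's ->release() runs, i.e. after the mapping and
	 * the fd below are gone, so the mapping never points at freed memory. */
	munmap(ring, entries * sizeof(struct io_uring_buf));
	close(ring_fd);
}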
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -51,6 +51,8 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags);
 int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
 
+void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx);
+
 unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
 
 bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);