io_uring-6.6-2023-10-06

-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmUgNTAQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgph6VEACYZAz82hgQey4XZiO/8zXM5pZAAcfXBQ89
/yuedl4eOScymzyUNhgDYDHVq1Ccpxf6Wf0H8JAY0aUXkERePfw8gno6naaR8+Qm
92hcBSQA4OCsvR+RQ/cOZWHRsNQ9/i5iWuFnc4FuwhwqWjprKxsje2woOAAEVVsB
SOmN+YnHqXfXLJvA+WJYMGgbyBgHkSNGZStFajMdAzwrlCBdJ7NqbvamsinMFz5A
JhYHMLk1Z1JocVJt7TWmsl02LPvJKqUMSDL+li3q7xB4NoaP4iRTkOipGipX6ATn
BOrh1kpROBQafDQxkLk0KdnnvidpoJFwfDPkB3Fp88OW7K5it8hctIhdcMoFG/CH
lpE3aQwH2i3gq/FqAA6UUKUZAhkN6aLAFk3k7QVMjU0OZEZ8Evq7iSPRiXYtIyfO
vbdUSF3qGjxz0apYm6mX7MFC7VFhRqkRdOjk34S1k35yipt39IkO+itITum73+ju
KOy2E/4Z1UKykeWwqNe1XddUSI9+YK27EQbbKZJXppVxKgPHweNA6bh0Qu15Qr25
runYQEDQRoKmI7ih51rTy4LnvzmaddjAJbuUQyJXNLe5Wk29LzgJcj+T1kr+YxTJ
fzJpEl5+AQm3dJm8/TcBrjem6EqIjR9fBdFv900pxWFy+2ApnDPu0UlR+O05C6ZR
f0RMxJy0Xw==
=t9If
-----END PGP SIGNATURE-----

Merge tag 'io_uring-6.6-2023-10-06' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - Fix a syzbot-reported crash on 32-bit arm with highmem; digging for
   potentially similar issues turned up one more (me)

 - Fix a syzbot report with PROVE_LOCKING=y and setting up the ring in
   a disabled state (me)

 - Fix a race between CPU hotplug and io-wq init (Jeff)

* tag 'io_uring-6.6-2023-10-06' of git://git.kernel.dk/linux:
  io-wq: fully initialize wqe before calling cpuhp_state_add_instance_nocalls()
  io_uring: don't allow IORING_SETUP_NO_MMAP rings on highmem pages
  io_uring: ensure io_lockdep_assert_cq_locked() handles disabled rings
  io_uring/kbuf: don't allow registered buffer rings on highmem pages
commit a88c386947

4 changed files with 65 additions and 29 deletions
io_uring/io-wq.c

@@ -1151,9 +1151,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 	wq = kzalloc(sizeof(struct io_wq), GFP_KERNEL);
 	if (!wq)
 		return ERR_PTR(-ENOMEM);
-	ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
-	if (ret)
-		goto err_wq;
 
 	refcount_inc(&data->hash->refs);
 	wq->hash = data->hash;
@@ -1186,13 +1183,14 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 	wq->task = get_task_struct(data->task);
 	atomic_set(&wq->worker_refs, 1);
 	init_completion(&wq->worker_done);
+	ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+	if (ret)
+		goto err;
+
 	return wq;
 err:
 	io_wq_put_hash(data->hash);
-	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
 	free_cpumask_var(wq->cpu_mask);
-err_wq:
 	kfree(wq);
 	return ERR_PTR(ret);
 }
 
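The io-wq change above fixes the hotplug race by registering the wq with the CPU-hotplug framework only after every field the callback might touch has been initialized; the early registration (and its err_wq label) is removed, and failures funnel through the existing err label. Below is a minimal, hedged sketch of the same ordering pattern. All names (widget, widget_hp_state, widget_create) are hypothetical and not part of the patch; the sketch assumes widget_hp_state was allocated elsewhere with cpuhp_setup_state_multi().

#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/cpuhotplug.h>
#include <linux/err.h>
#include <linux/slab.h>

/* Hypothetical dynamic hotplug state, assumed to have been set up
 * elsewhere via cpuhp_setup_state_multi(). */
static enum cpuhp_state widget_hp_state;

struct widget {
	atomic_t		refs;
	struct completion	done;
	struct hlist_node	cpuhp_node;
};

static struct widget *widget_create(void)
{
	struct widget *w;
	int ret;

	w = kzalloc(sizeof(*w), GFP_KERNEL);
	if (!w)
		return ERR_PTR(-ENOMEM);

	/* Initialize everything the hotplug callback could observe... */
	atomic_set(&w->refs, 1);
	init_completion(&w->done);

	/* ...and only then publish the object to the framework; the
	 * callback may run on another CPU as soon as this returns. */
	ret = cpuhp_state_add_instance_nocalls(widget_hp_state, &w->cpuhp_node);
	if (ret) {
		kfree(w);
		return ERR_PTR(ret);
	}
	return w;
}

The design point is simply ordering: registration is the moment the object becomes reachable from another CPU, so it has to be the last step of construction and the first thing undone on teardown.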
io_uring/io_uring.c

@@ -2686,7 +2686,7 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
 {
 	struct page **page_array;
 	unsigned int nr_pages;
-	int ret;
+	int ret, i;
 
 	*npages = 0;
 
@@ -2716,6 +2716,20 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
 	 */
 	if (page_array[0] != page_array[ret - 1])
 		goto err;
+
+	/*
+	 * Can't support mapping user allocated ring memory on 32-bit archs
+	 * where it could potentially reside in highmem. Just fail those with
+	 * -EINVAL, just like we did on kernels that didn't support this
+	 * feature.
+	 */
+	for (i = 0; i < nr_pages; i++) {
+		if (PageHighMem(page_array[i])) {
+			ret = -EINVAL;
+			goto err;
+		}
+	}
+
 	*pages = page_array;
 	*npages = nr_pages;
 	return page_to_virt(page_array[0]);
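The __io_uaddr_map() change rejects user-provided ring memory that ends up in highmem, because the function hands back page_to_virt() of the first page, which only yields a usable kernel pointer for lowmem pages on 32-bit. As a hedged illustration of the added check, it could be expressed as a standalone helper like the hypothetical one below (not part of the patch):

#include <linux/highmem.h>
#include <linux/mm.h>

/*
 * Hypothetical helper, not in the patch: page_to_virt()/page_address()
 * are only valid for lowmem pages, so a mapping that relies on them
 * must refuse any highmem page up front.
 */
static bool pages_are_lowmem(struct page **pages, unsigned int nr_pages)
{
	unsigned int i;

	for (i = 0; i < nr_pages; i++) {
		if (PageHighMem(pages[i]))
			return false;
	}
	return true;
}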
io_uring/io_uring.h

@@ -86,20 +86,33 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
 bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
 			bool cancel_all);
 
-#define io_lockdep_assert_cq_locked(ctx)				\
-	do {								\
-		lockdep_assert(in_task());				\
-									\
-		if (ctx->flags & IORING_SETUP_IOPOLL) {			\
-			lockdep_assert_held(&ctx->uring_lock);		\
-		} else if (!ctx->task_complete) {			\
-			lockdep_assert_held(&ctx->completion_lock);	\
-		} else if (ctx->submitter_task->flags & PF_EXITING) {	\
-			lockdep_assert(current_work());			\
-		} else {						\
-			lockdep_assert(current == ctx->submitter_task);	\
-		}							\
-	} while (0)
+#if defined(CONFIG_PROVE_LOCKING)
+static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
+{
+	lockdep_assert(in_task());
+
+	if (ctx->flags & IORING_SETUP_IOPOLL) {
+		lockdep_assert_held(&ctx->uring_lock);
+	} else if (!ctx->task_complete) {
+		lockdep_assert_held(&ctx->completion_lock);
+	} else if (ctx->submitter_task) {
+		/*
+		 * ->submitter_task may be NULL and we can still post a CQE,
+		 * if the ring has been setup with IORING_SETUP_R_DISABLED.
+		 * Not from an SQE, as those cannot be submitted, but via
+		 * updating tagged resources.
+		 */
+		if (ctx->submitter_task->flags & PF_EXITING)
+			lockdep_assert(current_work());
+		else
+			lockdep_assert(current == ctx->submitter_task);
+	}
+}
+#else
+static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
+{
+}
+#endif
 
 static inline void io_req_task_work_add(struct io_kiocb *req)
 {
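Two things change in io_uring.h: the macro becomes a pair of inline functions so the body compiles away entirely without CONFIG_PROVE_LOCKING, and ctx->submitter_task is now tested for NULL before being dereferenced, since a ring created with IORING_SETUP_R_DISABLED can post CQEs (via tagged resource updates) while submitter_task is still unset. A generic sketch of that shape follows, using hypothetical names (example_ctx, example_assert_owner) purely for illustration; it is not the io_uring helper itself.

#include <linux/lockdep.h>
#include <linux/sched.h>

/* Hypothetical context type for illustration only. */
struct example_ctx {
	struct task_struct *owner;	/* may be NULL until enabled */
};

#if defined(CONFIG_PROVE_LOCKING)
/* Checked variant: only dereference ->owner after the NULL test. */
static inline void example_assert_owner(struct example_ctx *ctx)
{
	if (ctx->owner)
		lockdep_assert(current == ctx->owner);
}
#else
/* Compiled-out stub, so callers never need #ifdefs of their own. */
static inline void example_assert_owner(struct example_ctx *ctx)
{
}
#endif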
io_uring/kbuf.c

@@ -477,7 +477,7 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
 {
 	struct io_uring_buf_ring *br;
 	struct page **pages;
-	int nr_pages;
+	int i, nr_pages;
 
 	pages = io_pin_pages(reg->ring_addr,
 			     flex_array_size(br, bufs, reg->ring_entries),
@@ -485,6 +485,17 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
 	if (IS_ERR(pages))
 		return PTR_ERR(pages);
 
+	/*
+	 * Apparently some 32-bit boxes (ARM) will return highmem pages,
+	 * which then need to be mapped. We could support that, but it'd
+	 * complicate the code and slowdown the common cases quite a bit.
+	 * So just error out, returning -EINVAL just like we did on kernels
+	 * that didn't support mapped buffer rings.
+	 */
+	for (i = 0; i < nr_pages; i++)
+		if (PageHighMem(pages[i]))
+			goto error_unpin;
+
 	br = page_address(pages[0]);
 #ifdef SHM_COLOUR
 	/*
@@ -496,13 +507,8 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
 	 * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
 	 * this transparently.
 	 */
-	if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) {
-		int i;
-
-		for (i = 0; i < nr_pages; i++)
-			unpin_user_page(pages[i]);
-		return -EINVAL;
-	}
+	if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1))
+		goto error_unpin;
 #endif
 	bl->buf_pages = pages;
 	bl->buf_nr_pages = nr_pages;
@@ -510,6 +516,11 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
 	bl->is_mapped = 1;
 	bl->is_mmap = 0;
 	return 0;
+error_unpin:
+	for (i = 0; i < nr_pages; i++)
+		unpin_user_page(pages[i]);
+	kvfree(pages);
+	return -EINVAL;
 }
 
 static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,
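The kbuf.c change applies the same highmem rejection to registered (pinned) buffer rings and, in doing so, consolidates failure handling under a single error_unpin label; as the diff shows, that unified path also kvfree()s the pages array, which the old inline SHM_COLOUR error path did not. Below is a compact, hedged sketch of that cleanup shape. The function name (example_validate_pinned) is hypothetical and the body is a simplified stand-in for the real io_pin_pbuf_ring() logic.

#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/slab.h>

/*
 * Hypothetical sketch, not the patch itself: once pages are pinned,
 * every failure funnels through one label that unpins the pages and
 * frees the page array, so no exit path can leak either.
 */
static int example_validate_pinned(struct page **pages, int nr_pages)
{
	int i;

	for (i = 0; i < nr_pages; i++)
		if (PageHighMem(pages[i]))
			goto error_unpin;

	/* ... map and use page_address(pages[0]) here ... */
	return 0;
error_unpin:
	for (i = 0; i < nr_pages; i++)
		unpin_user_page(pages[i]);
	kvfree(pages);
	return -EINVAL;
}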