mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	io_uring: make ctx->timeout_lock a raw spinlock
Chase reports that their tester complains about a locking context
mismatch:

=============================
[ BUG: Invalid wait context ]
6.13.0-rc1-gf137f14b7ccb-dirty #9 Not tainted
-----------------------------
syz.1.25198/182604 is trying to lock:
ffff88805e66a358 (&ctx->timeout_lock){-.-.}-{3:3}, at: spin_lock_irq include/linux/spinlock.h:376 [inline]
ffff88805e66a358 (&ctx->timeout_lock){-.-.}-{3:3}, at: io_match_task_safe io_uring/io_uring.c:218 [inline]
ffff88805e66a358 (&ctx->timeout_lock){-.-.}-{3:3}, at: io_match_task_safe+0x187/0x250 io_uring/io_uring.c:204
other info that might help us debug this:
context-{5:5}
1 lock held by syz.1.25198/182604:
 #0: ffff88802b7d48c0 (&acct->lock){+.+.}-{2:2}, at: io_acct_cancel_pending_work+0x2d/0x6b0 io_uring/io-wq.c:1049
stack backtrace:
CPU: 0 UID: 0 PID: 182604 Comm: syz.1.25198 Not tainted 6.13.0-rc1-gf137f14b7ccb-dirty #9
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:94 [inline]
 dump_stack_lvl+0x82/0xd0 lib/dump_stack.c:120
 print_lock_invalid_wait_context kernel/locking/lockdep.c:4826 [inline]
 check_wait_context kernel/locking/lockdep.c:4898 [inline]
 __lock_acquire+0x883/0x3c80 kernel/locking/lockdep.c:5176
 lock_acquire.part.0+0x11b/0x370 kernel/locking/lockdep.c:5849
 __raw_spin_lock_irq include/linux/spinlock_api_smp.h:119 [inline]
 _raw_spin_lock_irq+0x36/0x50 kernel/locking/spinlock.c:170
 spin_lock_irq include/linux/spinlock.h:376 [inline]
 io_match_task_safe io_uring/io_uring.c:218 [inline]
 io_match_task_safe+0x187/0x250 io_uring/io_uring.c:204
 io_acct_cancel_pending_work+0xb8/0x6b0 io_uring/io-wq.c:1052
 io_wq_cancel_pending_work io_uring/io-wq.c:1074 [inline]
 io_wq_cancel_cb+0xb0/0x390 io_uring/io-wq.c:1112
 io_uring_try_cancel_requests+0x15e/0xd70 io_uring/io_uring.c:3062
 io_uring_cancel_generic+0x6ec/0x8c0 io_uring/io_uring.c:3140
 io_uring_files_cancel include/linux/io_uring.h:20 [inline]
 do_exit+0x494/0x27a0 kernel/exit.c:894
 do_group_exit+0xb3/0x250 kernel/exit.c:1087
 get_signal+0x1d77/0x1ef0 kernel/signal.c:3017
 arch_do_signal_or_restart+0x79/0x5b0 arch/x86/kernel/signal.c:337
 exit_to_user_mode_loop kernel/entry/common.c:111 [inline]
 exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline]
 __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline]
 syscall_exit_to_user_mode+0x150/0x2a0 kernel/entry/common.c:218
 do_syscall_64+0xd8/0x250 arch/x86/entry/common.c:89
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

which is because io_uring has ctx->timeout_lock nesting inside the
io-wq acct lock, the latter of which is used from inside the scheduler
and hence is a raw spinlock, while the former is a "normal" spinlock
and can hence be sleeping on PREEMPT_RT.

Change ctx->timeout_lock to be a raw spinlock to solve this nesting
dependency on PREEMPT_RT=y.

Reported-by: chase xd <sl1589472800@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
		
							parent
							
								
									99d6af6e8a
								
							
						
					
					
						commit
						020b40f356
					
				
					 3 changed files with 26 additions and 26 deletions
				
			
		|  | @ -345,7 +345,7 @@ struct io_ring_ctx { | |||
| 
 | ||||
| 	/* timeouts */ | ||||
| 	struct { | ||||
| 		spinlock_t		timeout_lock; | ||||
| 		raw_spinlock_t		timeout_lock; | ||||
| 		struct list_head	timeout_list; | ||||
| 		struct list_head	ltimeout_list; | ||||
| 		unsigned		cq_last_tm_flush; | ||||
|  |  | |||
|  | @ -215,9 +215,9 @@ bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx, | |||
| 		struct io_ring_ctx *ctx = head->ctx; | ||||
| 
 | ||||
| 		/* protect against races with linked timeouts */ | ||||
| 		spin_lock_irq(&ctx->timeout_lock); | ||||
| 		raw_spin_lock_irq(&ctx->timeout_lock); | ||||
| 		matched = io_match_linked(head); | ||||
| 		spin_unlock_irq(&ctx->timeout_lock); | ||||
| 		raw_spin_unlock_irq(&ctx->timeout_lock); | ||||
| 	} else { | ||||
| 		matched = io_match_linked(head); | ||||
| 	} | ||||
|  | @ -333,7 +333,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) | |||
| 	init_waitqueue_head(&ctx->cq_wait); | ||||
| 	init_waitqueue_head(&ctx->poll_wq); | ||||
| 	spin_lock_init(&ctx->completion_lock); | ||||
| 	spin_lock_init(&ctx->timeout_lock); | ||||
| 	raw_spin_lock_init(&ctx->timeout_lock); | ||||
| 	INIT_WQ_LIST(&ctx->iopoll_list); | ||||
| 	INIT_LIST_HEAD(&ctx->io_buffers_comp); | ||||
| 	INIT_LIST_HEAD(&ctx->defer_list); | ||||
|  | @ -498,10 +498,10 @@ static void io_prep_async_link(struct io_kiocb *req) | |||
| 	if (req->flags & REQ_F_LINK_TIMEOUT) { | ||||
| 		struct io_ring_ctx *ctx = req->ctx; | ||||
| 
 | ||||
| 		spin_lock_irq(&ctx->timeout_lock); | ||||
| 		raw_spin_lock_irq(&ctx->timeout_lock); | ||||
| 		io_for_each_link(cur, req) | ||||
| 			io_prep_async_work(cur); | ||||
| 		spin_unlock_irq(&ctx->timeout_lock); | ||||
| 		raw_spin_unlock_irq(&ctx->timeout_lock); | ||||
| 	} else { | ||||
| 		io_for_each_link(cur, req) | ||||
| 			io_prep_async_work(cur); | ||||
|  |  | |||
|  | @ -74,10 +74,10 @@ static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts) | |||
| 	if (!io_timeout_finish(timeout, data)) { | ||||
| 		if (io_req_post_cqe(req, -ETIME, IORING_CQE_F_MORE)) { | ||||
| 			/* re-arm timer */ | ||||
| 			spin_lock_irq(&ctx->timeout_lock); | ||||
| 			raw_spin_lock_irq(&ctx->timeout_lock); | ||||
| 			list_add(&timeout->list, ctx->timeout_list.prev); | ||||
| 			hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); | ||||
| 			spin_unlock_irq(&ctx->timeout_lock); | ||||
| 			raw_spin_unlock_irq(&ctx->timeout_lock); | ||||
| 			return; | ||||
| 		} | ||||
| 	} | ||||
|  | @ -109,7 +109,7 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx) | |||
| 	u32 seq; | ||||
| 	struct io_timeout *timeout, *tmp; | ||||
| 
 | ||||
| 	spin_lock_irq(&ctx->timeout_lock); | ||||
| 	raw_spin_lock_irq(&ctx->timeout_lock); | ||||
| 	seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); | ||||
| 
 | ||||
| 	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { | ||||
|  | @ -134,7 +134,7 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx) | |||
| 		io_kill_timeout(req, 0); | ||||
| 	} | ||||
| 	ctx->cq_last_tm_flush = seq; | ||||
| 	spin_unlock_irq(&ctx->timeout_lock); | ||||
| 	raw_spin_unlock_irq(&ctx->timeout_lock); | ||||
| } | ||||
| 
 | ||||
| static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts) | ||||
|  | @ -200,9 +200,9 @@ void io_disarm_next(struct io_kiocb *req) | |||
| 	} else if (req->flags & REQ_F_LINK_TIMEOUT) { | ||||
| 		struct io_ring_ctx *ctx = req->ctx; | ||||
| 
 | ||||
| 		spin_lock_irq(&ctx->timeout_lock); | ||||
| 		raw_spin_lock_irq(&ctx->timeout_lock); | ||||
| 		link = io_disarm_linked_timeout(req); | ||||
| 		spin_unlock_irq(&ctx->timeout_lock); | ||||
| 		raw_spin_unlock_irq(&ctx->timeout_lock); | ||||
| 		if (link) | ||||
| 			io_req_queue_tw_complete(link, -ECANCELED); | ||||
| 	} | ||||
|  | @ -238,11 +238,11 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) | |||
| 	struct io_ring_ctx *ctx = req->ctx; | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	spin_lock_irqsave(&ctx->timeout_lock, flags); | ||||
| 	raw_spin_lock_irqsave(&ctx->timeout_lock, flags); | ||||
| 	list_del_init(&timeout->list); | ||||
| 	atomic_set(&req->ctx->cq_timeouts, | ||||
| 		atomic_read(&req->ctx->cq_timeouts) + 1); | ||||
| 	spin_unlock_irqrestore(&ctx->timeout_lock, flags); | ||||
| 	raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags); | ||||
| 
 | ||||
| 	if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS)) | ||||
| 		req_set_fail(req); | ||||
|  | @ -285,9 +285,9 @@ int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd) | |||
| { | ||||
| 	struct io_kiocb *req; | ||||
| 
 | ||||
| 	spin_lock_irq(&ctx->timeout_lock); | ||||
| 	raw_spin_lock_irq(&ctx->timeout_lock); | ||||
| 	req = io_timeout_extract(ctx, cd); | ||||
| 	spin_unlock_irq(&ctx->timeout_lock); | ||||
| 	raw_spin_unlock_irq(&ctx->timeout_lock); | ||||
| 
 | ||||
| 	if (IS_ERR(req)) | ||||
| 		return PTR_ERR(req); | ||||
|  | @ -330,7 +330,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) | |||
| 	struct io_ring_ctx *ctx = req->ctx; | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	spin_lock_irqsave(&ctx->timeout_lock, flags); | ||||
| 	raw_spin_lock_irqsave(&ctx->timeout_lock, flags); | ||||
| 	prev = timeout->head; | ||||
| 	timeout->head = NULL; | ||||
| 
 | ||||
|  | @ -345,7 +345,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) | |||
| 	} | ||||
| 	list_del(&timeout->list); | ||||
| 	timeout->prev = prev; | ||||
| 	spin_unlock_irqrestore(&ctx->timeout_lock, flags); | ||||
| 	raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags); | ||||
| 
 | ||||
| 	req->io_task_work.func = io_req_task_link_timeout; | ||||
| 	io_req_task_work_add(req); | ||||
|  | @ -472,12 +472,12 @@ int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) | |||
| 	} else { | ||||
| 		enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags); | ||||
| 
 | ||||
| 		spin_lock_irq(&ctx->timeout_lock); | ||||
| 		raw_spin_lock_irq(&ctx->timeout_lock); | ||||
| 		if (tr->ltimeout) | ||||
| 			ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode); | ||||
| 		else | ||||
| 			ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); | ||||
| 		spin_unlock_irq(&ctx->timeout_lock); | ||||
| 		raw_spin_unlock_irq(&ctx->timeout_lock); | ||||
| 	} | ||||
| 
 | ||||
| 	if (ret < 0) | ||||
|  | @ -572,7 +572,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags) | |||
| 	struct list_head *entry; | ||||
| 	u32 tail, off = timeout->off; | ||||
| 
 | ||||
| 	spin_lock_irq(&ctx->timeout_lock); | ||||
| 	raw_spin_lock_irq(&ctx->timeout_lock); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * sqe->off holds how many events that need to occur for this | ||||
|  | @ -611,7 +611,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags) | |||
| 	list_add(&timeout->list, entry); | ||||
| 	data->timer.function = io_timeout_fn; | ||||
| 	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); | ||||
| 	spin_unlock_irq(&ctx->timeout_lock); | ||||
| 	raw_spin_unlock_irq(&ctx->timeout_lock); | ||||
| 	return IOU_ISSUE_SKIP_COMPLETE; | ||||
| } | ||||
| 
 | ||||
|  | @ -620,7 +620,7 @@ void io_queue_linked_timeout(struct io_kiocb *req) | |||
| 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout); | ||||
| 	struct io_ring_ctx *ctx = req->ctx; | ||||
| 
 | ||||
| 	spin_lock_irq(&ctx->timeout_lock); | ||||
| 	raw_spin_lock_irq(&ctx->timeout_lock); | ||||
| 	/*
 | ||||
| 	 * If the back reference is NULL, then our linked request finished | ||||
| 	 * before we got a chance to setup the timer | ||||
|  | @ -633,7 +633,7 @@ void io_queue_linked_timeout(struct io_kiocb *req) | |||
| 				data->mode); | ||||
| 		list_add_tail(&timeout->list, &ctx->ltimeout_list); | ||||
| 	} | ||||
| 	spin_unlock_irq(&ctx->timeout_lock); | ||||
| 	raw_spin_unlock_irq(&ctx->timeout_lock); | ||||
| 	/* drop submission reference */ | ||||
| 	io_put_req(req); | ||||
| } | ||||
|  | @ -668,7 +668,7 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx | |||
| 	 * timeout_lock first to keep locking ordering. | ||||
| 	 */ | ||||
| 	spin_lock(&ctx->completion_lock); | ||||
| 	spin_lock_irq(&ctx->timeout_lock); | ||||
| 	raw_spin_lock_irq(&ctx->timeout_lock); | ||||
| 	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) { | ||||
| 		struct io_kiocb *req = cmd_to_io_kiocb(timeout); | ||||
| 
 | ||||
|  | @ -676,7 +676,7 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx | |||
| 		    io_kill_timeout(req, -ECANCELED)) | ||||
| 			canceled++; | ||||
| 	} | ||||
| 	spin_unlock_irq(&ctx->timeout_lock); | ||||
| 	raw_spin_unlock_irq(&ctx->timeout_lock); | ||||
| 	spin_unlock(&ctx->completion_lock); | ||||
| 	return canceled != 0; | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Jens Axboe
						Jens Axboe