	io_uring: lockless task list
With networking use cases we see contention on the spinlock used to protect the task_list when multiple threads try to add completions at once. Instead we can use a lockless list, and assume that the first caller to add to the list is responsible for kicking off task work.

Signed-off-by: Dylan Yudaken <dylany@fb.com>
Link: https://lore.kernel.org/r/20220622134028.2013417-4-dylany@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent c34398a8c0
commit f88262e60b
					 4 changed files with 14 additions and 35 deletions
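
The pattern the change relies on is that llist_add() reports whether the list was empty before the insertion, so only the producer that makes the empty-to-non-empty transition needs to schedule task work, and the consumer detaches the whole batch in one step with llist_del_all(). Below is a minimal user-space sketch of that contract; it uses C11 atomics rather than the kernel's <linux/llist.h>, and the names (lockless_add, lockless_del_all, struct node) and the main() driver are illustrative, not taken from the kernel sources.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	struct node *next;
	int value;
};

struct lockless_head {
	_Atomic(struct node *) first;
};

/* Push one node; return true if the list was empty before this push. */
static bool lockless_add(struct node *n, struct lockless_head *head)
{
	struct node *old = atomic_load_explicit(&head->first, memory_order_relaxed);

	do {
		n->next = old;	/* re-linked on every CAS retry */
	} while (!atomic_compare_exchange_weak_explicit(&head->first, &old, n,
							memory_order_release,
							memory_order_relaxed));
	return old == NULL;
}

/* Detach the whole list atomically; entries come back in LIFO order. */
static struct node *lockless_del_all(struct lockless_head *head)
{
	return atomic_exchange_explicit(&head->first, NULL, memory_order_acquire);
}

int main(void)
{
	struct lockless_head head = { NULL };
	struct node a = { .value = 1 }, b = { .value = 2 };

	/* First producer sees the empty->non-empty transition: kick the worker. */
	if (lockless_add(&a, &head))
		printf("first add: schedule task work\n");

	/* Later producers see a non-empty list: work is already pending. */
	if (!lockless_add(&b, &head))
		printf("second add: already pending, nothing to do\n");

	/* Consumer drains the whole batch at once. */
	for (struct node *n = lockless_del_all(&head); n; n = n->next)
		printf("handled %d\n", n->value);

	return 0;
}

This first-adder contract is what lets io_req_task_work_add() drop the task_lock/task_running pair in the diff below: the return value of llist_add() takes over the role of the old task_running flag.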
				
			
@@ -428,7 +428,7 @@ typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked);
 struct io_task_work {
 	union {
-		struct io_wq_work_node	node;
+		struct llist_node	node;
 		struct llist_node	fallback_node;
 	};
 	io_req_tw_func_t		func;
 };

@@ -986,11 +986,12 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
 	percpu_ref_put(&ctx->refs);
 }
 
-static void handle_tw_list(struct io_wq_work_node *node,
+
+static void handle_tw_list(struct llist_node *node,
 			   struct io_ring_ctx **ctx, bool *locked)
 {
 	do {
-		struct io_wq_work_node *next = node->next;
+		struct llist_node *next = node->next;
 		struct io_kiocb *req = container_of(node, struct io_kiocb,
 						    io_task_work.node);
 

@@ -1014,23 +1015,11 @@ void tctx_task_work(struct callback_head *cb)
 	struct io_ring_ctx *ctx = NULL;
 	struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
 						  task_work);
+	struct llist_node *node = llist_del_all(&tctx->task_list);
 
-	while (1) {
-		struct io_wq_work_node *node;
-
-		spin_lock_irq(&tctx->task_lock);
-		node = tctx->task_list.first;
-		INIT_WQ_LIST(&tctx->task_list);
-		if (!node)
-			tctx->task_running = false;
-		spin_unlock_irq(&tctx->task_lock);
-		if (!node)
-			break;
+	if (node) {
 		handle_tw_list(node, &ctx, &uring_locked);
 		cond_resched();
-
-		if (data_race(!tctx->task_list.first) && uring_locked)
-			io_submit_flush_completions(ctx);
 	}
 
 	ctx_flush_and_put(ctx, &uring_locked);

@@ -1044,16 +1033,10 @@ void io_req_task_work_add(struct io_kiocb *req)
 {
 	struct io_uring_task *tctx = req->task->io_uring;
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_wq_work_node *node;
-	unsigned long flags;
+	struct llist_node *node;
 	bool running;
 
-	spin_lock_irqsave(&tctx->task_lock, flags);
-	wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
-	running = tctx->task_running;
-	if (!running)
-		tctx->task_running = true;
-	spin_unlock_irqrestore(&tctx->task_lock, flags);
+	running = !llist_add(&req->io_task_work.node, &tctx->task_list);
 
 	/* task_work already pending, we're done */
 	if (running)

@@ -1065,11 +1048,8 @@ void io_req_task_work_add(struct io_kiocb *req)
 	if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
 		return;
 
-	spin_lock_irqsave(&tctx->task_lock, flags);
-	tctx->task_running = false;
-	node = tctx->task_list.first;
-	INIT_WQ_LIST(&tctx->task_list);
-	spin_unlock_irqrestore(&tctx->task_lock, flags);
+
+	node = llist_del_all(&tctx->task_list);
 
 	while (node) {
 		req = container_of(node, struct io_kiocb, io_task_work.node);

@@ -86,8 +86,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
 	atomic_set(&tctx->in_idle, 0);
 	atomic_set(&tctx->inflight_tracked, 0);
 	task->io_uring = tctx;
-	spin_lock_init(&tctx->task_lock);
-	INIT_WQ_LIST(&tctx->task_list);
+	init_llist_head(&tctx->task_list);
 	init_task_work(&tctx->task_work, tctx_task_work);
 	return 0;
 }

@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
+#include <linux/llist.h>
+
 /*
  * Arbitrary limit, can be raised if need be
  */

@@ -19,9 +21,7 @@ struct io_uring_task {
 	struct percpu_counter		inflight;
 
 	struct { /* task_work */
-		spinlock_t		task_lock;
-		bool			task_running;
-		struct io_wq_work_list	task_list;
+		struct llist_head	task_list;
 		struct callback_head	task_work;
 	} ____cacheline_aligned_in_smp;
 };