	io_uring: allow user configurable IO thread CPU affinity
io-wq defaults to per-node masks for IO workers. This works fine by
default, but isn't particularly handy for workloads that prefer more
specific affinities, for either performance or isolation reasons.

This adds IORING_REGISTER_IOWQ_AFF that allows the user to pass in a CPU
mask that is then applied to IO thread workers, and an
IORING_UNREGISTER_IOWQ_AFF that simply resets the masks back to the
default of per-node.

Note that no care is given to existing IO threads; they will need to go
through a reschedule before the affinity is correct, if they are already
running or sleeping.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
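Below is a minimal userspace sketch (not part of the patch) of how the new
opcode might be driven through the raw io_uring_register(2) syscall: per the
fs/io_uring.c hunk further down, arg points at the CPU mask and nr_args is
its size in bytes (capped at cpumask_size() on the kernel side). The ring_fd
parameter and the pin_iowq_to_cpus() helper are hypothetical names; the
fallback opcode value comes from the uapi hunk in this commit.

#define _GNU_SOURCE
#include <sched.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

#ifndef IORING_REGISTER_IOWQ_AFF
#define IORING_REGISTER_IOWQ_AFF	17	/* added by this patch */
#endif

/* Restrict io-wq workers of an existing ring to CPUs 0-3. */
static int pin_iowq_to_cpus(int ring_fd)
{
	cpu_set_t mask;
	int cpu;

	CPU_ZERO(&mask);
	for (cpu = 0; cpu < 4; cpu++)
		CPU_SET(cpu, &mask);

	/* arg = pointer to the mask, nr_args = mask size in bytes */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_IOWQ_AFF, &mask, sizeof(mask));
}

On success this returns 0; as the message above notes, workers that are
already running or sleeping only pick up the new mask once they reschedule.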
parent 0e03496d19
commit fe76421d1d
 fs/io-wq.c                    | 17 ++++++++++++++
 fs/io-wq.h                    |  2 ++
 fs/io_uring.c                 | 51 ++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  4 ++++
 4 files changed, 74 insertions(+)
diff --git a/fs/io-wq.c b/fs/io-wq.c
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -1087,6 +1087,23 @@ static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
 	return __io_wq_cpu_online(wq, cpu, false);
 }
 
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
+{
+	int i;
+
+	rcu_read_lock();
+	for_each_node(i) {
+		struct io_wqe *wqe = wq->wqes[i];
+
+		if (mask)
+			cpumask_copy(wqe->cpu_mask, mask);
+		else
+			cpumask_copy(wqe->cpu_mask, cpumask_of_node(i));
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
 static __init int io_wq_init(void)
 {
 	int ret;
diff --git a/fs/io-wq.h b/fs/io-wq.h
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -128,6 +128,8 @@ void io_wq_put_and_exit(struct io_wq *wq);
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
 
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
+
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
 	return work->flags & IO_WQ_WORK_HASHED;
diff --git a/fs/io_uring.c b/fs/io_uring.c
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9983,6 +9983,43 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
 	return -EINVAL;
 }
 
+static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg,
+				unsigned len)
+{
+	struct io_uring_task *tctx = current->io_uring;
+	cpumask_var_t new_mask;
+	int ret;
+
+	if (!tctx || !tctx->io_wq)
+		return -EINVAL;
+
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_clear(new_mask);
+	if (len > cpumask_size())
+		len = cpumask_size();
+
+	if (copy_from_user(new_mask, arg, len)) {
+		free_cpumask_var(new_mask);
+		return -EFAULT;
+	}
+
+	ret = io_wq_cpu_affinity(tctx->io_wq, new_mask);
+	free_cpumask_var(new_mask);
+	return ret;
+}
+
+static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
+{
+	struct io_uring_task *tctx = current->io_uring;
+
+	if (!tctx || !tctx->io_wq)
+		return -EINVAL;
+
+	return io_wq_cpu_affinity(tctx->io_wq, NULL);
+}
+
 static bool io_register_op_must_quiesce(int op)
 {
 	switch (op) {
@@ -9998,6 +10035,8 @@ static bool io_register_op_must_quiesce(int op)
 	case IORING_REGISTER_FILES_UPDATE2:
 	case IORING_REGISTER_BUFFERS2:
 	case IORING_REGISTER_BUFFERS_UPDATE:
+	case IORING_REGISTER_IOWQ_AFF:
+	case IORING_UNREGISTER_IOWQ_AFF:
 		return false;
 	default:
 		return true;
@@ -10137,6 +10176,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		ret = io_register_rsrc_update(ctx, arg, nr_args,
 					      IORING_RSRC_BUFFER);
 		break;
+	case IORING_REGISTER_IOWQ_AFF:
+		ret = -EINVAL;
+		if (!arg || !nr_args)
+			break;
+		ret = io_register_iowq_aff(ctx, arg, nr_args);
+		break;
+	case IORING_UNREGISTER_IOWQ_AFF:
+		ret = -EINVAL;
+		if (arg || nr_args)
+			break;
+		ret = io_unregister_iowq_aff(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -306,6 +306,10 @@ enum {
 	IORING_REGISTER_BUFFERS2		= 15,
 	IORING_REGISTER_BUFFERS_UPDATE		= 16,
 
+	/* set/clear io-wq thread affinities */
+	IORING_REGISTER_IOWQ_AFF		= 17,
+	IORING_UNREGISTER_IOWQ_AFF		= 18,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
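A matching sketch for the reverse direction, reusing the includes and the
hypothetical ring_fd from the sketch above: the __io_uring_register() hunk
rejects any argument for the unregister opcode, so arg must be NULL and
nr_args zero, after which the per-node default masks are restored.

#ifndef IORING_UNREGISTER_IOWQ_AFF
#define IORING_UNREGISTER_IOWQ_AFF	18	/* added by this patch */
#endif

/* Reset io-wq worker affinity back to the per-node defaults. */
static int reset_iowq_affinity(int ring_fd)
{
	/* any non-NULL arg or non-zero nr_args yields -EINVAL */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_UNREGISTER_IOWQ_AFF, NULL, 0);
}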