mirror of https://github.com/torvalds/linux.git
	io_uring: add option to remove SQ indirection
io_uring: add option to remove SQ indirection

Not many are aware of it, but the io_uring submission queue has two levels.
The first level usually appears as sq_array and stores indexes into the
actual SQ. To my knowledge, no one has ever seriously used it, nor does
liburing expose it to users. Add IORING_SETUP_NO_SQARRAY; when it is set,
we don't bother creating and using the sq_array, and the SQ head/tail
point directly into the SQ. This improves the memory footprint, in terms
of both allocations and cache usage, and should also make io_get_sqe()
less branchy in the end.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/0ffa3268a5ef61d326201ff43a233315c96312e0.1692916914.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent e5598d6ae6
commit 2af89abda7

2 changed files with 37 additions and 20 deletions
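To make the flag's effect concrete, below is a minimal userspace sketch (not part of the patch) that creates a ring with IORING_SETUP_NO_SQARRAY through the raw syscalls and submits a single NOP. With the flag set, the SQ tail indexes the SQE array directly, so the usual store of an SQE index into sq_array disappears from the submission path. The fallback #define, the single ring mmap, and the GCC/Clang __atomic builtins are assumptions of the sketch, not part of this change, and error handling is trimmed.

/*
 * Minimal sketch, not part of the patch: create a ring with
 * IORING_SETUP_NO_SQARRAY via raw syscalls and submit one NOP. Assumes a
 * kernel new enough to accept the flag (it is rejected with EINVAL
 * otherwise), IORING_FEAT_SINGLE_MMAP for the single ring mapping, and
 * GCC/Clang __atomic builtins.
 */
#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

#ifndef IORING_SETUP_NO_SQARRAY
#define IORING_SETUP_NO_SQARRAY	(1U << 16)	/* fallback for older uapi headers */
#endif

static int sys_io_uring_setup(unsigned entries, struct io_uring_params *p)
{
	return (int)syscall(__NR_io_uring_setup, entries, p);
}

static int sys_io_uring_enter(int fd, unsigned to_submit, unsigned min_complete,
			      unsigned flags)
{
	return (int)syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
			    flags, NULL, 0);
}

int main(void)
{
	struct io_uring_params p = { .flags = IORING_SETUP_NO_SQARRAY };
	int fd = sys_io_uring_setup(8, &p);

	if (fd < 0) {
		perror("io_uring_setup");	/* e.g. EINVAL on pre-flag kernels */
		return 1;
	}

	/*
	 * One mapping covers the SQ/CQ ring headers and the CQEs; with
	 * NO_SQARRAY nothing follows the CQEs, so this is the whole ring.
	 */
	size_t ring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
	unsigned char *ring = mmap(NULL, ring_sz, PROT_READ | PROT_WRITE,
				   MAP_SHARED, fd, IORING_OFF_SQ_RING);
	struct io_uring_sqe *sqes = mmap(NULL, p.sq_entries * sizeof(struct io_uring_sqe),
					 PROT_READ | PROT_WRITE, MAP_SHARED, fd,
					 IORING_OFF_SQES);
	if (ring == MAP_FAILED || sqes == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	unsigned *sq_tail = (unsigned *)(ring + p.sq_off.tail);
	unsigned sq_mask = *(unsigned *)(ring + p.sq_off.ring_mask);
	unsigned tail = *sq_tail;

	/*
	 * The SQ tail indexes the SQE array directly; without the flag we
	 * would also have to store the SQE index into
	 * sq_array[tail & sq_mask] before bumping the tail.
	 */
	struct io_uring_sqe *sqe = &sqes[tail & sq_mask];
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_NOP;
	__atomic_store_n(sq_tail, tail + 1, __ATOMIC_RELEASE);

	if (sys_io_uring_enter(fd, 1, 1, IORING_ENTER_GETEVENTS) < 0) {
		perror("io_uring_enter");
		return 1;
	}

	/* We waited for one completion, so one CQE is guaranteed to be there. */
	struct io_uring_cqe *cqes = (struct io_uring_cqe *)(ring + p.cq_off.cqes);
	unsigned *cq_head = (unsigned *)(ring + p.cq_off.head);
	unsigned cq_mask = *(unsigned *)(ring + p.cq_off.ring_mask);

	printf("NOP completed, res=%d\n", cqes[*cq_head & cq_mask].res);
	__atomic_store_n(cq_head, *cq_head + 1, __ATOMIC_RELEASE);
	close(fd);
	return 0;
}

On kernels without the flag, io_uring_setup() fails with EINVAL, so real code should fall back to the traditional sq_array layout.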
				
			
include/uapi/linux/io_uring.h

@@ -185,6 +185,11 @@ enum {
  */
 #define IORING_SETUP_REGISTERED_FD_ONLY	(1U << 15)
 
+/*
+ * Removes indirection through the SQ index array.
+ */
+#define IORING_SETUP_NO_SQARRAY		(1U << 16)
+
 enum io_uring_op {
 	IORING_OP_NOP,
 	IORING_OP_READV,
io_uring/io_uring.c

@@ -2339,8 +2339,21 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
  */
 static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
 {
-	unsigned head, mask = ctx->sq_entries - 1;
-	unsigned sq_idx = ctx->cached_sq_head++ & mask;
+	unsigned mask = ctx->sq_entries - 1;
+	unsigned head = ctx->cached_sq_head++ & mask;
+
+	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) {
+		head = READ_ONCE(ctx->sq_array[head]);
+		if (unlikely(head >= ctx->sq_entries)) {
+			/* drop invalid entries */
+			spin_lock(&ctx->completion_lock);
+			ctx->cq_extra--;
+			spin_unlock(&ctx->completion_lock);
+			WRITE_ONCE(ctx->rings->sq_dropped,
+				   READ_ONCE(ctx->rings->sq_dropped) + 1);
+			return false;
+		}
+	}
 
 	/*
 	 * The cached sq head (or cq tail) serves two purposes:
@@ -2350,22 +2363,12 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
 	 * 2) allows the kernel side to track the head on its own, even
 	 *    though the application is the one updating it.
 	 */
-	head = READ_ONCE(ctx->sq_array[sq_idx]);
-	if (likely(head < ctx->sq_entries)) {
-		/* double index for 128-byte SQEs, twice as long */
-		if (ctx->flags & IORING_SETUP_SQE128)
-			head <<= 1;
-		*sqe = &ctx->sq_sqes[head];
-		return true;
-	}
 
-	/* drop invalid entries */
-	spin_lock(&ctx->completion_lock);
-	ctx->cq_extra--;
-	spin_unlock(&ctx->completion_lock);
-	WRITE_ONCE(ctx->rings->sq_dropped,
-		   READ_ONCE(ctx->rings->sq_dropped) + 1);
-	return false;
+	/* double index for 128-byte SQEs, twice as long */
+	if (ctx->flags & IORING_SETUP_SQE128)
+		head <<= 1;
+	*sqe = &ctx->sq_sqes[head];
+	return true;
 }
 
 int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
@@ -2734,6 +2737,12 @@ static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries
 		return SIZE_MAX;
 #endif
 
+	if (ctx->flags & IORING_SETUP_NO_SQARRAY) {
+		if (sq_offset)
+			*sq_offset = SIZE_MAX;
+		return off;
+	}
+
 	if (sq_offset)
 		*sq_offset = off;
 
@@ -3710,7 +3719,8 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 		return PTR_ERR(rings);
 
 	ctx->rings = rings;
-	ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
+	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
+		ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
 	rings->sq_ring_mask = p->sq_entries - 1;
 	rings->cq_ring_mask = p->cq_entries - 1;
 	rings->sq_ring_entries = p->sq_entries;
@@ -3921,7 +3931,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
 	p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries);
 	p->sq_off.flags = offsetof(struct io_rings, sq_flags);
 	p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
-	p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
+	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
+		p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
 	p->sq_off.resv1 = 0;
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP))
 		p->sq_off.user_addr = 0;
@@ -4010,7 +4021,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 			IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG |
 			IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
 			IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
-			IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY))
+			IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY |
+			IORING_SETUP_NO_SQARRAY))
 		return -EINVAL;
 
 	return io_uring_create(entries, &p, params);
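For scale (not part of the patch): the sq_array costs one __u32 per SQ entry in the shared ring mapping laid out by rings_size() above, so a 4096-entry SQ spends 4096 * 4 B = 16 KiB on the indirection alone. With IORING_SETUP_NO_SQARRAY that space is never reserved, and io_get_sqe() drops the dependent load through the array (and the extra cache line it touches) for every SQE it fetches.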