	io_uring/net: improve recv bundles
Current recv bundles are only supported for multishot receives, and they
also always post at least two CQEs if more data is available than what a
single buffer will hold. This happens because the initial bundle recv
fills a single buffer, and then does the rest of what is in the socket
as a followup receive. As shown by a test program, if 1k buffers are
provided and 32k is available to receive in the socket, you'd get the
following completions:
bundle=1, mshot=0
cqe res 1024
cqe res 1024
[...]
cqe res 1024
bundle=1, mshot=1
cqe res 1024
cqe res 31744
where bundle=1 && mshot=0 will post 32 1k completions, and bundle=1 &&
mshot=1 will post a 1k completion and then a 31k completion.
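For reference, a hedged sketch of how such a single-shot bundle recv
might be issued from userspace with liburing (this is not the author's
test program; NR_BUFS, BUF_SIZE, BGID, and bundle_recv() are
illustrative assumptions):

#include <liburing.h>
#include <stdlib.h>

#define NR_BUFS		32	/* provided buffers, must be a power of 2 */
#define BUF_SIZE	1024
#define BGID		0	/* buffer group id */

/* illustrative helper: one bundle recv, one CQE spanning many buffers */
static int bundle_recv(struct io_uring *ring, int sockfd)
{
	struct io_uring_buf_ring *br;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i, ret;

	/* register a ring of NR_BUFS provided buffers in group BGID */
	br = io_uring_setup_buf_ring(ring, NR_BUFS, BGID, 0, &ret);
	if (!br)
		return ret;
	for (i = 0; i < NR_BUFS; i++)	/* malloc checks elided for brevity */
		io_uring_buf_ring_add(br, malloc(BUF_SIZE), BUF_SIZE, i,
				      io_uring_buf_ring_mask(NR_BUFS), i);
	io_uring_buf_ring_advance(br, NR_BUFS);

	/* single-shot (mshot=0) recv with the bundle flag set */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_recv(sqe, sockfd, NULL, 0, 0);
	sqe->flags |= IOSQE_BUFFER_SELECT;
	sqe->buf_group = BGID;
	sqe->ioprio |= IORING_RECVSEND_BUNDLE;
	io_uring_submit(ring);

	/* with the patch below: one CQE whose res covers all buffers used */
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret)
		return ret;
	ret = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	return ret;
}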
To support bundle recv without multishot, it's possible to simply retry
the recv immediately and post a single completion, rather than splitting
it into two. With the patch below, the same test looks as follows:
bundle=1, mshot=0
cqe res 32768
bundle=1, mshot=1
cqe res 32768
where mshot=0 now works fine for bundles, and both variants post just a
single 32k completion rather than splitting it into separate ones.
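Since a single CQE can now span several provided buffers, userspace maps
one completion back to buffer ids by simple division. A minimal sketch,
assuming fixed-size BUF_SIZE buffers (handle_bundle_cqe() is a
hypothetical helper, not part of the patch):

#include <liburing.h>

#define BUF_SIZE	1024	/* must match the registered buffer size */

/* hypothetical consumer-side helper, not part of the patch */
static void handle_bundle_cqe(struct io_uring_cqe *cqe)
{
	unsigned int first_bid, nbufs;

	if (!(cqe->flags & IORING_CQE_F_BUFFER) || cqe->res <= 0)
		return;
	/* bundles consume contiguous ids starting at the reported one */
	first_bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
	nbufs = (cqe->res + BUF_SIZE - 1) / BUF_SIZE;	/* last may be short */
	/* data sits in buffers first_bid .. first_bid + nbufs - 1,
	   wrapping at the buffer ring size */
}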
Posting fewer completions is always a nice win, and not needing
multishot for proper bundle efficiency is a bonus for cases that can't
necessarily use multishot.
Reported-by: Norman Maurer <norman_maurer@apple.com>
Link: https://lore.kernel.org/r/184f9f92-a682-4205-a15d-89e18f664502@kernel.dk
Fixes: 2f9c9515bd ("io_uring/net: support bundles for recv")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
			
			
commit 7c71a0af81
parent 932de5e35f

 io_uring/net.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
diff --git a/io_uring/net.c b/io_uring/net.c
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -76,6 +76,7 @@ struct io_sr_msg {
 	/* initialised and used only by !msg send variants */
 	u16				buf_group;
 	u16				buf_index;
+	bool				retry;
 	void __user			*msg_control;
 	/* used only for send zerocopy */
 	struct io_kiocb 		*notif;
@@ -187,6 +188,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
 
 	req->flags &= ~REQ_F_BL_EMPTY;
 	sr->done_io = 0;
+	sr->retry = false;
 	sr->len = 0; /* get from the provided buffer */
 	req->buf_index = sr->buf_group;
 }
@@ -402,6 +404,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
 	sr->done_io = 0;
+	sr->retry = false;
 
 	if (req->opcode != IORING_OP_SEND) {
 		if (sqe->addr2 || sqe->file_index)
@@ -785,6 +788,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
 	sr->done_io = 0;
+	sr->retry = false;
 
 	if (unlikely(sqe->file_index || sqe->addr2))
 		return -EINVAL;
@@ -833,6 +837,9 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return io_recvmsg_prep_setup(req);
 }
 
+/* bits to clear in old and inherit in new cflags on bundle retry */
+#define CQE_F_MASK	(IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)
+
 /*
  * Finishes io_recv and io_recvmsg.
  *
@@ -852,9 +859,19 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 	if (sr->flags & IORING_RECVSEND_BUNDLE) {
 		cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
 				      issue_flags);
+		if (sr->retry)
+			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
 		/* bundle with no more immediate buffers, we're done */
 		if (req->flags & REQ_F_BL_EMPTY)
 			goto finish;
+		/* if more is available, retry and append to this one */
+		if (!sr->retry && kmsg->msg.msg_inq > 0 && *ret > 0) {
+			req->cqe.flags = cflags & ~CQE_F_MASK;
+			sr->len = kmsg->msg.msg_inq;
+			sr->done_io += *ret;
+			sr->retry = true;
+			return false;
+		}
 	} else {
 		cflags |= io_put_kbuf(req, *ret, issue_flags);
 	}
@@ -1233,6 +1250,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_kiocb *notif;
 
 	zc->done_io = 0;
+	zc->retry = false;
 	req->flags |= REQ_F_POLL_NO_LAZY;
 
 	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
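To spell out the cflags juggling in io_recv_finish() above: the first
pass stashes its flags, minus the CQE_F_MASK bits, in req->cqe.flags,
which preserves the starting buffer id in the upper bits; the retry pass
then inherits only IORING_CQE_F_SOCK_NONEMPTY/IORING_CQE_F_MORE from the
fresh flags. A standalone illustration of that merge
(bundle_retry_cflags() is a hypothetical helper, not in the patch):

#include <linux/io_uring.h>

/* same mask as the patch: bits to clear in old and inherit in new */
#define CQE_F_MASK	(IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)

/* hypothetical: final CQE flags after a bundle retry */
static inline __u32 bundle_retry_cflags(__u32 stashed, __u32 fresh)
{
	/* keep the stashed buffer id bits, take state bits from fresh */
	return stashed | (fresh & CQE_F_MASK);
}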