mirror of https://github.com/torvalds/linux.git (synced 2025-11-03 18:20:25 +02:00)
	io_uring/net: support bundles for send
If IORING_OP_SEND is used with provided buffers, the caller may also set
IORING_RECVSEND_BUNDLE to turn it into a multi-buffer send. The idea is
that an application can fill outgoing buffers in a provided buffer group,
and then arm a single send that will service them all. Once there are no
more buffers to send, or if the requested length has been sent, the
request posts a single completion for all the buffers.

This only enables it for IORING_OP_SEND; IORING_OP_SENDMSG is coming in a
separate patch. However, this patch does do a lot of the prep work that
makes wiring up the sendmsg variant pretty trivial. They share the prep
side.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 35c8711c8f
commit a05d1f625c

2 changed files with 138 additions and 18 deletions
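To make the API shape concrete, here is a hedged userspace sketch, not part of this commit: queue_bundle_send(), BGID, and the bufs[] array are invented for illustration, while the io_uring_buf_ring_* calls are liburing's stock provided-buffer helpers. The bundle flag is passed in sqe->ioprio because io_sendmsg_prep() below reads sr->flags from there, and nbufs is assumed to be the (power-of-two) size of the buffer ring.

/*
 * Illustrative only: fill a provided buffer ring, then arm one bundled
 * send that services every buffer in the group.
 */
#include <liburing.h>

#define BGID	7	/* arbitrary buffer group ID for this sketch */

static int queue_bundle_send(struct io_uring *ring, int sockfd,
			     struct io_uring_buf_ring *br,
			     void *bufs[], unsigned buf_len, int nbufs)
{
	struct io_uring_sqe *sqe;
	int i;

	/* publish the filled buffers into the provided buffer ring */
	for (i = 0; i < nbufs; i++)
		io_uring_buf_ring_add(br, bufs[i], buf_len, i,
				      io_uring_buf_ring_mask(nbufs), i);
	io_uring_buf_ring_advance(br, nbufs);

	sqe = io_uring_get_sqe(ring);
	/* addr/len are NULL/0: the payload comes from the buffer group */
	io_uring_prep_send(sqe, sockfd, NULL, 0, 0);
	sqe->flags |= IOSQE_BUFFER_SELECT;	/* select from a group */
	sqe->buf_group = BGID;			/* group to consume from */
	sqe->ioprio |= IORING_RECVSEND_BUNDLE;	/* turn on bundling */

	return io_uring_submit(ring);
}

Note that the prep side below forces MSG_WAITALL for bundles, so a short send completes the bundle sequence rather than leaving the request armed.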
				
			
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -351,11 +351,20 @@ enum io_uring_op {
  *				0 is reported if zerocopy was actually possible.
  *				IORING_NOTIF_USAGE_ZC_COPIED if data was copied
  *				(at least partially).
+ *
+ * IORING_RECVSEND_BUNDLE	Used with IOSQE_BUFFER_SELECT. If set, send will
+ *				grab as many buffers from the buffer group ID
+ *				given and send them all. The completion result
+ *				will be the number of buffers sent, with the
+ *				starting buffer ID in cqe->flags as per usual
+ *				for provided buffer usage. The buffers will be
+ *				contiguous from the starting buffer ID.
  */
 #define IORING_RECVSEND_POLL_FIRST	(1U << 0)
 #define IORING_RECV_MULTISHOT		(1U << 1)
 #define IORING_RECVSEND_FIXED_BUF	(1U << 2)
 #define IORING_SEND_ZC_REPORT_USAGE	(1U << 3)
+#define IORING_RECVSEND_BUNDLE		(1U << 4)
 
 /*
  * cqe.res for IORING_CQE_F_NOTIF if

io_uring/net.c (147 changed lines)
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -57,7 +57,7 @@ struct io_sr_msg {
 		struct user_msghdr __user	*umsg;
 		void __user			*buf;
 	};
-	unsigned			len;
+	int				len;
 	unsigned			done_io;
 	unsigned			msg_flags;
 	unsigned			nr_multishot_loops;
@@ -389,6 +389,8 @@ static int io_sendmsg_prep_setup(struct io_kiocb *req, int is_msg)
 	return ret;
 }
 
+#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)
+
 int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
@@ -407,11 +409,20 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	sr->len = READ_ONCE(sqe->len);
 	sr->flags = READ_ONCE(sqe->ioprio);
-	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+	if (sr->flags & ~SENDMSG_FLAGS)
 		return -EINVAL;
 	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
 	if (sr->msg_flags & MSG_DONTWAIT)
 		req->flags |= REQ_F_NOWAIT;
+	if (sr->flags & IORING_RECVSEND_BUNDLE) {
+		if (req->opcode == IORING_OP_SENDMSG)
+			return -EINVAL;
+		if (!(req->flags & REQ_F_BUFFER_SELECT))
+			return -EINVAL;
+		sr->msg_flags |= MSG_WAITALL;
+		sr->buf_group = req->buf_index;
+		req->buf_list = NULL;
+	}
 
 #ifdef CONFIG_COMPAT
 	if (req->ctx->compat)
@@ -427,6 +438,79 @@ static void io_req_msg_cleanup(struct io_kiocb *req,
 	io_netmsg_recycle(req, issue_flags);
 }
 
+/*
+ * For bundle completions, we need to figure out how many segments we consumed.
+ * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
+ * could be using an ITER_IOVEC. If the latter, then if we consumed all of
+ * the segments, then it's a trivial question to answer. If we have residual
+ * data in the iter, then loop the segments to figure out how much we
+ * transferred.
+ */
+static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
+{
+	struct iovec *iov;
+	int nbufs;
+
+	/* no data is always zero segments, and a ubuf is always 1 segment */
+	if (ret <= 0)
+		return 0;
+	if (iter_is_ubuf(&kmsg->msg.msg_iter))
+		return 1;
+
+	iov = kmsg->free_iov;
+	if (!iov)
+		iov = &kmsg->fast_iov;
+
+	/* if all data was transferred, it's basic pointer math */
+	if (!iov_iter_count(&kmsg->msg.msg_iter))
+		return iter_iov(&kmsg->msg.msg_iter) - iov;
+
+	/* short transfer, count segments */
+	nbufs = 0;
+	do {
+		int this_len = min_t(int, iov[nbufs].iov_len, ret);
+
+		nbufs++;
+		ret -= this_len;
+	} while (ret);
+
+	return nbufs;
+}
+
+static inline bool io_send_finish(struct io_kiocb *req, int *ret,
+				  struct io_async_msghdr *kmsg,
+				  unsigned issue_flags)
+{
+	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+	bool bundle_finished = *ret <= 0;
+	unsigned int cflags;
+
+	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
+		cflags = io_put_kbuf(req, issue_flags);
+		goto finish;
+	}
+
+	cflags = io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret), issue_flags);
+
+	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
+		goto finish;
+
+	/*
+	 * Fill CQE for this send and see if we should keep trying to
+	 * send from this socket.
+	 */
+	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
+		io_mshot_prep_retry(req, kmsg);
+		return false;
+	}
+
+	/* Otherwise stop bundle and use the current result. */
+finish:
+	io_req_set_res(req, *ret, cflags);
+	*ret = IOU_OK;
+	return true;
+}
+
 int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
@@ -482,7 +566,6 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct io_async_msghdr *kmsg = req->async_data;
 	struct socket *sock;
-	unsigned int cflags;
 	unsigned flags;
 	int min_ret = 0;
 	int ret;
@@ -495,21 +578,47 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 		return -EAGAIN;
 
-	if (io_do_buffer_select(req)) {
-		size_t len = sr->len;
-		void __user *buf;
-
-		buf = io_buffer_select(req, &len, issue_flags);
-		if (unlikely(!buf))
-			return -ENOBUFS;
-		sr->buf = buf;
-		sr->len = len;
-	}
-
 	flags = sr->msg_flags;
 	if (issue_flags & IO_URING_F_NONBLOCK)
 		flags |= MSG_DONTWAIT;
-	if (flags & MSG_WAITALL)
+
+retry_bundle:
+	if (io_do_buffer_select(req)) {
+		struct buf_sel_arg arg = {
+			.iovs = &kmsg->fast_iov,
+			.max_len = min_not_zero(sr->len, INT_MAX),
+			.nr_iovs = 1,
+			.mode = KBUF_MODE_EXPAND,
+		};
+
+		if (kmsg->free_iov) {
+			arg.nr_iovs = kmsg->free_iov_nr;
+			arg.iovs = kmsg->free_iov;
+			arg.mode |= KBUF_MODE_FREE;
+		}
+
+		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
+			arg.nr_iovs = 1;
+
+		ret = io_buffers_select(req, &arg, issue_flags);
+		if (unlikely(ret < 0))
+			return ret;
+
+		sr->len = arg.out_len;
+		iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, arg.iovs, ret,
+				arg.out_len);
+		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
+			kmsg->free_iov_nr = ret;
+			kmsg->free_iov = arg.iovs;
+		}
+	}
+
+	/*
+	 * If MSG_WAITALL is set, or this is a bundle send, then we need
+	 * the full amount. If just bundle is set, a short send completes
+	 * the bundle sequence rather than continuing on.
+	 */
+	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 
 	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
@@ -534,10 +643,12 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
 		ret += sr->done_io;
 	else if (sr->done_io)
 		ret = sr->done_io;
+
+	if (!io_send_finish(req, &ret, kmsg, issue_flags))
+		goto retry_bundle;
+
 	io_req_msg_cleanup(req, issue_flags);
-	cflags = io_put_kbuf(req, issue_flags);
-	io_req_set_res(req, ret, cflags);
-	return IOU_OK;
+	return ret;
 }
 
 static int io_recvmsg_mshot_prep(struct io_kiocb *req,
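On the completion side, this sketch (again not from the patch) shows how a consumer might interpret the single CQE a bundle posts: io_send() sets cqe->res to the total bytes sent via io_req_set_res(), and the starting buffer ID travels in cqe->flags with the usual IORING_CQE_BUFFER_SHIFT encoding. reap_bundle_send() and BUF_LEN are hypothetical, and the bytes-to-buffer-count division assumes equal-sized buffers, mirroring the per-segment accounting io_bundle_nbufs() performs in the kernel.

/*
 * Hypothetical consumer of a bundle-send completion. Assumes every
 * provided buffer is BUF_LEN bytes, so the byte total in cqe->res can
 * be turned back into a consumed-buffer count.
 */
#include <liburing.h>
#include <stdio.h>

#define BUF_LEN	4096	/* illustrative fixed buffer size */

static int reap_bundle_send(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;
	int ret;

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0)
		return ret;

	if (cqe->res >= 0 && (cqe->flags & IORING_CQE_F_BUFFER)) {
		/* starting buffer ID, standard provided-buffer encoding */
		unsigned start_bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
		/* equal-sized buffers: round bytes up to a buffer count */
		unsigned nbufs = (cqe->res + BUF_LEN - 1) / BUF_LEN;

		/* buffers [start_bid, start_bid + nbufs) can be recycled */
		printf("sent %d bytes from buffers [%u, %u)\n",
		       cqe->res, start_bid, start_bid + nbufs);
	}
	ret = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	return ret;
}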