forked from mirrors/linux
		
	io_uring/net: zerocopy sendmsg
Add a zerocopy version of sendmsg.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/6aabc4bdfc0ec78df6ec9328137e394af9d4e7ef.1663668091.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
		
							parent
							
								
									c4c0009e0b
								
							
						
					
					
						commit
						493108d95f
					
				
					 4 changed files with 107 additions and 5 deletions
				
			
		| 
						 | 
					@ -213,6 +213,7 @@ enum io_uring_op {
 | 
				
			||||||
	IORING_OP_SOCKET,
 | 
						IORING_OP_SOCKET,
 | 
				
			||||||
	IORING_OP_URING_CMD,
 | 
						IORING_OP_URING_CMD,
 | 
				
			||||||
	IORING_OP_SEND_ZC,
 | 
						IORING_OP_SEND_ZC,
 | 
				
			||||||
 | 
						IORING_OP_SENDMSG_ZC,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* this goes last, obviously */
 | 
						/* this goes last, obviously */
 | 
				
			||||||
	IORING_OP_LAST,
 | 
						IORING_OP_LAST,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -909,7 +909,12 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 | 
				
			||||||
void io_send_zc_cleanup(struct io_kiocb *req)
 | 
					void io_send_zc_cleanup(struct io_kiocb *req)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
 | 
						struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
 | 
				
			||||||
 | 
						struct io_async_msghdr *io;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (req_has_async_data(req)) {
 | 
				
			||||||
 | 
							io = req->async_data;
 | 
				
			||||||
 | 
							kfree(io->free_iov);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	zc->notif->flags |= REQ_F_CQE_SKIP;
 | 
						zc->notif->flags |= REQ_F_CQE_SKIP;
 | 
				
			||||||
	io_notif_flush(zc->notif);
 | 
						io_notif_flush(zc->notif);
 | 
				
			||||||
	zc->notif = NULL;
 | 
						zc->notif = NULL;
 | 
				
			||||||
| 
						 | 
					@ -921,8 +926,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 | 
				
			||||||
	struct io_ring_ctx *ctx = req->ctx;
 | 
						struct io_ring_ctx *ctx = req->ctx;
 | 
				
			||||||
	struct io_kiocb *notif;
 | 
						struct io_kiocb *notif;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3) ||
 | 
						if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
 | 
				
			||||||
	    READ_ONCE(sqe->__pad3[0]))
 | 
					 | 
				
			||||||
		return -EINVAL;
 | 
							return -EINVAL;
 | 
				
			||||||
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
 | 
						/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
 | 
				
			||||||
	if (req->flags & REQ_F_CQE_SKIP)
 | 
						if (req->flags & REQ_F_CQE_SKIP)
 | 
				
			||||||
| 
						 | 
					@ -949,14 +953,24 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 | 
				
			||||||
		io_req_set_rsrc_node(notif, ctx, 0);
 | 
							io_req_set_rsrc_node(notif, ctx, 0);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (req->opcode == IORING_OP_SEND_ZC) {
 | 
				
			||||||
 | 
							if (READ_ONCE(sqe->__pad3[0]))
 | 
				
			||||||
 | 
								return -EINVAL;
 | 
				
			||||||
 | 
							zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
 | 
				
			||||||
 | 
							zc->addr_len = READ_ONCE(sqe->addr_len);
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							if (unlikely(sqe->addr2 || sqe->file_index))
 | 
				
			||||||
 | 
								return -EINVAL;
 | 
				
			||||||
 | 
							if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
 | 
				
			||||||
 | 
								return -EINVAL;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
 | 
						zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
 | 
				
			||||||
	zc->len = READ_ONCE(sqe->len);
 | 
						zc->len = READ_ONCE(sqe->len);
 | 
				
			||||||
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
 | 
						zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
 | 
				
			||||||
	if (zc->msg_flags & MSG_DONTWAIT)
 | 
						if (zc->msg_flags & MSG_DONTWAIT)
 | 
				
			||||||
		req->flags |= REQ_F_NOWAIT;
 | 
							req->flags |= REQ_F_NOWAIT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
 | 
					 | 
				
			||||||
	zc->addr_len = READ_ONCE(sqe->addr_len);
 | 
					 | 
				
			||||||
	zc->done_io = 0;
 | 
						zc->done_io = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_COMPAT
 | 
					#ifdef CONFIG_COMPAT
 | 
				
			||||||
| 
						 | 
					@ -1118,6 +1132,73 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
 | 
				
			||||||
	return IOU_OK;
 | 
						return IOU_OK;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 | 
				
			||||||
 | 
						struct io_async_msghdr iomsg, *kmsg;
 | 
				
			||||||
 | 
						struct socket *sock;
 | 
				
			||||||
 | 
						unsigned flags, cflags;
 | 
				
			||||||
 | 
						int ret, min_ret = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						sock = sock_from_file(req->file);
 | 
				
			||||||
 | 
						if (unlikely(!sock))
 | 
				
			||||||
 | 
							return -ENOTSOCK;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (req_has_async_data(req)) {
 | 
				
			||||||
 | 
							kmsg = req->async_data;
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							ret = io_sendmsg_copy_hdr(req, &iomsg);
 | 
				
			||||||
 | 
							if (ret)
 | 
				
			||||||
 | 
								return ret;
 | 
				
			||||||
 | 
							kmsg = &iomsg;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!(req->flags & REQ_F_POLLED) &&
 | 
				
			||||||
 | 
						    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 | 
				
			||||||
 | 
							return io_setup_async_msg(req, kmsg, issue_flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						flags = sr->msg_flags | MSG_ZEROCOPY;
 | 
				
			||||||
 | 
						if (issue_flags & IO_URING_F_NONBLOCK)
 | 
				
			||||||
 | 
							flags |= MSG_DONTWAIT;
 | 
				
			||||||
 | 
						if (flags & MSG_WAITALL)
 | 
				
			||||||
 | 
							min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
 | 
				
			||||||
 | 
						kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
 | 
				
			||||||
 | 
						ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (unlikely(ret < min_ret)) {
 | 
				
			||||||
 | 
							if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
 | 
				
			||||||
 | 
								return io_setup_async_msg(req, kmsg, issue_flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (ret > 0 && io_net_retry(sock, flags)) {
 | 
				
			||||||
 | 
								sr->done_io += ret;
 | 
				
			||||||
 | 
								req->flags |= REQ_F_PARTIAL_IO;
 | 
				
			||||||
 | 
								return io_setup_async_msg(req, kmsg, issue_flags);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							if (ret < 0 && !sr->done_io)
 | 
				
			||||||
 | 
								sr->notif->flags |= REQ_F_CQE_SKIP;
 | 
				
			||||||
 | 
							if (ret == -ERESTARTSYS)
 | 
				
			||||||
 | 
								ret = -EINTR;
 | 
				
			||||||
 | 
							req_set_fail(req);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						/* fast path, check for non-NULL to avoid function call */
 | 
				
			||||||
 | 
						if (kmsg->free_iov)
 | 
				
			||||||
 | 
							kfree(kmsg->free_iov);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						io_netmsg_recycle(req, issue_flags);
 | 
				
			||||||
 | 
						if (ret >= 0)
 | 
				
			||||||
 | 
							ret += sr->done_io;
 | 
				
			||||||
 | 
						else if (sr->done_io)
 | 
				
			||||||
 | 
							ret = sr->done_io;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						io_notif_flush(sr->notif);
 | 
				
			||||||
 | 
						req->flags &= ~REQ_F_NEED_CLEANUP;
 | 
				
			||||||
 | 
						cflags = ret >= 0 ? IORING_CQE_F_MORE : 0;
 | 
				
			||||||
 | 
						io_req_set_res(req, ret, cflags);
 | 
				
			||||||
 | 
						return IOU_OK;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void io_sendrecv_fail(struct io_kiocb *req)
 | 
					void io_sendrecv_fail(struct io_kiocb *req)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 | 
						struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 | 
				
			||||||
| 
						 | 
					@ -1127,7 +1208,7 @@ void io_sendrecv_fail(struct io_kiocb *req)
 | 
				
			||||||
	if (req->flags & REQ_F_PARTIAL_IO)
 | 
						if (req->flags & REQ_F_PARTIAL_IO)
 | 
				
			||||||
		res = sr->done_io;
 | 
							res = sr->done_io;
 | 
				
			||||||
	if ((req->flags & REQ_F_NEED_CLEANUP) &&
 | 
						if ((req->flags & REQ_F_NEED_CLEANUP) &&
 | 
				
			||||||
	    req->opcode == IORING_OP_SEND_ZC) {
 | 
						    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC)) {
 | 
				
			||||||
		/* preserve notification for partial I/O */
 | 
							/* preserve notification for partial I/O */
 | 
				
			||||||
		if (res < 0)
 | 
							if (res < 0)
 | 
				
			||||||
			sr->notif->flags |= REQ_F_CQE_SKIP;
 | 
								sr->notif->flags |= REQ_F_CQE_SKIP;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -57,6 +57,7 @@ int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 | 
				
			||||||
int io_connect(struct io_kiocb *req, unsigned int issue_flags);
 | 
					int io_connect(struct io_kiocb *req, unsigned int issue_flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int io_send_zc(struct io_kiocb *req, unsigned int issue_flags);
 | 
					int io_send_zc(struct io_kiocb *req, unsigned int issue_flags);
 | 
				
			||||||
 | 
					int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags);
 | 
				
			||||||
int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 | 
					int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 | 
				
			||||||
void io_send_zc_cleanup(struct io_kiocb *req);
 | 
					void io_send_zc_cleanup(struct io_kiocb *req);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -503,6 +503,25 @@ const struct io_op_def io_op_defs[] = {
 | 
				
			||||||
		.fail			= io_sendrecv_fail,
 | 
							.fail			= io_sendrecv_fail,
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
		.prep			= io_eopnotsupp_prep,
 | 
							.prep			= io_eopnotsupp_prep,
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
 | 
						[IORING_OP_SENDMSG_ZC] = {
 | 
				
			||||||
 | 
							.name			= "SENDMSG_ZC",
 | 
				
			||||||
 | 
							.needs_file		= 1,
 | 
				
			||||||
 | 
							.unbound_nonreg_file	= 1,
 | 
				
			||||||
 | 
							.pollout		= 1,
 | 
				
			||||||
 | 
							.audit_skip		= 1,
 | 
				
			||||||
 | 
							.ioprio			= 1,
 | 
				
			||||||
 | 
							.manual_alloc		= 1,
 | 
				
			||||||
 | 
					#if defined(CONFIG_NET)
 | 
				
			||||||
 | 
							.async_size		= sizeof(struct io_async_msghdr),
 | 
				
			||||||
 | 
							.prep			= io_send_zc_prep,
 | 
				
			||||||
 | 
							.issue			= io_sendmsg_zc,
 | 
				
			||||||
 | 
							.prep_async		= io_sendmsg_prep_async,
 | 
				
			||||||
 | 
							.cleanup		= io_send_zc_cleanup,
 | 
				
			||||||
 | 
							.fail			= io_sendrecv_fail,
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
							.prep			= io_eopnotsupp_prep,
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	},
 | 
						},
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue