forked from mirrors/linux
		
	splice, net: Use sendmsg(MSG_SPLICE_PAGES) rather than ->sendpage()
Replace generic_splice_sendpage() + splice_from_pipe + pipe_to_sendpage()
with a net-specific handler, splice_to_socket(), that calls sendmsg() with
MSG_SPLICE_PAGES set instead of calling ->sendpage().

MSG_MORE is used to indicate if the sendmsg() is expected to be followed
with more data.

This allows multiple pipe-buffer pages to be passed in a single call in a
BVEC iterator, allowing the processing to be pushed down to a loop in the
protocol driver. This helps pave the way for passing multipage folios down
too.

Protocols that haven't been converted to handle MSG_SPLICE_PAGES yet should
just ignore it and do a normal sendmsg() for now - although that may be a
bit slower as it may copy everything.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
		
							parent
							
								
									81840b3b91
								
							
						
					
					
						commit
						2dc334f1a6
					
				
					 4 changed files with 131 additions and 57 deletions
				
			
		
							
								
								
									
										158
									
								
								fs/splice.c
									
									
									
									
									
								
							
							
						
						
									
										158
									
								
								fs/splice.c
									
									
									
									
									
								
							|  | @ -33,6 +33,7 @@ | |||
| #include <linux/fsnotify.h> | ||||
| #include <linux/security.h> | ||||
| #include <linux/gfp.h> | ||||
| #include <linux/net.h> | ||||
| #include <linux/socket.h> | ||||
| #include <linux/sched/signal.h> | ||||
| 
 | ||||
|  | @ -448,30 +449,6 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = { | |||
| }; | ||||
| EXPORT_SYMBOL(nosteal_pipe_buf_ops); | ||||
| 
 | ||||
| /*
 | ||||
|  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' | ||||
|  * using sendpage(). Return the number of bytes sent. | ||||
|  */ | ||||
| static int pipe_to_sendpage(struct pipe_inode_info *pipe, | ||||
| 			    struct pipe_buffer *buf, struct splice_desc *sd) | ||||
| { | ||||
| 	struct file *file = sd->u.file; | ||||
| 	loff_t pos = sd->pos; | ||||
| 	int more; | ||||
| 
 | ||||
| 	if (!likely(file->f_op->sendpage)) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; | ||||
| 
 | ||||
| 	if (sd->len < sd->total_len && | ||||
| 	    pipe_occupancy(pipe->head, pipe->tail) > 1) | ||||
| 		more |= MSG_SENDPAGE_NOTLAST; | ||||
| 
 | ||||
| 	return file->f_op->sendpage(file, buf->page, buf->offset, | ||||
| 				    sd->len, &pos, more); | ||||
| } | ||||
| 
 | ||||
| static void wakeup_pipe_writers(struct pipe_inode_info *pipe) | ||||
| { | ||||
| 	smp_mb(); | ||||
|  | @ -652,7 +629,7 @@ static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_des | |||
|  * Description: | ||||
|  *    This function does little more than loop over the pipe and call | ||||
|  *    @actor to do the actual moving of a single struct pipe_buffer to | ||||
|  *    the desired destination. See pipe_to_file, pipe_to_sendpage, or | ||||
|  *    the desired destination. See pipe_to_file, pipe_to_sendmsg, or | ||||
|  *    pipe_to_user. | ||||
|  * | ||||
|  */ | ||||
|  | @ -833,8 +810,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
| 
 | ||||
| EXPORT_SYMBOL(iter_file_splice_write); | ||||
| 
 | ||||
| #ifdef CONFIG_NET | ||||
| /**
 | ||||
|  * generic_splice_sendpage - splice data from a pipe to a socket | ||||
|  * splice_to_socket - splice data from a pipe to a socket | ||||
|  * @pipe:	pipe to splice from | ||||
|  * @out:	socket to write to | ||||
|  * @ppos:	position in @out | ||||
|  | @ -846,13 +824,131 @@ EXPORT_SYMBOL(iter_file_splice_write); | |||
|  *    is involved. | ||||
|  * | ||||
|  */ | ||||
| ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, | ||||
| 				loff_t *ppos, size_t len, unsigned int flags) | ||||
| ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, | ||||
| 			 loff_t *ppos, size_t len, unsigned int flags) | ||||
| { | ||||
| 	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); | ||||
| } | ||||
| 	struct socket *sock = sock_from_file(out); | ||||
| 	struct bio_vec bvec[16]; | ||||
| 	struct msghdr msg = {}; | ||||
| 	ssize_t ret = 0; | ||||
| 	size_t spliced = 0; | ||||
| 	bool need_wakeup = false; | ||||
| 
 | ||||
| EXPORT_SYMBOL(generic_splice_sendpage); | ||||
| 	pipe_lock(pipe); | ||||
| 
 | ||||
| 	while (len > 0) { | ||||
| 		unsigned int head, tail, mask, bc = 0; | ||||
| 		size_t remain = len; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Check for signal early to make process killable when there | ||||
| 		 * are always buffers available | ||||
| 		 */ | ||||
| 		ret = -ERESTARTSYS; | ||||
| 		if (signal_pending(current)) | ||||
| 			break; | ||||
| 
 | ||||
| 		while (pipe_empty(pipe->head, pipe->tail)) { | ||||
| 			ret = 0; | ||||
| 			if (!pipe->writers) | ||||
| 				goto out; | ||||
| 
 | ||||
| 			if (spliced) | ||||
| 				goto out; | ||||
| 
 | ||||
| 			ret = -EAGAIN; | ||||
| 			if (flags & SPLICE_F_NONBLOCK) | ||||
| 				goto out; | ||||
| 
 | ||||
| 			ret = -ERESTARTSYS; | ||||
| 			if (signal_pending(current)) | ||||
| 				goto out; | ||||
| 
 | ||||
| 			if (need_wakeup) { | ||||
| 				wakeup_pipe_writers(pipe); | ||||
| 				need_wakeup = false; | ||||
| 			} | ||||
| 
 | ||||
| 			pipe_wait_readable(pipe); | ||||
| 		} | ||||
| 
 | ||||
| 		head = pipe->head; | ||||
| 		tail = pipe->tail; | ||||
| 		mask = pipe->ring_size - 1; | ||||
| 
 | ||||
| 		while (!pipe_empty(head, tail)) { | ||||
| 			struct pipe_buffer *buf = &pipe->bufs[tail & mask]; | ||||
| 			size_t seg; | ||||
| 
 | ||||
| 			if (!buf->len) { | ||||
| 				tail++; | ||||
| 				continue; | ||||
| 			} | ||||
| 
 | ||||
| 			seg = min_t(size_t, remain, buf->len); | ||||
| 			seg = min_t(size_t, seg, PAGE_SIZE); | ||||
| 
 | ||||
| 			ret = pipe_buf_confirm(pipe, buf); | ||||
| 			if (unlikely(ret)) { | ||||
| 				if (ret == -ENODATA) | ||||
| 					ret = 0; | ||||
| 				break; | ||||
| 			} | ||||
| 
 | ||||
| 			bvec_set_page(&bvec[bc++], buf->page, seg, buf->offset); | ||||
| 			remain -= seg; | ||||
| 			if (seg >= buf->len) | ||||
| 				tail++; | ||||
| 			if (bc >= ARRAY_SIZE(bvec)) | ||||
| 				break; | ||||
| 		} | ||||
| 
 | ||||
| 		if (!bc) | ||||
| 			break; | ||||
| 
 | ||||
| 		msg.msg_flags = MSG_SPLICE_PAGES; | ||||
| 		if (flags & SPLICE_F_MORE) | ||||
| 			msg.msg_flags |= MSG_MORE; | ||||
| 		if (remain && pipe_occupancy(pipe->head, tail) > 0) | ||||
| 			msg.msg_flags |= MSG_MORE; | ||||
| 
 | ||||
| 		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bc, | ||||
| 			      len - remain); | ||||
| 		ret = sock_sendmsg(sock, &msg); | ||||
| 		if (ret <= 0) | ||||
| 			break; | ||||
| 
 | ||||
| 		spliced += ret; | ||||
| 		len -= ret; | ||||
| 		tail = pipe->tail; | ||||
| 		while (ret > 0) { | ||||
| 			struct pipe_buffer *buf = &pipe->bufs[tail & mask]; | ||||
| 			size_t seg = min_t(size_t, ret, buf->len); | ||||
| 
 | ||||
| 			buf->offset += seg; | ||||
| 			buf->len -= seg; | ||||
| 			ret -= seg; | ||||
| 
 | ||||
| 			if (!buf->len) { | ||||
| 				pipe_buf_release(pipe, buf); | ||||
| 				tail++; | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		if (tail != pipe->tail) { | ||||
| 			pipe->tail = tail; | ||||
| 			if (pipe->files) | ||||
| 				need_wakeup = true; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| out: | ||||
| 	pipe_unlock(pipe); | ||||
| 	if (need_wakeup) | ||||
| 		wakeup_pipe_writers(pipe); | ||||
| 	return spliced ?: ret; | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| static int warn_unsupported(struct file *file, const char *op) | ||||
| { | ||||
|  |  | |||
|  | @ -2759,8 +2759,6 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, | |||
| 		struct pipe_inode_info *, size_t, unsigned int); | ||||
| extern ssize_t iter_file_splice_write(struct pipe_inode_info *, | ||||
| 		struct file *, loff_t *, size_t, unsigned int); | ||||
| extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, | ||||
| 		struct file *out, loff_t *, size_t len, unsigned int flags); | ||||
| extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | ||||
| 		loff_t *opos, size_t len, unsigned int flags); | ||||
| 
 | ||||
|  |  | |||
|  | @ -84,6 +84,8 @@ extern long do_splice(struct file *in, loff_t *off_in, | |||
| 
 | ||||
| extern long do_tee(struct file *in, struct file *out, size_t len, | ||||
| 		   unsigned int flags); | ||||
| extern ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, | ||||
| 				loff_t *ppos, size_t len, unsigned int flags); | ||||
| 
 | ||||
| /*
 | ||||
|  * for dynamic pipe sizing | ||||
|  |  | |||
							
								
								
									
										26
									
								
								net/socket.c
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								net/socket.c
									
									
									
									
									
								
							|  | @ -57,6 +57,7 @@ | |||
| #include <linux/mm.h> | ||||
| #include <linux/socket.h> | ||||
| #include <linux/file.h> | ||||
| #include <linux/splice.h> | ||||
| #include <linux/net.h> | ||||
| #include <linux/interrupt.h> | ||||
| #include <linux/thread_info.h> | ||||
|  | @ -126,8 +127,6 @@ static long compat_sock_ioctl(struct file *file, | |||
| 			      unsigned int cmd, unsigned long arg); | ||||
| #endif | ||||
| static int sock_fasync(int fd, struct file *filp, int on); | ||||
| static ssize_t sock_sendpage(struct file *file, struct page *page, | ||||
| 			     int offset, size_t size, loff_t *ppos, int more); | ||||
| static ssize_t sock_splice_read(struct file *file, loff_t *ppos, | ||||
| 				struct pipe_inode_info *pipe, size_t len, | ||||
| 				unsigned int flags); | ||||
|  | @ -162,8 +161,7 @@ static const struct file_operations socket_file_ops = { | |||
| 	.mmap =		sock_mmap, | ||||
| 	.release =	sock_close, | ||||
| 	.fasync =	sock_fasync, | ||||
| 	.sendpage =	sock_sendpage, | ||||
| 	.splice_write = generic_splice_sendpage, | ||||
| 	.splice_write = splice_to_socket, | ||||
| 	.splice_read =	sock_splice_read, | ||||
| 	.show_fdinfo =	sock_show_fdinfo, | ||||
| }; | ||||
|  | @ -1066,26 +1064,6 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, | |||
| } | ||||
| EXPORT_SYMBOL(kernel_recvmsg); | ||||
| 
 | ||||
| static ssize_t sock_sendpage(struct file *file, struct page *page, | ||||
| 			     int offset, size_t size, loff_t *ppos, int more) | ||||
| { | ||||
| 	struct socket *sock; | ||||
| 	int flags; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	sock = file->private_data; | ||||
| 
 | ||||
| 	flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; | ||||
| 	/* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */ | ||||
| 	flags |= more; | ||||
| 
 | ||||
| 	ret = kernel_sendpage(sock, page, offset, size, flags); | ||||
| 
 | ||||
| 	if (trace_sock_send_length_enabled()) | ||||
| 		call_trace_sock_send_length(sock->sk, ret, 0); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static ssize_t sock_splice_read(struct file *file, loff_t *ppos, | ||||
| 				struct pipe_inode_info *pipe, size_t len, | ||||
| 				unsigned int flags) | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 David Howells
						David Howells