mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	[PATCH] splice: add support for sys_tee()
Basically an in-kernel implementation of tee, which uses splice and the pipe buffers as an intelligent way to pass data around by reference. Where the user space tee consumes the input and produces a stdout and file output, this syscall merely duplicates the data inside a pipe to another pipe. No data is copied, the output just grabs a reference to the input pipe data. Signed-off-by: Jens Axboe <axboe@suse.de>
This commit is contained in:
		
							parent
							
								
									cbb7e577e7
								
							
						
					
					
						commit
						70524490ee
					
				
					 11 changed files with 208 additions and 4 deletions
				
			
		|  | @ -314,3 +314,4 @@ ENTRY(sys_call_table) | |||
| 	.long sys_get_robust_list
 | ||||
| 	.long sys_splice
 | ||||
| 	.long sys_sync_file_range
 | ||||
| 	.long sys_tee			/* 315 */ | ||||
|  |  | |||
|  | @ -1609,5 +1609,6 @@ sys_call_table: | |||
| 	data8 sys_set_robust_list | ||||
| 	data8 sys_get_robust_list | ||||
| 	data8 sys_sync_file_range		// 1300 | ||||
| 	data8 sys_tee | ||||
| 
 | ||||
| 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls | ||||
|  |  | |||
|  | @ -323,3 +323,4 @@ COMPAT_SYS(pselect6) | |||
| COMPAT_SYS(ppoll) | ||||
| SYSCALL(unshare) | ||||
| SYSCALL(splice) | ||||
| SYSCALL(tee) | ||||
|  |  | |||
|  | @ -131,12 +131,19 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * ->get() operation for anonymous pipe buffers: grab one more reference | ||||
|  * on the page backing @buf. @info is not used. | ||||
|  */ | ||||
| static void anon_pipe_buf_get(struct pipe_inode_info *info, | ||||
| 			      struct pipe_buffer *buf) | ||||
| { | ||||
| 	struct page *page = buf->page; | ||||
| 
 | ||||
| 	page_cache_get(page); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Operations table for anonymous pipe buffers. Mapping hooks first, then | ||||
|  * the reference/ownership hooks, then the merge policy. | ||||
|  */ | ||||
| static struct pipe_buf_operations anon_pipe_buf_ops = { | ||||
| 	.map = anon_pipe_buf_map, | ||||
| 	.unmap = anon_pipe_buf_unmap, | ||||
| 	.get = anon_pipe_buf_get, | ||||
| 	.release = anon_pipe_buf_release, | ||||
| 	.steal = anon_pipe_buf_steal, | ||||
| 	.can_merge = 1, | ||||
| }; | ||||
| 
 | ||||
| static ssize_t | ||||
|  |  | |||
							
								
								
									
										186
									
								
								fs/splice.c
									
									
									
									
									
								
							
							
						
						
									
										186
									
								
								fs/splice.c
									
									
									
									
									
								
							|  | @ -125,12 +125,19 @@ static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, | |||
| 	kunmap(buf->page); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * ->get() operation for page cache backed pipe buffers: grab one more | ||||
|  * reference on the page backing @buf. @info is not used. | ||||
|  */ | ||||
| static void page_cache_pipe_buf_get(struct pipe_inode_info *info, | ||||
| 				    struct pipe_buffer *buf) | ||||
| { | ||||
| 	struct page *page = buf->page; | ||||
| 
 | ||||
| 	page_cache_get(page); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Operations table for page cache backed pipe buffers. Mapping hooks first, | ||||
|  * then the reference/ownership hooks, then the merge policy. | ||||
|  */ | ||||
| static struct pipe_buf_operations page_cache_pipe_buf_ops = { | ||||
| 	.map = page_cache_pipe_buf_map, | ||||
| 	.unmap = page_cache_pipe_buf_unmap, | ||||
| 	.get = page_cache_pipe_buf_get, | ||||
| 	.release = page_cache_pipe_buf_release, | ||||
| 	.steal = page_cache_pipe_buf_steal, | ||||
| 	.can_merge = 0, | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -963,3 +970,182 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, | |||
| 
 | ||||
| 	return error; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Link contents of ipipe to opipe. | ||||
|  * | ||||
|  * No data is copied: for each input buffer an extra reference is taken | ||||
|  * through its ->get() operation and the pipe_buffer descriptor is then | ||||
|  * duplicated into the next free slot of opipe. At most @len bytes are | ||||
|  * linked. This function never writes ipipe->curbuf or ipipe->nrbufs. | ||||
|  * | ||||
|  * Returns the number of bytes linked (0 if ipipe is empty and has no | ||||
|  * writers). If nothing was linked it returns -EPIPE when opipe has no | ||||
|  * readers (SIGPIPE is sent as well), -EAGAIN when SPLICE_F_NONBLOCK is | ||||
|  * set in @flags, or -ERESTARTSYS when a signal is pending. | ||||
|  * | ||||
|  * NOTE(review): both i_mutexes are taken unconditionally, so this looks | ||||
|  * like it self-deadlocks if ipipe and opipe belong to the same inode - | ||||
|  * confirm the caller rejects that case. | ||||
|  * NOTE(review): 'i' is advanced by the for statement on every 'continue' | ||||
|  * and after pipe_wait(ipipe), including passes where no buffer was | ||||
|  * linked - verify that no input buffer can be skipped. | ||||
|  */ | ||||
| static int link_pipe(struct pipe_inode_info *ipipe, | ||||
| 		     struct pipe_inode_info *opipe, | ||||
| 		     size_t len, unsigned int flags) | ||||
| { | ||||
| 	struct pipe_buffer *ibuf, *obuf; | ||||
| 	int ret = 0, do_wakeup = 0, i; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Potential ABBA deadlock, work around it by ordering lock | ||||
| 	 * grabbing by inode address. Otherwise two different processes | ||||
| 	 * could deadlock (one doing tee from A -> B, the other from B -> A). | ||||
| 	 */ | ||||
| 	if (ipipe->inode < opipe->inode) { | ||||
| 		mutex_lock(&ipipe->inode->i_mutex); | ||||
| 		mutex_lock(&opipe->inode->i_mutex); | ||||
| 	} else { | ||||
| 		mutex_lock(&opipe->inode->i_mutex); | ||||
| 		mutex_lock(&ipipe->inode->i_mutex); | ||||
| 	} | ||||
| 
 | ||||
| 	for (i = 0;; i++) { /* i: offset of the input buffer, counted from ipipe->curbuf */ | ||||
| 		if (!opipe->readers) { | ||||
| 			send_sig(SIGPIPE, current, 0); | ||||
| 			if (!ret) | ||||
| 				ret = -EPIPE; | ||||
| 			break; | ||||
| 		} | ||||
| 		if (ipipe->nrbufs - i) { | ||||
| 			ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * If we have room, fill this buffer | ||||
| 			 */ | ||||
| 			if (opipe->nrbufs < PIPE_BUFFERS) { | ||||
| 				int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); | ||||
| 
 | ||||
| 				/*
 | ||||
| 				 * Get a reference to this pipe buffer, | ||||
| 				 * so we can copy the contents over. | ||||
| 				 * Only the pipe_buffer descriptor is | ||||
| 				 * copied (struct assignment below). | ||||
| 				 */ | ||||
| 				ibuf->ops->get(ipipe, ibuf); | ||||
| 
 | ||||
| 				obuf = opipe->bufs + nbuf; | ||||
| 				*obuf = *ibuf; | ||||
| 
 | ||||
| 				if (obuf->len > len) /* clamp to what is left of the request */ | ||||
| 					obuf->len = len; | ||||
| 
 | ||||
| 				opipe->nrbufs++; | ||||
| 				do_wakeup = 1; | ||||
| 				ret += obuf->len; | ||||
| 				len -= obuf->len; | ||||
| 
 | ||||
| 				if (!len) | ||||
| 					break; | ||||
| 				if (opipe->nrbufs < PIPE_BUFFERS) | ||||
| 					continue; | ||||
| 			} | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * We have input available, but no output room. | ||||
| 			 * If we already copied data, return that. | ||||
| 			 */ | ||||
| 			if (flags & SPLICE_F_NONBLOCK) { | ||||
| 				if (!ret) | ||||
| 					ret = -EAGAIN; | ||||
| 				break; | ||||
| 			} | ||||
| 			if (signal_pending(current)) { | ||||
| 				if (!ret) | ||||
| 					ret = -ERESTARTSYS; | ||||
| 				break; | ||||
| 			} | ||||
| 			if (do_wakeup) { /* tell opipe readers about the buffers linked so far before sleeping */ | ||||
| 				smp_mb(); | ||||
| 				if (waitqueue_active(&opipe->wait)) | ||||
| 					wake_up_interruptible(&opipe->wait); | ||||
| 				kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); | ||||
| 				do_wakeup = 0; | ||||
| 			} | ||||
| 
 | ||||
| 			opipe->waiting_writers++; | ||||
| 			pipe_wait(opipe); | ||||
| 			opipe->waiting_writers--; | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * No input buffers, do the usual checks for available | ||||
| 		 * writers and blocking and wait if necessary | ||||
| 		 */ | ||||
| 		if (!ipipe->writers) | ||||
| 			break; | ||||
| 		if (!ipipe->waiting_writers) { /* no writer is blocked: hand back what we already linked */ | ||||
| 			if (ret) | ||||
| 				break; | ||||
| 		} | ||||
| 		if (flags & SPLICE_F_NONBLOCK) { | ||||
| 			if (!ret) | ||||
| 				ret = -EAGAIN; | ||||
| 			break; | ||||
| 		} | ||||
| 		if (signal_pending(current)) { | ||||
| 			if (!ret) | ||||
| 				ret = -ERESTARTSYS; | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		if (waitqueue_active(&ipipe->wait)) | ||||
| 			wake_up_interruptible_sync(&ipipe->wait); | ||||
| 		kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT); | ||||
| 
 | ||||
| 		pipe_wait(ipipe); | ||||
| 	} | ||||
| 
 | ||||
| 	mutex_unlock(&ipipe->inode->i_mutex); | ||||
| 	mutex_unlock(&opipe->inode->i_mutex); | ||||
| 
 | ||||
| 	if (do_wakeup) { /* final wakeup for opipe readers, done after both locks are dropped */ | ||||
| 		smp_mb(); | ||||
| 		if (waitqueue_active(&opipe->wait)) | ||||
| 			wake_up_interruptible(&opipe->wait); | ||||
| 		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); | ||||
| 	} | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * This is a tee(1) implementation that works on pipes. It doesn't copy | ||||
|  * any data, it simply references the 'in' pages on the 'out' pipe. | ||||
|  * The 'flags' used are the SPLICE_F_* variants, currently the only | ||||
|  * applicable one is SPLICE_F_NONBLOCK. | ||||
|  * | ||||
|  * Returns the result of link_pipe(), or -EINVAL when either file is not | ||||
|  * a pipe or when both files refer to the same pipe. | ||||
|  */ | ||||
| static long do_tee(struct file *in, struct file *out, size_t len, | ||||
| 		   unsigned int flags) | ||||
| { | ||||
| 	struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; | ||||
| 	struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Both ends must be pipes, and they must be different pipes: | ||||
| 	 * link_pipe() takes the i_mutex of each pipe's inode, so passing | ||||
| 	 * the same pipe twice would deadlock on the second mutex_lock(). | ||||
| 	 */ | ||||
| 	if (!ipipe || !opipe || ipipe == opipe) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Reference the ipipe buffers from opipe. | ||||
| 	 */ | ||||
| 	return link_pipe(ipipe, opipe, len, flags); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * tee() system call entry point. | ||||
|  * | ||||
|  * Looks up both descriptors and hands them to do_tee() if @fdin is open | ||||
|  * for reading and @fdout is open for writing. A zero @len returns 0 | ||||
|  * without touching either descriptor. Returns -EBADF when a descriptor | ||||
|  * cannot be looked up or has the wrong access mode, otherwise the result | ||||
|  * of do_tee(). | ||||
|  */ | ||||
| asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags) | ||||
| { | ||||
| 	struct file *src, *dst; | ||||
| 	int put_src, put_dst; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (unlikely(!len)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	ret = -EBADF; | ||||
| 
 | ||||
| 	src = fget_light(fdin, &put_src); | ||||
| 	if (!src) | ||||
| 		return ret; | ||||
| 	if (!(src->f_mode & FMODE_READ)) | ||||
| 		goto out_put_src; | ||||
| 
 | ||||
| 	dst = fget_light(fdout, &put_dst); | ||||
| 	if (!dst) | ||||
| 		goto out_put_src; | ||||
| 
 | ||||
| 	if (dst->f_mode & FMODE_WRITE) | ||||
| 		ret = do_tee(src, dst, len, flags); | ||||
| 	fput_light(dst, put_dst); | ||||
| 
 | ||||
| out_put_src: | ||||
| 	fput_light(src, put_src); | ||||
| 	return ret; | ||||
| } | ||||
|  |  | |||
|  | @ -320,8 +320,9 @@ | |||
| #define __NR_get_robust_list	312 | ||||
| #define __NR_splice		313 | ||||
| #define __NR_sync_file_range	314 | ||||
| #define __NR_tee		315 | ||||
| 
 | ||||
| #define NR_syscalls 315 | ||||
| #define NR_syscalls 316 | ||||
| 
 | ||||
| /*
 | ||||
|  * user-visible error numbers are in the range -1 - -128: see | ||||
|  |  | |||
|  | @ -289,12 +289,13 @@ | |||
| #define __NR_set_robust_list		1298 | ||||
| #define __NR_get_robust_list		1299 | ||||
| #define __NR_sync_file_range		1300 | ||||
| #define __NR_tee			1301 | ||||
| 
 | ||||
| #ifdef __KERNEL__ | ||||
| 
 | ||||
| #include <linux/config.h> | ||||
| 
 | ||||
| #define NR_syscalls			277 /* length of syscall table */ | ||||
| #define NR_syscalls			278 /* length of syscall table */ | ||||
| 
 | ||||
| #define __ARCH_WANT_SYS_RT_SIGACTION | ||||
| 
 | ||||
|  |  | |||
|  | @ -302,8 +302,9 @@ | |||
| #define __NR_ppoll		281 | ||||
| #define __NR_unshare		282 | ||||
| #define __NR_splice		283 | ||||
| #define __NR_tee		284 | ||||
| 
 | ||||
| #define __NR_syscalls		284 | ||||
| #define __NR_syscalls		285 | ||||
| 
 | ||||
| #ifdef __KERNEL__ | ||||
| #define __NR__exit __NR_exit | ||||
|  |  | |||
|  | @ -611,8 +611,10 @@ __SYSCALL(__NR_set_robust_list, sys_set_robust_list) | |||
| __SYSCALL(__NR_get_robust_list, sys_get_robust_list) | ||||
| #define __NR_splice		275 | ||||
| __SYSCALL(__NR_splice, sys_splice) | ||||
| #define __NR_tee		276 | ||||
| __SYSCALL(__NR_tee, sys_tee) | ||||
| 
 | ||||
| #define __NR_syscall_max __NR_splice | ||||
| #define __NR_syscall_max __NR_tee | ||||
| 
 | ||||
| #ifndef __NO_STUBS | ||||
| 
 | ||||
|  |  | |||
|  | @ -21,6 +21,7 @@ struct pipe_buf_operations { | |||
| 	void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); | ||||
| 	void (*release)(struct pipe_inode_info *, struct pipe_buffer *); | ||||
| 	int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); | ||||
| 	void (*get)(struct pipe_inode_info *, struct pipe_buffer *); | ||||
| }; | ||||
| 
 | ||||
| struct pipe_inode_info { | ||||
|  |  | |||
|  | @ -574,6 +574,8 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, | |||
| 			   int fd_out, loff_t __user *off_out, | ||||
| 			   size_t len, unsigned int flags); | ||||
| 
 | ||||
| asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags); | ||||
| 
 | ||||
| asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, | ||||
| 					unsigned int flags); | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Jens Axboe
						Jens Axboe