mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	io_uring: add splice(2) support
Add support for splice(2).
- The output file is specified as sqe->fd, so it is handled by generic code.
- hash_reg_file is handled by generic code as well.
- len is 32-bit, but that should be fine.
- fd_in is treated as a registered file when SPLICE_F_FD_IN_FIXED is set, which is a splice flag (i.e. it is passed in sqe->splice_flags).
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
		
							parent
							
								
									8da11c1994
								
							
						
					
					
						commit
						7d67af2c01
					
				
					 2 changed files with 122 additions and 1 deletions
				
			
		
							
								
								
									
										109
									
								
								fs/io_uring.c
									
									
									
									
									
								
							
							
						
						
									
										109
									
								
								fs/io_uring.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -76,6 +76,7 @@
 | 
			
		|||
#include <linux/fadvise.h>
 | 
			
		||||
#include <linux/eventpoll.h>
 | 
			
		||||
#include <linux/fs_struct.h>
 | 
			
		||||
#include <linux/splice.h>
 | 
			
		||||
 | 
			
		||||
#define CREATE_TRACE_POINTS
 | 
			
		||||
#include <trace/events/io_uring.h>
 | 
			
		||||
| 
						 | 
				
			
			@ -428,6 +429,15 @@ struct io_epoll {
 | 
			
		|||
	struct epoll_event		event;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct io_splice {
 | 
			
		||||
	struct file			*file_out;
 | 
			
		||||
	struct file			*file_in;
 | 
			
		||||
	loff_t				off_out;
 | 
			
		||||
	loff_t				off_in;
 | 
			
		||||
	u64				len;
 | 
			
		||||
	unsigned int			flags;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct io_async_connect {
 | 
			
		||||
	struct sockaddr_storage		address;
 | 
			
		||||
};
 | 
			
		||||
| 
						 | 
				
			
			@ -544,6 +554,7 @@ struct io_kiocb {
 | 
			
		|||
		struct io_fadvise	fadvise;
 | 
			
		||||
		struct io_madvise	madvise;
 | 
			
		||||
		struct io_epoll		epoll;
 | 
			
		||||
		struct io_splice	splice;
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	struct io_async_ctx		*io;
 | 
			
		||||
| 
						 | 
				
			
			@ -744,6 +755,11 @@ static const struct io_op_def io_op_defs[] = {
 | 
			
		|||
		.unbound_nonreg_file	= 1,
 | 
			
		||||
		.file_table		= 1,
 | 
			
		||||
	},
 | 
			
		||||
	[IORING_OP_SPLICE] = {
 | 
			
		||||
		.needs_file		= 1,
 | 
			
		||||
		.hash_reg_file		= 1,
 | 
			
		||||
		.unbound_nonreg_file	= 1,
 | 
			
		||||
	}
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static void io_wq_submit_work(struct io_wq_work **workptr);
 | 
			
		||||
| 
						 | 
				
			
			@ -758,6 +774,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 | 
			
		|||
static int io_grab_files(struct io_kiocb *req);
 | 
			
		||||
static void io_ring_file_ref_flush(struct fixed_file_data *data);
 | 
			
		||||
static void io_cleanup_req(struct io_kiocb *req);
 | 
			
		||||
static int io_file_get(struct io_submit_state *state,
 | 
			
		||||
		       struct io_kiocb *req,
 | 
			
		||||
		       int fd, struct file **out_file,
 | 
			
		||||
		       bool fixed);
 | 
			
		||||
 | 
			
		||||
static struct kmem_cache *req_cachep;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2404,6 +2424,77 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
 | 
			
		|||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 | 
			
		||||
{
 | 
			
		||||
	struct io_splice* sp = &req->splice;
 | 
			
		||||
	unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	if (req->flags & REQ_F_NEED_CLEANUP)
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	sp->file_in = NULL;
 | 
			
		||||
	sp->off_in = READ_ONCE(sqe->splice_off_in);
 | 
			
		||||
	sp->off_out = READ_ONCE(sqe->off);
 | 
			
		||||
	sp->len = READ_ONCE(sqe->len);
 | 
			
		||||
	sp->flags = READ_ONCE(sqe->splice_flags);
 | 
			
		||||
 | 
			
		||||
	if (unlikely(sp->flags & ~valid_flags))
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
 | 
			
		||||
	ret = io_file_get(NULL, req, READ_ONCE(sqe->splice_fd_in), &sp->file_in,
 | 
			
		||||
			  (sp->flags & SPLICE_F_FD_IN_FIXED));
 | 
			
		||||
	if (ret)
 | 
			
		||||
		return ret;
 | 
			
		||||
	req->flags |= REQ_F_NEED_CLEANUP;
 | 
			
		||||
 | 
			
		||||
	if (!S_ISREG(file_inode(sp->file_in)->i_mode))
 | 
			
		||||
		req->work.flags |= IO_WQ_WORK_UNBOUND;
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool io_splice_punt(struct file *file)
 | 
			
		||||
{
 | 
			
		||||
	if (get_pipe_info(file))
 | 
			
		||||
		return false;
 | 
			
		||||
	if (!io_file_supports_async(file))
 | 
			
		||||
		return true;
 | 
			
		||||
	return !(file->f_mode & O_NONBLOCK);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
 | 
			
		||||
		     bool force_nonblock)
 | 
			
		||||
{
 | 
			
		||||
	struct io_splice *sp = &req->splice;
 | 
			
		||||
	struct file *in = sp->file_in;
 | 
			
		||||
	struct file *out = sp->file_out;
 | 
			
		||||
	unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
 | 
			
		||||
	loff_t *poff_in, *poff_out;
 | 
			
		||||
	long ret;
 | 
			
		||||
 | 
			
		||||
	if (force_nonblock) {
 | 
			
		||||
		if (io_splice_punt(in) || io_splice_punt(out))
 | 
			
		||||
			return -EAGAIN;
 | 
			
		||||
		flags |= SPLICE_F_NONBLOCK;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	poff_in = (sp->off_in == -1) ? NULL : &sp->off_in;
 | 
			
		||||
	poff_out = (sp->off_out == -1) ? NULL : &sp->off_out;
 | 
			
		||||
	ret = do_splice(in, poff_in, out, poff_out, sp->len, flags);
 | 
			
		||||
	if (force_nonblock && ret == -EAGAIN)
 | 
			
		||||
		return -EAGAIN;
 | 
			
		||||
 | 
			
		||||
	io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
 | 
			
		||||
	req->flags &= ~REQ_F_NEED_CLEANUP;
 | 
			
		||||
 | 
			
		||||
	io_cqring_add_event(req, ret);
 | 
			
		||||
	if (ret != sp->len)
 | 
			
		||||
		req_set_fail_links(req);
 | 
			
		||||
	io_put_req_find_next(req, nxt);
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * IORING_OP_NOP just posts a completion event, nothing else.
 | 
			
		||||
 */
 | 
			
		||||
| 
						 | 
				
			
			@ -4230,6 +4321,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
 | 
			
		|||
	case IORING_OP_EPOLL_CTL:
 | 
			
		||||
		ret = io_epoll_ctl_prep(req, sqe);
 | 
			
		||||
		break;
 | 
			
		||||
	case IORING_OP_SPLICE:
 | 
			
		||||
		ret = io_splice_prep(req, sqe);
 | 
			
		||||
		break;
 | 
			
		||||
	default:
 | 
			
		||||
		printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
 | 
			
		||||
				req->opcode);
 | 
			
		||||
| 
						 | 
				
			
			@ -4292,6 +4386,10 @@ static void io_cleanup_req(struct io_kiocb *req)
 | 
			
		|||
	case IORING_OP_STATX:
 | 
			
		||||
		putname(req->open.filename);
 | 
			
		||||
		break;
 | 
			
		||||
	case IORING_OP_SPLICE:
 | 
			
		||||
		io_put_file(req, req->splice.file_in,
 | 
			
		||||
			    (req->splice.flags & SPLICE_F_FD_IN_FIXED));
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	req->flags &= ~REQ_F_NEED_CLEANUP;
 | 
			
		||||
| 
						 | 
				
			
			@ -4495,6 +4593,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 | 
			
		|||
		}
 | 
			
		||||
		ret = io_epoll_ctl(req, nxt, force_nonblock);
 | 
			
		||||
		break;
 | 
			
		||||
	case IORING_OP_SPLICE:
 | 
			
		||||
		if (sqe) {
 | 
			
		||||
			ret = io_splice_prep(req, sqe);
 | 
			
		||||
			if (ret < 0)
 | 
			
		||||
				break;
 | 
			
		||||
		}
 | 
			
		||||
		ret = io_splice(req, nxt, force_nonblock);
 | 
			
		||||
		break;
 | 
			
		||||
	default:
 | 
			
		||||
		ret = -EINVAL;
 | 
			
		||||
		break;
 | 
			
		||||
| 
						 | 
				
			
			@ -7230,6 +7336,7 @@ static int __init io_uring_init(void)
 | 
			
		|||
	BUILD_BUG_SQE_ELEM(8,  __u64,  off);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(8,  __u64,  addr2);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(16, __u64,  addr);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(16, __u64,  splice_off_in);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(24, __u32,  len);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(28,     __kernel_rwf_t, rw_flags);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(28, /* compat */   int, rw_flags);
 | 
			
		||||
| 
						 | 
				
			
			@ -7244,9 +7351,11 @@ static int __init io_uring_init(void)
 | 
			
		|||
	BUILD_BUG_SQE_ELEM(28, __u32,  open_flags);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(28, __u32,  statx_flags);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(28, __u32,  fadvise_advice);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(28, __u32,  splice_flags);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(32, __u64,  user_data);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(40, __u16,  buf_index);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(42, __u16,  personality);
 | 
			
		||||
	BUILD_BUG_SQE_ELEM(44, __s32,  splice_fd_in);
 | 
			
		||||
 | 
			
		||||
	BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
 | 
			
		||||
	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -23,7 +23,10 @@ struct io_uring_sqe {
 | 
			
		|||
		__u64	off;	/* offset into file */
 | 
			
		||||
		__u64	addr2;
 | 
			
		||||
	};
 | 
			
		||||
	__u64	addr;		/* pointer to buffer or iovecs */
 | 
			
		||||
	union {
 | 
			
		||||
		__u64	addr;	/* pointer to buffer or iovecs */
 | 
			
		||||
		__u64	splice_off_in;
 | 
			
		||||
	};
 | 
			
		||||
	__u32	len;		/* buffer size or number of iovecs */
 | 
			
		||||
	union {
 | 
			
		||||
		__kernel_rwf_t	rw_flags;
 | 
			
		||||
| 
						 | 
				
			
			@ -37,6 +40,7 @@ struct io_uring_sqe {
 | 
			
		|||
		__u32		open_flags;
 | 
			
		||||
		__u32		statx_flags;
 | 
			
		||||
		__u32		fadvise_advice;
 | 
			
		||||
		__u32		splice_flags;
 | 
			
		||||
	};
 | 
			
		||||
	__u64	user_data;	/* data to be passed back at completion time */
 | 
			
		||||
	union {
 | 
			
		||||
| 
						 | 
				
			
			@ -45,6 +49,7 @@ struct io_uring_sqe {
 | 
			
		|||
			__u16	buf_index;
 | 
			
		||||
			/* personality to use, if used */
 | 
			
		||||
			__u16	personality;
 | 
			
		||||
			__s32	splice_fd_in;
 | 
			
		||||
		};
 | 
			
		||||
		__u64	__pad2[3];
 | 
			
		||||
	};
 | 
			
		||||
| 
						 | 
				
			
			@ -113,6 +118,7 @@ enum {
 | 
			
		|||
	IORING_OP_RECV,
 | 
			
		||||
	IORING_OP_OPENAT2,
 | 
			
		||||
	IORING_OP_EPOLL_CTL,
 | 
			
		||||
	IORING_OP_SPLICE,
 | 
			
		||||
 | 
			
		||||
	/* this goes last, obviously */
 | 
			
		||||
	IORING_OP_LAST,
 | 
			
		||||
| 
						 | 
				
			
			@ -128,6 +134,12 @@ enum {
 | 
			
		|||
 */
 | 
			
		||||
#define IORING_TIMEOUT_ABS	(1U << 0)
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * sqe->splice_flags
 | 
			
		||||
 * extends splice(2) flags
 | 
			
		||||
 */
 | 
			
		||||
#define SPLICE_F_FD_IN_FIXED	(1U << 31) /* the last bit of __u32 */
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * IO completion data structure (Completion Queue Entry)
 | 
			
		||||
 */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue