forked from mirrors/linux
		
fs,io_uring: add infrastructure for uring-cmd

file_operations->uring_cmd is a file private handler. This is somewhat
similar to ioctl but hopefully a lot more sane and useful, as it can be
used to enable many io_uring capabilities for the underlying operation.

IORING_OP_URING_CMD is a file private kind of request. io_uring doesn't
know what is in this command type; it's for the provider of ->uring_cmd()
to deal with.

Co-developed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220511054750.20432-2-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
		
							parent
							
								
									2bb04df7c2
								
							
						
					
					
						commit
						ee692a21e9
					
				
					 4 changed files with 165 additions and 26 deletions
				
			
		
							
								
								
									
										135
									
								
								fs/io_uring.c
									
									
									
									
									
								
							
							
						
						
									
										135
									
								
								fs/io_uring.c
									
									
									
									
									
								
							|  | @ -202,13 +202,6 @@ struct io_rings { | |||
| 	struct io_uring_cqe	cqes[] ____cacheline_aligned_in_smp; | ||||
| }; | ||||
| 
 | ||||
| enum io_uring_cmd_flags { | ||||
| 	IO_URING_F_COMPLETE_DEFER	= 1, | ||||
| 	IO_URING_F_UNLOCKED		= 2, | ||||
| 	/* int's last bit, sign checks are usually faster than a bit test */ | ||||
| 	IO_URING_F_NONBLOCK		= INT_MIN, | ||||
| }; | ||||
| 
 | ||||
| struct io_mapped_ubuf { | ||||
| 	u64		ubuf; | ||||
| 	u64		ubuf_end; | ||||
|  | @ -972,6 +965,7 @@ struct io_kiocb { | |||
| 		struct io_xattr		xattr; | ||||
| 		struct io_socket	sock; | ||||
| 		struct io_nop		nop; | ||||
| 		struct io_uring_cmd	uring_cmd; | ||||
| 	}; | ||||
| 
 | ||||
| 	u8				opcode; | ||||
|  | @ -1050,6 +1044,14 @@ struct io_cancel_data { | |||
| 	int seq; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * The URING_CMD payload starts at 'cmd' in the first sqe, and continues into | ||||
|  * the following sqe if SQE128 is used. | ||||
|  */ | ||||
| #define uring_cmd_pdu_size(is_sqe128)				\ | ||||
| 	((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) -	\ | ||||
| 		offsetof(struct io_uring_sqe, cmd)) | ||||
| 
 | ||||
| struct io_op_def { | ||||
| 	/* needs req->file assigned */ | ||||
| 	unsigned		needs_file : 1; | ||||
|  | @ -1289,6 +1291,12 @@ static const struct io_op_def io_op_defs[] = { | |||
| 	[IORING_OP_SOCKET] = { | ||||
| 		.audit_skip		= 1, | ||||
| 	}, | ||||
| 	[IORING_OP_URING_CMD] = { | ||||
| 		.needs_file		= 1, | ||||
| 		.plug			= 1, | ||||
| 		.needs_async_setup	= 1, | ||||
| 		.async_size		= uring_cmd_pdu_size(1), | ||||
| 	}, | ||||
| }; | ||||
| 
 | ||||
| /* requests with any of those set should undergo io_disarm_next() */ | ||||
|  | @ -1428,6 +1436,8 @@ const char *io_uring_get_opcode(u8 opcode) | |||
| 		return "GETXATTR"; | ||||
| 	case IORING_OP_SOCKET: | ||||
| 		return "SOCKET"; | ||||
| 	case IORING_OP_URING_CMD: | ||||
| 		return "URING_CMD"; | ||||
| 	case IORING_OP_LAST: | ||||
| 		return "INVALID"; | ||||
| 	} | ||||
|  | @ -4507,10 +4517,6 @@ static int __io_getxattr_prep(struct io_kiocb *req, | |||
| 	const char __user *name; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) | ||||
| 		return -EINVAL; | ||||
| 	if (unlikely(sqe->ioprio)) | ||||
| 		return -EINVAL; | ||||
| 	if (unlikely(req->flags & REQ_F_FIXED_FILE)) | ||||
| 		return -EBADF; | ||||
| 
 | ||||
|  | @ -4620,10 +4626,6 @@ static int __io_setxattr_prep(struct io_kiocb *req, | |||
| 	const char __user *name; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) | ||||
| 		return -EINVAL; | ||||
| 	if (unlikely(sqe->ioprio)) | ||||
| 		return -EINVAL; | ||||
| 	if (unlikely(req->flags & REQ_F_FIXED_FILE)) | ||||
| 		return -EBADF; | ||||
| 
 | ||||
|  | @ -4910,6 +4912,96 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags) | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void io_uring_cmd_work(struct io_kiocb *req, bool *locked) | ||||
| { | ||||
| 	req->uring_cmd.task_work_cb(&req->uring_cmd); | ||||
| } | ||||
| 
 | ||||
| void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, | ||||
| 			void (*task_work_cb)(struct io_uring_cmd *)) | ||||
| { | ||||
| 	struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd); | ||||
| 
 | ||||
| 	req->uring_cmd.task_work_cb = task_work_cb; | ||||
| 	req->io_task_work.func = io_uring_cmd_work; | ||||
| 	io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL)); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task); | ||||
| 
 | ||||
| /*
 | ||||
|  * Called by consumers of io_uring_cmd, if they originally returned | ||||
|  * -EIOCBQUEUED upon receiving the command. | ||||
|  */ | ||||
| void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2) | ||||
| { | ||||
| 	struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd); | ||||
| 
 | ||||
| 	if (ret < 0) | ||||
| 		req_set_fail(req); | ||||
| 	if (req->ctx->flags & IORING_SETUP_CQE32) | ||||
| 		__io_req_complete32(req, 0, ret, 0, res2, 0); | ||||
| 	else | ||||
| 		io_req_complete(req, ret); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(io_uring_cmd_done); | ||||
| 
 | ||||
| static int io_uring_cmd_prep_async(struct io_kiocb *req) | ||||
| { | ||||
| 	size_t cmd_size; | ||||
| 
 | ||||
| 	cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128); | ||||
| 
 | ||||
| 	memcpy(req->async_data, req->uring_cmd.cmd, cmd_size); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int io_uring_cmd_prep(struct io_kiocb *req, | ||||
| 			     const struct io_uring_sqe *sqe) | ||||
| { | ||||
| 	struct io_uring_cmd *ioucmd = &req->uring_cmd; | ||||
| 
 | ||||
| 	if (sqe->rw_flags) | ||||
| 		return -EINVAL; | ||||
| 	ioucmd->cmd = sqe->cmd; | ||||
| 	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
/*
 * Issue a uring_cmd request by handing it to the file's ->uring_cmd()
 * handler. Ring-geometry flags (SQE128/CQE32/IOPOLL) are folded into
 * issue_flags so the driver knows the payload/CQE layout it is given.
 *
 * Return contract with the driver:
 *   -EIOCBQUEUED  driver owns the request; it completes later via
 *                 io_uring_cmd_done()
 *   -EAGAIN       could not proceed without blocking; stash the SQE
 *                 payload in async_data and retry from a worker
 *   anything else treated as the final result and completed here
 */
static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_uring_cmd *ioucmd = &req->uring_cmd;
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = req->file;
	int ret;

	if (!req->file->f_op->uring_cmd)
		return -EOPNOTSUPP;

	/* advertise the ring setup to the driver */
	if (ctx->flags & IORING_SETUP_SQE128)
		issue_flags |= IO_URING_F_SQE128;
	if (ctx->flags & IORING_SETUP_CQE32)
		issue_flags |= IO_URING_F_CQE32;
	if (ctx->flags & IORING_SETUP_IOPOLL)
		issue_flags |= IO_URING_F_IOPOLL;

	/* on retry, point at the stable copy made by prep_async */
	if (req_has_async_data(req))
		ioucmd->cmd = req->async_data;

	ret = file->f_op->uring_cmd(ioucmd, issue_flags);
	if (ret == -EAGAIN) {
		/*
		 * First -EAGAIN: preserve the SQE payload before the SQ
		 * entry can be reused, then punt to async context.
		 */
		if (!req_has_async_data(req)) {
			if (io_alloc_async_data(req))
				return -ENOMEM;
			io_uring_cmd_prep_async(req);
		}
		return -EAGAIN;
	}

	/* -EIOCBQUEUED means the driver will complete asynchronously */
	if (ret != -EIOCBQUEUED)
		io_uring_cmd_done(ioucmd, ret, 0);
	return 0;
}
| 
 | ||||
| static int io_shutdown_prep(struct io_kiocb *req, | ||||
| 			    const struct io_uring_sqe *sqe) | ||||
| { | ||||
|  | @ -6305,9 +6397,7 @@ static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) | |||
| { | ||||
| 	struct io_socket *sock = &req->sock; | ||||
| 
 | ||||
| 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) | ||||
| 		return -EINVAL; | ||||
| 	if (sqe->ioprio || sqe->addr || sqe->rw_flags || sqe->buf_index) | ||||
| 	if (sqe->addr || sqe->rw_flags || sqe->buf_index) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	sock->domain = READ_ONCE(sqe->fd); | ||||
|  | @ -7755,6 +7845,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) | |||
| 		return io_getxattr_prep(req, sqe); | ||||
| 	case IORING_OP_SOCKET: | ||||
| 		return io_socket_prep(req, sqe); | ||||
| 	case IORING_OP_URING_CMD: | ||||
| 		return io_uring_cmd_prep(req, sqe); | ||||
| 	} | ||||
| 
 | ||||
| 	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", | ||||
|  | @ -7787,6 +7879,8 @@ static int io_req_prep_async(struct io_kiocb *req) | |||
| 		return io_recvmsg_prep_async(req); | ||||
| 	case IORING_OP_CONNECT: | ||||
| 		return io_connect_prep_async(req); | ||||
| 	case IORING_OP_URING_CMD: | ||||
| 		return io_uring_cmd_prep_async(req); | ||||
| 	} | ||||
| 	printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n", | ||||
| 		    req->opcode); | ||||
|  | @ -8081,6 +8175,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) | |||
| 	case IORING_OP_SOCKET: | ||||
| 		ret = io_socket(req, issue_flags); | ||||
| 		break; | ||||
| 	case IORING_OP_URING_CMD: | ||||
| 		ret = io_uring_cmd(req, issue_flags); | ||||
| 		break; | ||||
| 	default: | ||||
| 		ret = -EINVAL; | ||||
| 		break; | ||||
|  | @ -12699,6 +12796,8 @@ static int __init io_uring_init(void) | |||
| 
 | ||||
| 	BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32)); | ||||
| 
 | ||||
| 	BUILD_BUG_ON(sizeof(struct io_uring_cmd) > 64); | ||||
| 
 | ||||
| 	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | | ||||
| 				SLAB_ACCOUNT); | ||||
| 	return 0; | ||||
|  |  | |||
|  | @ -1953,6 +1953,7 @@ struct dir_context { | |||
| #define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN) | ||||
| 
 | ||||
| struct iov_iter; | ||||
| struct io_uring_cmd; | ||||
| 
 | ||||
| struct file_operations { | ||||
| 	struct module *owner; | ||||
|  | @ -1995,6 +1996,7 @@ struct file_operations { | |||
| 				   struct file *file_out, loff_t pos_out, | ||||
| 				   loff_t len, unsigned int remap_flags); | ||||
| 	int (*fadvise)(struct file *, loff_t, loff_t, int); | ||||
| 	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); | ||||
| } __randomize_layout; | ||||
| 
 | ||||
| struct inode_operations { | ||||
|  |  | |||
|  | @ -5,7 +5,32 @@ | |||
| #include <linux/sched.h> | ||||
| #include <linux/xarray.h> | ||||
| 
 | ||||
/*
 * Flags passed down to request issue handlers, and — for URING_CMD — on
 * to the file's ->uring_cmd() implementation via issue_flags.
 */
enum io_uring_cmd_flags {
	IO_URING_F_COMPLETE_DEFER	= 1,
	IO_URING_F_UNLOCKED		= 2,
	/* int's last bit, sign checks are usually faster than a bit test */
	IO_URING_F_NONBLOCK		= INT_MIN,

	/* ctx state flags, for URING_CMD */
	IO_URING_F_SQE128		= 4,
	IO_URING_F_CQE32		= 8,
	IO_URING_F_IOPOLL		= 16,
};
| 
 | ||||
/*
 * Driver-facing view of a uring_cmd request; overlays the per-opcode
 * union in struct io_kiocb, so its size is bounded (a BUILD_BUG_ON in
 * io_uring_init() enforces <= 64 bytes).
 */
struct io_uring_cmd {
	struct file	*file;
	/* payload in the SQE, or in async_data once the request is deferred */
	const void	*cmd;
	/* callback to defer completions to task context */
	void (*task_work_cb)(struct io_uring_cmd *cmd);
	u32		cmd_op;
	u32		pad;
	u8		pdu[32]; /* available inline for free use */
};
| 
 | ||||
| #if defined(CONFIG_IO_URING) | ||||
| void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2); | ||||
| void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, | ||||
| 			void (*task_work_cb)(struct io_uring_cmd *)); | ||||
| struct sock *io_uring_get_socket(struct file *file); | ||||
| void __io_uring_cancel(bool cancel_all); | ||||
| void __io_uring_free(struct task_struct *tsk); | ||||
|  | @ -30,6 +55,14 @@ static inline void io_uring_free(struct task_struct *tsk) | |||
| 		__io_uring_free(tsk); | ||||
| } | ||||
| #else | ||||
/* CONFIG_IO_URING=n: uring_cmd completion helpers compile to no-ops */
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
		ssize_t ret2)
{
}
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
			void (*task_work_cb)(struct io_uring_cmd *))
{
}
| static inline struct sock *io_uring_get_socket(struct file *file) | ||||
| { | ||||
| 	return NULL; | ||||
|  |  | |||
|  | @ -22,6 +22,7 @@ struct io_uring_sqe { | |||
| 	union { | ||||
| 		__u64	off;	/* offset into file */ | ||||
| 		__u64	addr2; | ||||
| 		__u32	cmd_op; | ||||
| 	}; | ||||
| 	union { | ||||
| 		__u64	addr;	/* pointer to buffer or iovecs */ | ||||
|  | @ -61,14 +62,17 @@ struct io_uring_sqe { | |||
| 		__s32	splice_fd_in; | ||||
| 		__u32	file_index; | ||||
| 	}; | ||||
| 	__u64	addr3; | ||||
| 	__u64	__pad2[1]; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If the ring is initialized with IORING_SETUP_SQE128, then this field | ||||
| 	 * contains 64-bytes of padding, doubling the size of the SQE. | ||||
| 	 */ | ||||
| 	__u64	__big_sqe_pad[0]; | ||||
| 	union { | ||||
| 		struct { | ||||
| 			__u64	addr3; | ||||
| 			__u64	__pad2[1]; | ||||
| 		}; | ||||
| 		/*
 | ||||
| 		 * If the ring is initialized with IORING_SETUP_SQE128, then | ||||
| 		 * this field is used for 80 bytes of arbitrary command data | ||||
| 		 */ | ||||
| 		__u8	cmd[0]; | ||||
| 	}; | ||||
| }; | ||||
| 
 | ||||
| enum { | ||||
|  | @ -175,6 +179,7 @@ enum io_uring_op { | |||
| 	IORING_OP_FGETXATTR, | ||||
| 	IORING_OP_GETXATTR, | ||||
| 	IORING_OP_SOCKET, | ||||
| 	IORING_OP_URING_CMD, | ||||
| 
 | ||||
| 	/* this goes last, obviously */ | ||||
| 	IORING_OP_LAST, | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Jens Axboe
						Jens Axboe