mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	fs,io_uring: add infrastructure for uring-cmd
file_operations->uring_cmd is a file private handler. This is somewhat similar to ioctl but hopefully a lot more sane and useful, as it can be used to enable many io_uring capabilities for the underlying operation.

IORING_OP_URING_CMD is a file private kind of request. io_uring doesn't know what is in this command type; it's for the provider of ->uring_cmd() to deal with.

Co-developed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220511054750.20432-2-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
		
							parent
							
								
									2bb04df7c2
								
							
						
					
					
						commit
						ee692a21e9
					
				
					 4 changed files with 165 additions and 26 deletions
				
			
		
							
								
								
									
										135
									
								
								fs/io_uring.c
									
									
									
									
									
								
							
							
						
						
									
										135
									
								
								fs/io_uring.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -202,13 +202,6 @@ struct io_rings {
 | 
			
		|||
	struct io_uring_cqe	cqes[] ____cacheline_aligned_in_smp;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
enum io_uring_cmd_flags {
	IO_URING_F_COMPLETE_DEFER	= 1 << 0,
	IO_URING_F_UNLOCKED		= 1 << 1,
	/*
	 * Uses the sign bit of an int: a sign check is usually faster than
	 * a bit test.
	 */
	IO_URING_F_NONBLOCK		= INT_MIN,
};
 | 
			
		||||
 | 
			
		||||
struct io_mapped_ubuf {
 | 
			
		||||
	u64		ubuf;
 | 
			
		||||
	u64		ubuf_end;
 | 
			
		||||
| 
						 | 
				
			
			@ -972,6 +965,7 @@ struct io_kiocb {
 | 
			
		|||
		struct io_xattr		xattr;
 | 
			
		||||
		struct io_socket	sock;
 | 
			
		||||
		struct io_nop		nop;
 | 
			
		||||
		struct io_uring_cmd	uring_cmd;
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	u8				opcode;
 | 
			
		||||
| 
						 | 
				
			
			@ -1050,6 +1044,14 @@ struct io_cancel_data {
 | 
			
		|||
	int seq;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 * Size in bytes of the URING_CMD payload area of a submission entry.
 *
 * The payload starts at the 'cmd' member of the first sqe and, if SQE128 is
 * in use, runs on through the sqe that follows it. Any non-zero 'is_sqe128'
 * selects the two-sqe size; the argument is evaluated exactly once.
 */
#define uring_cmd_pdu_size(is_sqe128)				\
	(((is_sqe128) ? 2 : 1) * sizeof(struct io_uring_sqe) -	\
		offsetof(struct io_uring_sqe, cmd))
 | 
			
		||||
 | 
			
		||||
struct io_op_def {
 | 
			
		||||
	/* needs req->file assigned */
 | 
			
		||||
	unsigned		needs_file : 1;
 | 
			
		||||
| 
						 | 
				
			
			@ -1289,6 +1291,12 @@ static const struct io_op_def io_op_defs[] = {
 | 
			
		|||
	[IORING_OP_SOCKET] = {
 | 
			
		||||
		.audit_skip		= 1,
 | 
			
		||||
	},
 | 
			
		||||
	[IORING_OP_URING_CMD] = {
 | 
			
		||||
		.needs_file		= 1,
 | 
			
		||||
		.plug			= 1,
 | 
			
		||||
		.needs_async_setup	= 1,
 | 
			
		||||
		.async_size		= uring_cmd_pdu_size(1),
 | 
			
		||||
	},
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* requests with any of those set should undergo io_disarm_next() */
 | 
			
		||||
| 
						 | 
				
			
			@ -1428,6 +1436,8 @@ const char *io_uring_get_opcode(u8 opcode)
 | 
			
		|||
		return "GETXATTR";
 | 
			
		||||
	case IORING_OP_SOCKET:
 | 
			
		||||
		return "SOCKET";
 | 
			
		||||
	case IORING_OP_URING_CMD:
 | 
			
		||||
		return "URING_CMD";
 | 
			
		||||
	case IORING_OP_LAST:
 | 
			
		||||
		return "INVALID";
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -4507,10 +4517,6 @@ static int __io_getxattr_prep(struct io_kiocb *req,
 | 
			
		|||
	const char __user *name;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	if (unlikely(sqe->ioprio))
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 | 
			
		||||
		return -EBADF;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -4620,10 +4626,6 @@ static int __io_setxattr_prep(struct io_kiocb *req,
 | 
			
		|||
	const char __user *name;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	if (unlikely(sqe->ioprio))
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	if (unlikely(req->flags & REQ_F_FIXED_FILE))
 | 
			
		||||
		return -EBADF;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -4910,6 +4912,96 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
 | 
			
		|||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Task-work entry point installed by io_uring_cmd_complete_in_task().
 *
 * Invokes the callback previously stored in the request's uring_cmd, handing
 * it that same uring_cmd. 'locked' is not used here.
 */
static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
{
	struct io_uring_cmd *ioucmd = &req->uring_cmd;

	ioucmd->task_work_cb(ioucmd);
}
 | 
			
		||||
 | 
			
		||||
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
 | 
			
		||||
			void (*task_work_cb)(struct io_uring_cmd *))
 | 
			
		||||
{
 | 
			
		||||
	struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
 | 
			
		||||
 | 
			
		||||
	req->uring_cmd.task_work_cb = task_work_cb;
 | 
			
		||||
	req->io_task_work.func = io_uring_cmd_work;
 | 
			
		||||
	io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Called by consumers of io_uring_cmd, if they originally returned
 | 
			
		||||
 * -EIOCBQUEUED upon receiving the command.
 | 
			
		||||
 */
 | 
			
		||||
void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
 | 
			
		||||
{
 | 
			
		||||
	struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
 | 
			
		||||
 | 
			
		||||
	if (ret < 0)
 | 
			
		||||
		req_set_fail(req);
 | 
			
		||||
	if (req->ctx->flags & IORING_SETUP_CQE32)
 | 
			
		||||
		__io_req_complete32(req, 0, ret, 0, res2, 0);
 | 
			
		||||
	else
 | 
			
		||||
		io_req_complete(req, ret);
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
 | 
			
		||||
 | 
			
		||||
static int io_uring_cmd_prep_async(struct io_kiocb *req)
 | 
			
		||||
{
 | 
			
		||||
	size_t cmd_size;
 | 
			
		||||
 | 
			
		||||
	cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
 | 
			
		||||
 | 
			
		||||
	memcpy(req->async_data, req->uring_cmd.cmd, cmd_size);
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int io_uring_cmd_prep(struct io_kiocb *req,
 | 
			
		||||
			     const struct io_uring_sqe *sqe)
 | 
			
		||||
{
 | 
			
		||||
	struct io_uring_cmd *ioucmd = &req->uring_cmd;
 | 
			
		||||
 | 
			
		||||
	if (sqe->rw_flags)
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	ioucmd->cmd = sqe->cmd;
 | 
			
		||||
	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 | 
			
		||||
{
 | 
			
		||||
	struct io_uring_cmd *ioucmd = &req->uring_cmd;
 | 
			
		||||
	struct io_ring_ctx *ctx = req->ctx;
 | 
			
		||||
	struct file *file = req->file;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	if (!req->file->f_op->uring_cmd)
 | 
			
		||||
		return -EOPNOTSUPP;
 | 
			
		||||
 | 
			
		||||
	if (ctx->flags & IORING_SETUP_SQE128)
 | 
			
		||||
		issue_flags |= IO_URING_F_SQE128;
 | 
			
		||||
	if (ctx->flags & IORING_SETUP_CQE32)
 | 
			
		||||
		issue_flags |= IO_URING_F_CQE32;
 | 
			
		||||
	if (ctx->flags & IORING_SETUP_IOPOLL)
 | 
			
		||||
		issue_flags |= IO_URING_F_IOPOLL;
 | 
			
		||||
 | 
			
		||||
	if (req_has_async_data(req))
 | 
			
		||||
		ioucmd->cmd = req->async_data;
 | 
			
		||||
 | 
			
		||||
	ret = file->f_op->uring_cmd(ioucmd, issue_flags);
 | 
			
		||||
	if (ret == -EAGAIN) {
 | 
			
		||||
		if (!req_has_async_data(req)) {
 | 
			
		||||
			if (io_alloc_async_data(req))
 | 
			
		||||
				return -ENOMEM;
 | 
			
		||||
			io_uring_cmd_prep_async(req);
 | 
			
		||||
		}
 | 
			
		||||
		return -EAGAIN;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (ret != -EIOCBQUEUED)
 | 
			
		||||
		io_uring_cmd_done(ioucmd, ret, 0);
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int io_shutdown_prep(struct io_kiocb *req,
 | 
			
		||||
			    const struct io_uring_sqe *sqe)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -6305,9 +6397,7 @@ static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 | 
			
		|||
{
 | 
			
		||||
	struct io_socket *sock = &req->sock;
 | 
			
		||||
 | 
			
		||||
	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
	if (sqe->ioprio || sqe->addr || sqe->rw_flags || sqe->buf_index)
 | 
			
		||||
	if (sqe->addr || sqe->rw_flags || sqe->buf_index)
 | 
			
		||||
		return -EINVAL;
 | 
			
		||||
 | 
			
		||||
	sock->domain = READ_ONCE(sqe->fd);
 | 
			
		||||
| 
						 | 
				
			
			@ -7755,6 +7845,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 | 
			
		|||
		return io_getxattr_prep(req, sqe);
 | 
			
		||||
	case IORING_OP_SOCKET:
 | 
			
		||||
		return io_socket_prep(req, sqe);
 | 
			
		||||
	case IORING_OP_URING_CMD:
 | 
			
		||||
		return io_uring_cmd_prep(req, sqe);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
 | 
			
		||||
| 
						 | 
				
			
			@ -7787,6 +7879,8 @@ static int io_req_prep_async(struct io_kiocb *req)
 | 
			
		|||
		return io_recvmsg_prep_async(req);
 | 
			
		||||
	case IORING_OP_CONNECT:
 | 
			
		||||
		return io_connect_prep_async(req);
 | 
			
		||||
	case IORING_OP_URING_CMD:
 | 
			
		||||
		return io_uring_cmd_prep_async(req);
 | 
			
		||||
	}
 | 
			
		||||
	printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
 | 
			
		||||
		    req->opcode);
 | 
			
		||||
| 
						 | 
				
			
			@ -8081,6 +8175,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 | 
			
		|||
	case IORING_OP_SOCKET:
 | 
			
		||||
		ret = io_socket(req, issue_flags);
 | 
			
		||||
		break;
 | 
			
		||||
	case IORING_OP_URING_CMD:
 | 
			
		||||
		ret = io_uring_cmd(req, issue_flags);
 | 
			
		||||
		break;
 | 
			
		||||
	default:
 | 
			
		||||
		ret = -EINVAL;
 | 
			
		||||
		break;
 | 
			
		||||
| 
						 | 
				
			
			@ -12699,6 +12796,8 @@ static int __init io_uring_init(void)
 | 
			
		|||
 | 
			
		||||
	BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32));
 | 
			
		||||
 | 
			
		||||
	BUILD_BUG_ON(sizeof(struct io_uring_cmd) > 64);
 | 
			
		||||
 | 
			
		||||
	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
 | 
			
		||||
				SLAB_ACCOUNT);
 | 
			
		||||
	return 0;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1953,6 +1953,7 @@ struct dir_context {
 | 
			
		|||
#define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)
 | 
			
		||||
 | 
			
		||||
struct iov_iter;
 | 
			
		||||
struct io_uring_cmd;
 | 
			
		||||
 | 
			
		||||
struct file_operations {
 | 
			
		||||
	struct module *owner;
 | 
			
		||||
| 
						 | 
				
			
			@ -1995,6 +1996,7 @@ struct file_operations {
 | 
			
		|||
				   struct file *file_out, loff_t pos_out,
 | 
			
		||||
				   loff_t len, unsigned int remap_flags);
 | 
			
		||||
	int (*fadvise)(struct file *, loff_t, loff_t, int);
 | 
			
		||||
	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 | 
			
		||||
} __randomize_layout;
 | 
			
		||||
 | 
			
		||||
struct inode_operations {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,7 +5,32 @@
 | 
			
		|||
#include <linux/sched.h>
 | 
			
		||||
#include <linux/xarray.h>
 | 
			
		||||
 | 
			
		||||
/* Flag bits carried in the 'issue_flags' argument. */
enum io_uring_cmd_flags {
	IO_URING_F_COMPLETE_DEFER	= 1 << 0,
	IO_URING_F_UNLOCKED		= 1 << 1,

	/* ctx state flags, for URING_CMD */
	IO_URING_F_SQE128		= 1 << 2,
	IO_URING_F_CQE32		= 1 << 3,
	IO_URING_F_IOPOLL		= 1 << 4,

	/*
	 * Uses the sign bit of an int: a sign check is usually faster than
	 * a bit test.
	 */
	IO_URING_F_NONBLOCK		= INT_MIN,
};
 | 
			
		||||
 | 
			
		||||
struct io_uring_cmd {
 | 
			
		||||
	struct file	*file;
 | 
			
		||||
	const void	*cmd;
 | 
			
		||||
	/* callback to defer completions to task context */
 | 
			
		||||
	void (*task_work_cb)(struct io_uring_cmd *cmd);
 | 
			
		||||
	u32		cmd_op;
 | 
			
		||||
	u32		pad;
 | 
			
		||||
	u8		pdu[32]; /* available inline for free use */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#if defined(CONFIG_IO_URING)
 | 
			
		||||
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
 | 
			
		||||
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
 | 
			
		||||
			void (*task_work_cb)(struct io_uring_cmd *));
 | 
			
		||||
struct sock *io_uring_get_socket(struct file *file);
 | 
			
		||||
void __io_uring_cancel(bool cancel_all);
 | 
			
		||||
void __io_uring_free(struct task_struct *tsk);
 | 
			
		||||
| 
						 | 
				
			
			@ -30,6 +55,14 @@ static inline void io_uring_free(struct task_struct *tsk)
 | 
			
		|||
		__io_uring_free(tsk);
 | 
			
		||||
}
 | 
			
		||||
#else
 | 
			
		||||
/*
 * Stub used when CONFIG_IO_URING is not defined: does nothing.
 *
 * The parameter names match the real prototype (cmd, ret, res2).
 */
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
		ssize_t res2)
{
}
 | 
			
		||||
/*
 * Stub used when CONFIG_IO_URING is not defined: does nothing, and
 * 'task_work_cb' is never invoked.
 */
static inline void
io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
			      void (*task_work_cb)(struct io_uring_cmd *))
{
}
 | 
			
		||||
static inline struct sock *io_uring_get_socket(struct file *file)
 | 
			
		||||
{
 | 
			
		||||
	return NULL;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -22,6 +22,7 @@ struct io_uring_sqe {
 | 
			
		|||
	union {
 | 
			
		||||
		__u64	off;	/* offset into file */
 | 
			
		||||
		__u64	addr2;
 | 
			
		||||
		__u32	cmd_op;
 | 
			
		||||
	};
 | 
			
		||||
	union {
 | 
			
		||||
		__u64	addr;	/* pointer to buffer or iovecs */
 | 
			
		||||
| 
						 | 
				
			
			@ -61,14 +62,17 @@ struct io_uring_sqe {
 | 
			
		|||
		__s32	splice_fd_in;
 | 
			
		||||
		__u32	file_index;
 | 
			
		||||
	};
 | 
			
		||||
	__u64	addr3;
 | 
			
		||||
	__u64	__pad2[1];
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * If the ring is initialized with IORING_SETUP_SQE128, then this field
 | 
			
		||||
	 * contains 64-bytes of padding, doubling the size of the SQE.
 | 
			
		||||
	 */
 | 
			
		||||
	__u64	__big_sqe_pad[0];
 | 
			
		||||
	union {
 | 
			
		||||
		struct {
 | 
			
		||||
			__u64	addr3;
 | 
			
		||||
			__u64	__pad2[1];
 | 
			
		||||
		};
 | 
			
		||||
		/*
 | 
			
		||||
		 * If the ring is initialized with IORING_SETUP_SQE128, then
 | 
			
		||||
		 * this field is used for 80 bytes of arbitrary command data
 | 
			
		||||
		 */
 | 
			
		||||
		__u8	cmd[0];
 | 
			
		||||
	};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
enum {
 | 
			
		||||
| 
						 | 
				
			
			@ -175,6 +179,7 @@ enum io_uring_op {
 | 
			
		|||
	IORING_OP_FGETXATTR,
 | 
			
		||||
	IORING_OP_GETXATTR,
 | 
			
		||||
	IORING_OP_SOCKET,
 | 
			
		||||
	IORING_OP_URING_CMD,
 | 
			
		||||
 | 
			
		||||
	/* this goes last, obviously */
 | 
			
		||||
	IORING_OP_LAST,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue