forked from mirrors/linux
		
	io_uring: add support for fixed wait regions
Generally applications have one or a few ways of waiting, yet they pass
in a struct io_uring_getevents_arg every time. This needs to get copied
and, in turn, the timeout value needs to get copied.
Rather than do this for every invocation, allow the application to
register a fixed set of wait regions that can simply be indexed when
asking the kernel to wait on events.
At ring setup time, the application can register a number of these wait
regions and initialize region/index 0 upfront:
	struct io_uring_reg_wait *reg;
	reg = io_uring_setup_reg_wait(ring, nr_regions, &ret);
	/* set timeout and mark as set, sigmask/sigmask_sz as needed */
	reg->ts.tv_sec = 0;
	reg->ts.tv_nsec = 100000;
	reg->flags = IORING_REG_WAIT_TS;
where nr_regions >= 1 && nr_regions <= PAGE_SIZE / sizeof(*reg). The
above initializes index 0, but the remaining registered regions (up to 63
more with a 4kb page size) can be initialized as well, if needed. Now,
instead of doing:
	struct __kernel_timespec timeout = { .tv_nsec = 100000, };
	io_uring_submit_and_wait_timeout(ring, &cqe, nr, &timeout, NULL);
to wait for events for each submit_and_wait, or just wait, operation, it
can just reference the above region at offset 0 and do:
	io_uring_submit_and_wait_reg(ring, &cqe, nr, 0);
to achieve the same goal of waiting 100usec without needing to copy
both struct io_uring_getevents_arg (24b) and struct __kernel_timespec
(16b) for each invocation. Struct io_uring_reg_wait looks as follows:
struct io_uring_reg_wait {
	struct __kernel_timespec	ts;
	__u32				min_wait_usec;
	__u32				flags;
	__u64				sigmask;
	__u32				sigmask_sz;
	__u32				pad[3];
	__u64				pad2[2];
};
embedding the timeout itself in the region, rather than passing it as
a pointer as well. Note that the signal mask is still passed as a
pointer, both for compatibility reasons and because there don't seem
to be many high frequency wait scenarios that involve setting and
resetting the signal mask for each wait.
The application is free to modify any region before a wait call, or it
can keep multiple regions with different settings to avoid needing to
modify the same one for wait calls. Up to a page size of regions is mapped
by default, allowing PAGE_SIZE / 64 available regions for use.
The registered region must fit within a page. On a 4kb page size system,
that allows for 64 wait regions if a full page is used, as the size of
struct io_uring_reg_wait is 64b. The start of the registered region must
be aligned to the size of struct io_uring_reg_wait. It's valid to register
fewer than 64 entries.
In network performance testing with zero-copy, this reduced the time
spent waiting on the TX side from 3.12% to 0.3% and the RX side from 4.4%
to 0.3%.
Wait regions are fixed for the lifetime of the ring - once registered,
they are persistent until the ring is torn down. The regions support
minimum wait timeout as well as the regular waits.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
			
			
This commit is contained in:
		
							parent
							
								
									371b47da25
								
							
						
					
					
						commit
						aa00f67adc
					
				
					 5 changed files with 192 additions and 12 deletions
				
			
		| 
						 | 
					@ -327,6 +327,14 @@ struct io_ring_ctx {
 | 
				
			||||||
		atomic_t		cq_wait_nr;
 | 
							atomic_t		cq_wait_nr;
 | 
				
			||||||
		atomic_t		cq_timeouts;
 | 
							atomic_t		cq_timeouts;
 | 
				
			||||||
		struct wait_queue_head	cq_wait;
 | 
							struct wait_queue_head	cq_wait;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * If registered with IORING_REGISTER_CQWAIT_REG, a single
 | 
				
			||||||
 | 
							 * page holds N entries, mapped in cq_wait_arg. cq_wait_index
 | 
				
			||||||
 | 
							 * is the maximum allowable index.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							struct io_uring_reg_wait	*cq_wait_arg;
 | 
				
			||||||
 | 
							unsigned char			cq_wait_index;
 | 
				
			||||||
	} ____cacheline_aligned_in_smp;
 | 
						} ____cacheline_aligned_in_smp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* timeouts */
 | 
						/* timeouts */
 | 
				
			||||||
| 
						 | 
					@ -430,6 +438,8 @@ struct io_ring_ctx {
 | 
				
			||||||
	unsigned short			n_sqe_pages;
 | 
						unsigned short			n_sqe_pages;
 | 
				
			||||||
	struct page			**ring_pages;
 | 
						struct page			**ring_pages;
 | 
				
			||||||
	struct page			**sqe_pages;
 | 
						struct page			**sqe_pages;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						struct page			**cq_wait_page;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct io_tw_state {
 | 
					struct io_tw_state {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -518,6 +518,7 @@ struct io_cqring_offsets {
 | 
				
			||||||
#define IORING_ENTER_EXT_ARG		(1U << 3)
 | 
					#define IORING_ENTER_EXT_ARG		(1U << 3)
 | 
				
			||||||
#define IORING_ENTER_REGISTERED_RING	(1U << 4)
 | 
					#define IORING_ENTER_REGISTERED_RING	(1U << 4)
 | 
				
			||||||
#define IORING_ENTER_ABS_TIMER		(1U << 5)
 | 
					#define IORING_ENTER_ABS_TIMER		(1U << 5)
 | 
				
			||||||
 | 
					#define IORING_ENTER_EXT_ARG_REG	(1U << 6)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Passed in for io_uring_setup(2). Copied back with updated info on success
 | 
					 * Passed in for io_uring_setup(2). Copied back with updated info on success
 | 
				
			||||||
| 
						 | 
					@ -620,6 +621,9 @@ enum io_uring_register_op {
 | 
				
			||||||
	/* resize CQ ring */
 | 
						/* resize CQ ring */
 | 
				
			||||||
	IORING_REGISTER_RESIZE_RINGS		= 33,
 | 
						IORING_REGISTER_RESIZE_RINGS		= 33,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* register fixed io_uring_reg_wait arguments */
 | 
				
			||||||
 | 
						IORING_REGISTER_CQWAIT_REG		= 34,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* this goes last */
 | 
						/* this goes last */
 | 
				
			||||||
	IORING_REGISTER_LAST,
 | 
						IORING_REGISTER_LAST,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -803,6 +807,43 @@ enum io_uring_register_restriction_op {
 | 
				
			||||||
	IORING_RESTRICTION_LAST
 | 
						IORING_RESTRICTION_LAST
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					enum {
 | 
				
			||||||
 | 
						IORING_REG_WAIT_TS		= (1U << 0),
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Argument for IORING_REGISTER_CQWAIT_REG, registering a region of
 | 
				
			||||||
 | 
					 * struct io_uring_reg_wait that can be indexed when io_uring_enter(2) is
 | 
				
			||||||
 | 
					 * called rather than pass in a wait argument structure separately.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					struct io_uring_cqwait_reg_arg {
 | 
				
			||||||
 | 
						__u32		flags;
 | 
				
			||||||
 | 
						__u32		struct_size;
 | 
				
			||||||
 | 
						__u32		nr_entries;
 | 
				
			||||||
 | 
						__u32		pad;
 | 
				
			||||||
 | 
						__u64		user_addr;
 | 
				
			||||||
 | 
						__u64		pad2[3];
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Argument for io_uring_enter(2) with
 | 
				
			||||||
 | 
					 * IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG_REG set, where the actual argument
 | 
				
			||||||
 | 
					 * is an index into a previously registered fixed wait region described by
 | 
				
			||||||
 | 
					 * the below structure.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					struct io_uring_reg_wait {
 | 
				
			||||||
 | 
						struct __kernel_timespec	ts;
 | 
				
			||||||
 | 
						__u32				min_wait_usec;
 | 
				
			||||||
 | 
						__u32				flags;
 | 
				
			||||||
 | 
						__u64				sigmask;
 | 
				
			||||||
 | 
						__u32				sigmask_sz;
 | 
				
			||||||
 | 
						__u32				pad[3];
 | 
				
			||||||
 | 
						__u64				pad2[2];
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Argument for io_uring_enter(2) with IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
struct io_uring_getevents_arg {
 | 
					struct io_uring_getevents_arg {
 | 
				
			||||||
	__u64	sigmask;
 | 
						__u64	sigmask;
 | 
				
			||||||
	__u32	sigmask_sz;
 | 
						__u32	sigmask_sz;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2736,6 +2736,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 | 
				
			||||||
	io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
 | 
						io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
 | 
				
			||||||
	io_futex_cache_free(ctx);
 | 
						io_futex_cache_free(ctx);
 | 
				
			||||||
	io_destroy_buffers(ctx);
 | 
						io_destroy_buffers(ctx);
 | 
				
			||||||
 | 
						io_unregister_cqwait_reg(ctx);
 | 
				
			||||||
	mutex_unlock(&ctx->uring_lock);
 | 
						mutex_unlock(&ctx->uring_lock);
 | 
				
			||||||
	if (ctx->sq_creds)
 | 
						if (ctx->sq_creds)
 | 
				
			||||||
		put_cred(ctx->sq_creds);
 | 
							put_cred(ctx->sq_creds);
 | 
				
			||||||
| 
						 | 
					@ -3224,21 +3225,43 @@ void __io_uring_cancel(bool cancel_all)
 | 
				
			||||||
	io_uring_cancel_generic(cancel_all, NULL);
 | 
						io_uring_cancel_generic(cancel_all, NULL);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int io_validate_ext_arg(unsigned flags, const void __user *argp, size_t argsz)
 | 
					static struct io_uring_reg_wait *io_get_ext_arg_reg(struct io_ring_ctx *ctx,
 | 
				
			||||||
 | 
								const struct io_uring_getevents_arg __user *uarg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct io_uring_reg_wait *arg = READ_ONCE(ctx->cq_wait_arg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (arg) {
 | 
				
			||||||
 | 
							unsigned int index = (unsigned int) (uintptr_t) uarg;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (index <= ctx->cq_wait_index)
 | 
				
			||||||
 | 
								return arg + index;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return ERR_PTR(-EFAULT);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int io_validate_ext_arg(struct io_ring_ctx *ctx, unsigned flags,
 | 
				
			||||||
 | 
								       const void __user *argp, size_t argsz)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (flags & IORING_ENTER_EXT_ARG) {
 | 
					 | 
				
			||||||
	struct io_uring_getevents_arg arg;
 | 
						struct io_uring_getevents_arg arg;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!(flags & IORING_ENTER_EXT_ARG))
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (flags & IORING_ENTER_EXT_ARG_REG) {
 | 
				
			||||||
 | 
							if (argsz != sizeof(struct io_uring_reg_wait))
 | 
				
			||||||
 | 
								return -EINVAL;
 | 
				
			||||||
 | 
							return PTR_ERR(io_get_ext_arg_reg(ctx, argp));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	if (argsz != sizeof(arg))
 | 
						if (argsz != sizeof(arg))
 | 
				
			||||||
		return -EINVAL;
 | 
							return -EINVAL;
 | 
				
			||||||
	if (copy_from_user(&arg, argp, sizeof(arg)))
 | 
						if (copy_from_user(&arg, argp, sizeof(arg)))
 | 
				
			||||||
		return -EFAULT;
 | 
							return -EFAULT;
 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int io_get_ext_arg(unsigned flags, const void __user *argp,
 | 
					static int io_get_ext_arg(struct io_ring_ctx *ctx, unsigned flags,
 | 
				
			||||||
			  struct ext_arg *ext_arg)
 | 
								  const void __user *argp, struct ext_arg *ext_arg)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	const struct io_uring_getevents_arg __user *uarg = argp;
 | 
						const struct io_uring_getevents_arg __user *uarg = argp;
 | 
				
			||||||
	struct io_uring_getevents_arg arg;
 | 
						struct io_uring_getevents_arg arg;
 | 
				
			||||||
| 
						 | 
					@ -3252,6 +3275,28 @@ static int io_get_ext_arg(unsigned flags, const void __user *argp,
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (flags & IORING_ENTER_EXT_ARG_REG) {
 | 
				
			||||||
 | 
							struct io_uring_reg_wait *w;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (ext_arg->argsz != sizeof(struct io_uring_reg_wait))
 | 
				
			||||||
 | 
								return -EINVAL;
 | 
				
			||||||
 | 
							w = io_get_ext_arg_reg(ctx, argp);
 | 
				
			||||||
 | 
							if (IS_ERR(w))
 | 
				
			||||||
 | 
								return PTR_ERR(w);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (w->flags & ~IORING_REG_WAIT_TS)
 | 
				
			||||||
 | 
								return -EINVAL;
 | 
				
			||||||
 | 
							ext_arg->min_time = READ_ONCE(w->min_wait_usec) * NSEC_PER_USEC;
 | 
				
			||||||
 | 
							ext_arg->sig = u64_to_user_ptr(READ_ONCE(w->sigmask));
 | 
				
			||||||
 | 
							ext_arg->argsz = READ_ONCE(w->sigmask_sz);
 | 
				
			||||||
 | 
							if (w->flags & IORING_REG_WAIT_TS) {
 | 
				
			||||||
 | 
								ext_arg->ts.tv_sec = READ_ONCE(w->ts.tv_sec);
 | 
				
			||||||
 | 
								ext_arg->ts.tv_nsec = READ_ONCE(w->ts.tv_nsec);
 | 
				
			||||||
 | 
								ext_arg->ts_set = true;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * EXT_ARG is set - ensure we agree on the size of it and copy in our
 | 
						 * EXT_ARG is set - ensure we agree on the size of it and copy in our
 | 
				
			||||||
	 * timespec and sigset_t pointers if good.
 | 
						 * timespec and sigset_t pointers if good.
 | 
				
			||||||
| 
						 | 
					@ -3297,7 +3342,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 | 
				
			||||||
	if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
 | 
						if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
 | 
				
			||||||
			       IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG |
 | 
								       IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG |
 | 
				
			||||||
			       IORING_ENTER_REGISTERED_RING |
 | 
								       IORING_ENTER_REGISTERED_RING |
 | 
				
			||||||
			       IORING_ENTER_ABS_TIMER)))
 | 
								       IORING_ENTER_ABS_TIMER |
 | 
				
			||||||
 | 
								       IORING_ENTER_EXT_ARG_REG)))
 | 
				
			||||||
		return -EINVAL;
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
| 
						 | 
					@ -3380,7 +3426,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 | 
				
			||||||
			 */
 | 
								 */
 | 
				
			||||||
			mutex_lock(&ctx->uring_lock);
 | 
								mutex_lock(&ctx->uring_lock);
 | 
				
			||||||
iopoll_locked:
 | 
					iopoll_locked:
 | 
				
			||||||
			ret2 = io_validate_ext_arg(flags, argp, argsz);
 | 
								ret2 = io_validate_ext_arg(ctx, flags, argp, argsz);
 | 
				
			||||||
			if (likely(!ret2)) {
 | 
								if (likely(!ret2)) {
 | 
				
			||||||
				min_complete = min(min_complete,
 | 
									min_complete = min(min_complete,
 | 
				
			||||||
						   ctx->cq_entries);
 | 
											   ctx->cq_entries);
 | 
				
			||||||
| 
						 | 
					@ -3390,7 +3436,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			struct ext_arg ext_arg = { .argsz = argsz };
 | 
								struct ext_arg ext_arg = { .argsz = argsz };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			ret2 = io_get_ext_arg(flags, argp, &ext_arg);
 | 
								ret2 = io_get_ext_arg(ctx, flags, argp, &ext_arg);
 | 
				
			||||||
			if (likely(!ret2)) {
 | 
								if (likely(!ret2)) {
 | 
				
			||||||
				min_complete = min(min_complete,
 | 
									min_complete = min(min_complete,
 | 
				
			||||||
						   ctx->cq_entries);
 | 
											   ctx->cq_entries);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -570,6 +570,82 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void io_unregister_cqwait_reg(struct io_ring_ctx *ctx)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						unsigned short npages = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!ctx->cq_wait_page)
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						io_pages_unmap(ctx->cq_wait_arg, &ctx->cq_wait_page, &npages, true);
 | 
				
			||||||
 | 
						ctx->cq_wait_arg = NULL;
 | 
				
			||||||
 | 
						if (ctx->user)
 | 
				
			||||||
 | 
							__io_unaccount_mem(ctx->user, 1);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Register a page holding N entries of struct io_uring_reg_wait, which can
 | 
				
			||||||
 | 
					 * be used via io_uring_enter(2) if IORING_ENTER_EXT_ARG_REG is set.
 | 
				
			||||||
 | 
					 * If that is set with IORING_ENTER_EXT_ARG, then instead of passing
 | 
				
			||||||
 | 
					 * in a pointer for a struct io_uring_getevents_arg, an index into this
 | 
				
			||||||
 | 
					 * registered array is passed, avoiding two (arg + timeout) copies per
 | 
				
			||||||
 | 
					 * invocation.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int io_register_cqwait_reg(struct io_ring_ctx *ctx, void __user *uarg)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct io_uring_cqwait_reg_arg arg;
 | 
				
			||||||
 | 
						struct io_uring_reg_wait *reg;
 | 
				
			||||||
 | 
						struct page **pages;
 | 
				
			||||||
 | 
						unsigned long len;
 | 
				
			||||||
 | 
						int nr_pages, poff;
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (ctx->cq_wait_page || ctx->cq_wait_arg)
 | 
				
			||||||
 | 
							return -EBUSY;
 | 
				
			||||||
 | 
						if (copy_from_user(&arg, uarg, sizeof(arg)))
 | 
				
			||||||
 | 
							return -EFAULT;
 | 
				
			||||||
 | 
						if (!arg.nr_entries || arg.flags)
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
						if (arg.struct_size != sizeof(*reg))
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
						if (check_mul_overflow(arg.struct_size, arg.nr_entries, &len))
 | 
				
			||||||
 | 
							return -EOVERFLOW;
 | 
				
			||||||
 | 
						if (len > PAGE_SIZE)
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
						/* offset + len must fit within a page, and must be reg_wait aligned */
 | 
				
			||||||
 | 
						poff = arg.user_addr & ~PAGE_MASK;
 | 
				
			||||||
 | 
						if (len + poff > PAGE_SIZE)
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
						if (poff % arg.struct_size)
 | 
				
			||||||
 | 
							return -EINVAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						pages = io_pin_pages(arg.user_addr, len, &nr_pages);
 | 
				
			||||||
 | 
						if (IS_ERR(pages))
 | 
				
			||||||
 | 
							return PTR_ERR(pages);
 | 
				
			||||||
 | 
						ret = -EINVAL;
 | 
				
			||||||
 | 
						if (nr_pages != 1)
 | 
				
			||||||
 | 
							goto out_free;
 | 
				
			||||||
 | 
						if (ctx->user) {
 | 
				
			||||||
 | 
							ret = __io_account_mem(ctx->user, 1);
 | 
				
			||||||
 | 
							if (ret)
 | 
				
			||||||
 | 
								goto out_free;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						reg = vmap(pages, 1, VM_MAP, PAGE_KERNEL);
 | 
				
			||||||
 | 
						if (reg) {
 | 
				
			||||||
 | 
							ctx->cq_wait_index = arg.nr_entries - 1;
 | 
				
			||||||
 | 
							WRITE_ONCE(ctx->cq_wait_page, pages);
 | 
				
			||||||
 | 
							WRITE_ONCE(ctx->cq_wait_arg, (void *) reg + poff);
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						ret = -ENOMEM;
 | 
				
			||||||
 | 
						if (ctx->user)
 | 
				
			||||||
 | 
							__io_unaccount_mem(ctx->user, 1);
 | 
				
			||||||
 | 
					out_free:
 | 
				
			||||||
 | 
						io_pages_free(&pages, nr_pages);
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 | 
					static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 | 
				
			||||||
			       void __user *arg, unsigned nr_args)
 | 
								       void __user *arg, unsigned nr_args)
 | 
				
			||||||
	__releases(ctx->uring_lock)
 | 
						__releases(ctx->uring_lock)
 | 
				
			||||||
| 
						 | 
					@ -764,6 +840,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
		ret = io_register_resize_rings(ctx, arg);
 | 
							ret = io_register_resize_rings(ctx, arg);
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
 | 
						case IORING_REGISTER_CQWAIT_REG:
 | 
				
			||||||
 | 
							ret = -EINVAL;
 | 
				
			||||||
 | 
							if (!arg || nr_args != 1)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							ret = io_register_cqwait_reg(ctx, arg);
 | 
				
			||||||
 | 
							break;
 | 
				
			||||||
	default:
 | 
						default:
 | 
				
			||||||
		ret = -EINVAL;
 | 
							ret = -EINVAL;
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,5 +5,6 @@
 | 
				
			||||||
int io_eventfd_unregister(struct io_ring_ctx *ctx);
 | 
					int io_eventfd_unregister(struct io_ring_ctx *ctx);
 | 
				
			||||||
int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id);
 | 
					int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id);
 | 
				
			||||||
struct file *io_uring_register_get_file(unsigned int fd, bool registered);
 | 
					struct file *io_uring_register_get_file(unsigned int fd, bool registered);
 | 
				
			||||||
 | 
					void io_unregister_cqwait_reg(struct io_ring_ctx *ctx);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue