mirror of https://github.com/torvalds/linux.git
synced 2025-10-31 08:38:45 +02:00

commit f6da4fee69
io_eventfd_grab() doesn't help with understanding the path; it'll be simpler to keep the helper open coded.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/5cb53ce3876c2819db9e8055cf41dca4398521db.1745493845.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
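For orientation, here is a minimal sketch of the grab pattern that is now open coded in io_eventfd_signal() below. The helper shape is an assumption reconstructed from the resulting code, not the removed function itself:

/*
 * Hypothetical reconstruction of the removed helper (assumption, not the
 * actual diff): look up the RCU-protected io_ev_fd and take a reference
 * when a signal should be delivered, mirroring the checks that are now
 * inlined in io_eventfd_signal().
 */
static struct io_ev_fd *io_eventfd_grab(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	/* caller holds the RCU read lock */
	ev_fd = rcu_dereference(ctx->io_ev_fd);
	if (ev_fd && io_eventfd_trigger(ev_fd) &&
	    refcount_inc_not_zero(&ev_fd->refs))
		return ev_fd;
	return NULL;
}

Inlining these checks keeps the reference-count handoff visible at the single call site.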
		
			
				
	
	
		
168 lines · 4 KiB · C
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/eventfd.h>
#include <linux/eventpoll.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io-wq.h"
#include "eventfd.h"

struct io_ev_fd {
	struct eventfd_ctx	*cq_ev_fd;
	unsigned int		eventfd_async;
	/* protected by ->completion_lock */
	unsigned		last_cq_tail;
	refcount_t		refs;
	atomic_t		ops;
	struct rcu_head		rcu;
};

enum {
	IO_EVENTFD_OP_SIGNAL_BIT,
};

static void io_eventfd_free(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_ctx_put(ev_fd->cq_ev_fd);
	kfree(ev_fd);
}

static void io_eventfd_put(struct io_ev_fd *ev_fd)
{
	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}

static void io_eventfd_do_signal(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
	io_eventfd_put(ev_fd);
}

/*
 * Returns true if the caller should put the ev_fd reference, false if not.
 */
static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
{
	if (eventfd_signal_allowed()) {
		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
		return true;
	}
	if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
		call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
		return false;
	}
	return true;
}

/*
 * Trigger if eventfd_async isn't set, or if it's set and the caller is
 * an async worker.
 */
static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
{
	return !ev_fd->eventfd_async || io_wq_current_is_worker();
}

void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
{
	bool skip = false;
	struct io_ev_fd *ev_fd;

	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return;

	guard(rcu)();
	ev_fd = rcu_dereference(ctx->io_ev_fd);
	/*
	 * Check again if ev_fd exists in case an io_eventfd_unregister call
	 * completed between the NULL check of ctx->io_ev_fd at the start of
	 * the function and rcu_read_lock.
	 */
	if (!ev_fd)
		return;
	if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs))
		return;

	if (cqe_event) {
		/*
		 * Eventfd should only get triggered when at least one event
		 * has been posted. Some applications rely on the eventfd
		 * notification count only changing IFF a new CQE has been
		 * added to the CQ ring. There's no dependency on 1:1
		 * relationship between how many times this function is called
		 * (and hence the eventfd count) and number of CQEs posted to
		 * the CQ ring.
		 */
		spin_lock(&ctx->completion_lock);
		skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
		ev_fd->last_cq_tail = ctx->cached_cq_tail;
		spin_unlock(&ctx->completion_lock);
	}

	if (skip || __io_eventfd_signal(ev_fd))
		io_eventfd_put(ev_fd);
}

int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	spin_lock(&ctx->completion_lock);
	ev_fd->last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}

int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd) {
		ctx->has_evfd = false;
		rcu_assign_pointer(ctx->io_ev_fd, NULL);
		io_eventfd_put(ev_fd);
		return 0;
	}

	return -ENXIO;
}
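The register/unregister entry points above serve the IORING_REGISTER_EVENTFD and IORING_UNREGISTER_EVENTFD opcodes. A minimal userspace sketch of the intended flow, assuming liburing is available (the no-op submission and error handling are illustrative only, not taken from this file):

/* Userspace sketch (assumes liburing); not part of this file. */
#include <liburing.h>
#include <sys/eventfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	uint64_t count;
	int efd = eventfd(0, 0);

	if (efd < 0 || io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* Backed by io_eventfd_register() in the kernel. */
	if (io_uring_register_eventfd(&ring, efd) < 0)
		return 1;

	/* Post one CQE; the registered eventfd is signaled when it lands. */
	io_uring_prep_nop(io_uring_get_sqe(&ring));
	io_uring_submit(&ring);

	read(efd, &count, sizeof(count));	/* blocks until signaled */
	printf("eventfd count: %llu\n", (unsigned long long)count);

	/* Backed by io_eventfd_unregister() in the kernel. */
	io_uring_unregister_eventfd(&ring);
	io_uring_queue_exit(&ring);
	close(efd);
	return 0;
}

Registering with eventfd_async set (IORING_REGISTER_EVENTFD_ASYNC, io_uring_register_eventfd_async() in liburing) restricts signaling to completions posted by async workers, which is the case the io_eventfd_trigger() check above implements.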