fasync: RCU and fine grained locking

kill_fasync() uses a central rwlock, a good candidate for RCU conversion, to
avoid cache line ping pongs on SMP.

fasync_remove_entry() and fasync_add_entry() can disable IRQs on a short
section instead of during the whole list scan.

Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and
fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync()
doesn't need its own implementation and can use fasync_helper(), to
reduce code size and complexity.

We can remove the direct use of __kill_fasync() in net/socket.c and rename
it to kill_fasync_rcu().
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
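
[Editor's note] The scheme in this patch: each fasync_struct gets its own
spinlock plus an rcu_head, writers keep taking filp->f_lock and the (now
plain) fasync_lock, and readers walk the list under rcu_read_lock() with only
the per-entry lock held. A minimal sketch of that read side, assuming the
fields this patch adds (fa_lock, fa_rcu) and using a hypothetical helper name,
not the patch's own code:

/*
 * Illustrative sketch only; the real read side is kill_fasync_rcu()
 * in the fs/fcntl.c hunks below.
 */
#include <linux/fs.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

static void sketch_walk_fasync(struct fasync_struct **fp, int band)
{
	struct fasync_struct *fa;

	rcu_read_lock();			/* replaces read_lock(&fasync_lock) */
	for (fa = rcu_dereference(*fp); fa; fa = rcu_dereference(fa->fa_next)) {
		spin_lock(&fa->fa_lock);	/* per-entry lock, no shared rwlock */
		if (fa->fa_file)		/* NULL means the entry is being removed */
			send_sigio(&fa->fa_file->f_owner, fa->fa_fd, band);
		spin_unlock(&fa->fa_lock);
	}
	rcu_read_unlock();
}

The update side pairs a short spin_lock_irq(&fa->fa_lock) window with
call_rcu(), so an unlinked entry is only freed once every reader has moved
past it.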
			
			
parent e5700aff14
commit 989a297920

3 changed files with 59 additions and 92 deletions

fs/fcntl.c (52 changed lines)

--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -614,9 +614,15 @@ int send_sigurg(struct fown_struct *fown)
 	return ret;
 }
 
-static DEFINE_RWLOCK(fasync_lock);
+static DEFINE_SPINLOCK(fasync_lock);
 static struct kmem_cache *fasync_cache __read_mostly;
 
+static void fasync_free_rcu(struct rcu_head *head)
+{
+	kmem_cache_free(fasync_cache,
+			container_of(head, struct fasync_struct, fa_rcu));
+}
+
 /*
  * Remove a fasync entry. If successfully removed, return
  * positive and clear the FASYNC flag. If no entry exists,
@@ -625,8 +631,6 @@ static struct kmem_cache *fasync_cache __read_mostly;
  * NOTE! It is very important that the FASYNC flag always
  * match the state "is the filp on a fasync list".
  *
- * We always take the 'filp->f_lock', in since fasync_lock
- * needs to be irq-safe.
  */
 static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
 {
@@ -634,17 +638,22 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
 	int result = 0;
 
 	spin_lock(&filp->f_lock);
-	write_lock_irq(&fasync_lock);
+	spin_lock(&fasync_lock);
 	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 		if (fa->fa_file != filp)
 			continue;
+
+		spin_lock_irq(&fa->fa_lock);
+		fa->fa_file = NULL;
+		spin_unlock_irq(&fa->fa_lock);
+
 		*fp = fa->fa_next;
-		kmem_cache_free(fasync_cache, fa);
+		call_rcu(&fa->fa_rcu, fasync_free_rcu);
 		filp->f_flags &= ~FASYNC;
 		result = 1;
 		break;
 	}
-	write_unlock_irq(&fasync_lock);
+	spin_unlock(&fasync_lock);
 	spin_unlock(&filp->f_lock);
 	return result;
 }
@@ -666,25 +675,30 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
 		return -ENOMEM;
 
 	spin_lock(&filp->f_lock);
-	write_lock_irq(&fasync_lock);
+	spin_lock(&fasync_lock);
 	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 		if (fa->fa_file != filp)
 			continue;
+
+		spin_lock_irq(&fa->fa_lock);
 		fa->fa_fd = fd;
+		spin_unlock_irq(&fa->fa_lock);
+
 		kmem_cache_free(fasync_cache, new);
 		goto out;
 	}
 
+	spin_lock_init(&new->fa_lock);
 	new->magic = FASYNC_MAGIC;
 	new->fa_file = filp;
 	new->fa_fd = fd;
 	new->fa_next = *fapp;
-	*fapp = new;
+	rcu_assign_pointer(*fapp, new);
 	result = 1;
 	filp->f_flags |= FASYNC;
 
 out:
-	write_unlock_irq(&fasync_lock);
+	spin_unlock(&fasync_lock);
 	spin_unlock(&filp->f_lock);
 	return result;
 }
@@ -704,7 +718,10 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
 
 EXPORT_SYMBOL(fasync_helper);
 
-void __kill_fasync(struct fasync_struct *fa, int sig, int band)
+/*
+ * rcu_read_lock() is held
+ */
+static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
 {
 	while (fa) {
 		struct fown_struct *fown;
@@ -713,28 +730,29 @@ void __kill_fasync(struct fasync_struct *fa, int sig, int band)
 			       "fasync_struct!\n");
 			return;
 		}
-		fown = &fa->fa_file->f_owner;
-		/* Don't send SIGURG to processes which have not set a
-		   queued signum: SIGURG has its own default signalling
-		   mechanism. */
-		if (!(sig == SIGURG && fown->signum == 0))
-			send_sigio(fown, fa->fa_fd, band);
-		fa = fa->fa_next;
+		spin_lock(&fa->fa_lock);
+		if (fa->fa_file) {
+			fown = &fa->fa_file->f_owner;
+			/* Don't send SIGURG to processes which have not set a
+			   queued signum: SIGURG has its own default signalling
+			   mechanism. */
+			if (!(sig == SIGURG && fown->signum == 0))
+				send_sigio(fown, fa->fa_fd, band);
+		}
+		spin_unlock(&fa->fa_lock);
+		fa = rcu_dereference(fa->fa_next);
 	}
 }
 
-EXPORT_SYMBOL(__kill_fasync);
-
 void kill_fasync(struct fasync_struct **fp, int sig, int band)
 {
 	/* First a quick test without locking: usually
 	 * the list is empty.
 	 */
 	if (*fp) {
-		read_lock(&fasync_lock);
-		/* reread *fp after obtaining the lock */
-		__kill_fasync(*fp, sig, band);
-		read_unlock(&fasync_lock);
+		rcu_read_lock();
+		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
+		rcu_read_unlock();
 	}
 }
 EXPORT_SYMBOL(kill_fasync);

--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1280,10 +1280,12 @@ static inline int lock_may_write(struct inode *inode, loff_t start,
 
 
 struct fasync_struct {
+	spinlock_t		fa_lock;
 	int			magic;
 	int			fa_fd;
 	struct fasync_struct	*fa_next; /* singly linked list */
 	struct file		*fa_file;
+	struct rcu_head		fa_rcu;
 };
 
 #define FASYNC_MAGIC 0x4601
@@ -1292,8 +1294,6 @@ struct fasync_struct {
 extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
 /* can be called from interrupts */
 extern void kill_fasync(struct fasync_struct **, int, int);
-/* only for net: no internal synchronization */
-extern void __kill_fasync(struct fasync_struct *, int, int);
 
 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
 extern int f_setown(struct file *filp, unsigned long arg, int force);

net/socket.c (71 changed lines)

--- a/net/socket.c
+++ b/net/socket.c
@@ -1067,78 +1067,27 @@ static int sock_close(struct inode *inode, struct file *filp)
  *	1. fasync_list is modified only under process context socket lock
  *	   i.e. under semaphore.
  *	2. fasync_list is used under read_lock(&sk->sk_callback_lock)
- *	   or under socket lock.
- *	3. fasync_list can be used from softirq context, so that
- *	   modification under socket lock have to be enhanced with
- *	   write_lock_bh(&sk->sk_callback_lock).
- *							--ANK (990710)
+ *	   or under socket lock
  */
 
 static int sock_fasync(int fd, struct file *filp, int on)
 {
-	struct fasync_struct *fa, *fna = NULL, **prev;
-	struct socket *sock;
-	struct sock *sk;
+	struct socket *sock = filp->private_data;
+	struct sock *sk = sock->sk;
 
-	if (on) {
-		fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
-		if (fna == NULL)
-			return -ENOMEM;
-	}
-
-	sock = filp->private_data;
-
-	sk = sock->sk;
-	if (sk == NULL) {
-		kfree(fna);
+	if (sk == NULL)
 		return -EINVAL;
-	}
 
 	lock_sock(sk);
 
-	spin_lock(&filp->f_lock);
-	if (on)
-		filp->f_flags |= FASYNC;
-	else
-		filp->f_flags &= ~FASYNC;
-	spin_unlock(&filp->f_lock);
+	fasync_helper(fd, filp, on, &sock->fasync_list);
 
-	prev = &(sock->fasync_list);
-
-	for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
-		if (fa->fa_file == filp)
-			break;
-
-	if (on) {
-		if (fa != NULL) {
-			write_lock_bh(&sk->sk_callback_lock);
-			fa->fa_fd = fd;
-			write_unlock_bh(&sk->sk_callback_lock);
-
-			kfree(fna);
-			goto out;
-		}
-		fna->fa_file = filp;
-		fna->fa_fd = fd;
-		fna->magic = FASYNC_MAGIC;
-		fna->fa_next = sock->fasync_list;
-		write_lock_bh(&sk->sk_callback_lock);
-		sock->fasync_list = fna;
-		sock_set_flag(sk, SOCK_FASYNC);
-		write_unlock_bh(&sk->sk_callback_lock);
-	} else {
-		if (fa != NULL) {
-			write_lock_bh(&sk->sk_callback_lock);
-			*prev = fa->fa_next;
-			if (!sock->fasync_list)
-				sock_reset_flag(sk, SOCK_FASYNC);
-			write_unlock_bh(&sk->sk_callback_lock);
-			kfree(fa);
-		}
-	}
+	if (!sock->fasync_list)
+		sock_reset_flag(sk, SOCK_FASYNC);
+	else
+		sock_set_flag(sk, SOCK_FASYNC);
 
-out:
-	release_sock(sock->sk);
+	release_sock(sk);
 	return 0;
 }
 
@@ -1159,10 +1108,10 @@ int sock_wake_async(struct socket *sock, int how, int band)
 		/* fall through */
 	case SOCK_WAKE_IO:
 call_kill:
-		__kill_fasync(sock->fasync_list, SIGIO, band);
+		kill_fasync(&sock->fasync_list, SIGIO, band);
 		break;
 	case SOCK_WAKE_URG:
-		__kill_fasync(sock->fasync_list, SIGURG, band);
+		kill_fasync(&sock->fasync_list, SIGURG, band);
 	}
 	return 0;
 }
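
[Editor's note] To round out the picture, a sketch of the update side described
in the changelog: the list scan runs with IRQs enabled under the plain list
spinlock, IRQs are disabled only for the short per-entry window, and the
unlinked entry is handed to call_rcu(). The list lock, the sketch_ helper
names, and the use of kfree() are illustrative assumptions; the real code is
fasync_remove_entry() in the fs/fcntl.c hunk above, which frees through the
fasync kmem_cache.

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(sketch_list_lock);	/* stands in for fasync_lock */

static void sketch_free_rcu(struct rcu_head *head)
{
	/* assumes a kmalloc()ed entry; the real code uses kmem_cache_free() */
	kfree(container_of(head, struct fasync_struct, fa_rcu));
}

static int sketch_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	int removed = 0;

	spin_lock(&filp->f_lock);
	spin_lock(&sketch_list_lock);		/* IRQs stay on for the scan */
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		spin_lock_irq(&fa->fa_lock);	/* short IRQ-off window only */
		fa->fa_file = NULL;		/* kill_fasync_rcu() now skips it */
		spin_unlock_irq(&fa->fa_lock);

		*fp = fa->fa_next;		/* unpublish from the list */
		call_rcu(&fa->fa_rcu, sketch_free_rcu);	/* free after readers drain */
		removed = 1;
		break;
	}
	spin_unlock(&sketch_list_lock);
	spin_unlock(&filp->f_lock);
	return removed;
}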