	net: Introduce net_rwsem to protect net_namespace_list
rtnl_lock() is used everywhere, and contention is very high. When someone wants to iterate over live net namespaces, there is currently no way to do so without taking the exclusive lock. But exclusive rtnl_lock() in such places is overkill; it only increases the contention. The kernel already has for_each_net_rcu(), but it requires rcu_read_lock(), so the iteration cannot sleep. Also, it is sometimes necessary to actually prevent net_namespace_list from growing, so for_each_net_rcu() does not fit there either.

This patch introduces a new rw_semaphore, which will be used instead of rtnl_mutex to protect net_namespace_list. It is sleepable and allows non-exclusive iteration over the net namespace list. It makes it possible to stop using rtnl_lock() in several places (done in the next patches) and reduces the time we hold rtnl_mutex. Here we only add the new lock; the explanation of why rtnl_lock() can be removed in those places is in the next patches.

Fine-grained locks are generally better than one big lock, so let's do that for net_namespace_list while the situation allows it.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 906edee91e
commit f0b07bb151

11 changed files with 37 additions and 5 deletions
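The pattern this commit enables, as a minimal hypothetical sketch (not code from the patch; iterate_net_namespaces() and its callback are made up for illustration): a reader that must be able to sleep while walking the namespace list takes net_rwsem shared instead of excluding everyone with rtnl_lock().

#include <linux/rwsem.h>
#include <net/net_namespace.h>

/* Hypothetical helper: walk all live net namespaces in a sleepable
 * context. Several such readers may run in parallel; only the list
 * writers (setup_net()/cleanup_net()) take net_rwsem exclusively.
 */
static void iterate_net_namespaces(void (*cb)(struct net *net))
{
	struct net *net;

	down_read(&net_rwsem);	/* shared, sleepable */
	for_each_net(net)
		cb(net);	/* may sleep, unlike under for_each_net_rcu() */
	up_read(&net_rwsem);
}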
				
			
@@ -403,10 +403,12 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
 	 * our feet
 	 */
 	rtnl_lock();
+	down_read(&net_rwsem);
 	for_each_net(net)
 		for_each_netdev(net, ndev)
 			if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev))
 				add_netdev_ips(ib_dev, port, rdma_ndev, ndev);
+	up_read(&net_rwsem);
 	rtnl_unlock();
 }
 
@@ -37,6 +37,7 @@ extern int rtnl_lock_killable(void);
 
 extern wait_queue_head_t netdev_unregistering_wq;
 extern struct rw_semaphore pernet_ops_rwsem;
+extern struct rw_semaphore net_rwsem;
 
 #ifdef CONFIG_PROVE_LOCKING
 extern bool lockdep_rtnl_is_held(void);
@@ -291,6 +291,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
 #endif
 }
 
+/* Protected by net_rwsem */
 #define for_each_net(VAR)				\
 	list_for_each_entry(VAR, &net_namespace_list, list)
 
@@ -1629,6 +1629,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
 		goto unlock;
 	if (dev_boot_phase)
 		goto unlock;
+	down_read(&net_rwsem);
 	for_each_net(net) {
 		for_each_netdev(net, dev) {
 			err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
@@ -1642,6 +1643,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
 			call_netdevice_notifier(nb, NETDEV_UP, dev);
 		}
 	}
+	up_read(&net_rwsem);
 
 unlock:
 	rtnl_unlock();
@@ -1664,6 +1666,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
 	}
 
 outroll:
+	up_read(&net_rwsem);
 	raw_notifier_chain_unregister(&netdev_chain, nb);
 	goto unlock;
 }
@@ -1694,6 +1697,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 	if (err)
 		goto unlock;
 
+	down_read(&net_rwsem);
 	for_each_net(net) {
 		for_each_netdev(net, dev) {
 			if (dev->flags & IFF_UP) {
@@ -1704,6 +1708,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 			call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
 		}
 	}
+	up_read(&net_rwsem);
 unlock:
 	rtnl_unlock();
 	return err;
@@ -33,6 +33,7 @@ static unsigned int fib_seq_sum(void)
 	struct net *net;
 
 	rtnl_lock();
+	down_read(&net_rwsem);
 	for_each_net(net) {
 		rcu_read_lock();
 		list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
@@ -43,6 +44,7 @@ static unsigned int fib_seq_sum(void)
 		}
 		rcu_read_unlock();
 	}
+	up_read(&net_rwsem);
 	rtnl_unlock();
 
 	return fib_seq;
@@ -33,6 +33,10 @@ static struct list_head *first_device = &pernet_list;
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
 
+/* Protects net_namespace_list. Nests inside rtnl_lock() */
+DECLARE_RWSEM(net_rwsem);
+EXPORT_SYMBOL_GPL(net_rwsem);
+
 struct net init_net = {
 	.count		= REFCOUNT_INIT(1),
 	.dev_base_head	= LIST_HEAD_INIT(init_net.dev_base_head),
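The comment added above fixes the lock ordering: net_rwsem nests inside rtnl_lock(). A hypothetical sketch of that ordering (locked_walk_example() is made up; the ordering itself matches the call sites changed in this patch):

#include <linux/rtnetlink.h>
#include <linux/rwsem.h>
#include <net/net_namespace.h>

/* Hypothetical sketch: when both locks are needed, rtnl_lock() is
 * taken first and net_rwsem second, never the other way around,
 * or an ABBA deadlock becomes possible.
 */
static void locked_walk_example(void)
{
	struct net *net;

	rtnl_lock();			/* outer lock */
	down_read(&net_rwsem);		/* nests inside rtnl_lock() */
	for_each_net(net)
		;			/* per-namespace work */
	up_read(&net_rwsem);
	rtnl_unlock();
}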
@@ -309,9 +313,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 		if (error < 0)
 			goto out_undo;
 	}
-	rtnl_lock();
+	down_write(&net_rwsem);
 	list_add_tail_rcu(&net->list, &net_namespace_list);
-	rtnl_unlock();
+	up_write(&net_rwsem);
 out:
 	return error;
 
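For contrast with the readers, the writer side above now serializes list mutation through down_write(). A simplified hypothetical sketch of what the insertion in setup_net() amounts to (publish_net() is made up; the list handling is as in the hunk above):

#include <linux/rculist.h>
#include <linux/rwsem.h>
#include <net/net_namespace.h>

/* Hypothetical sketch: publishing a namespace excludes all shared
 * iterators for the duration of the list update, but no longer
 * requires rtnl_mutex.
 */
static void publish_net(struct net *net)
{
	down_write(&net_rwsem);
	list_add_tail_rcu(&net->list, &net_namespace_list);
	up_write(&net_rwsem);
}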
@@ -450,7 +454,7 @@ static void unhash_nsid(struct net *net, struct net *last)
 	 * and this work is the only process, that may delete
 	 * a net from net_namespace_list. So, when the below
 	 * is executing, the list may only grow. Thus, we do not
-	 * use for_each_net_rcu() or rtnl_lock().
+	 * use for_each_net_rcu() or net_rwsem.
 	 */
 	for_each_net(tmp) {
 		int id;
@@ -485,7 +489,7 @@ static void cleanup_net(struct work_struct *work)
 	down_read(&pernet_ops_rwsem);
 
 	/* Don't let anyone else find us. */
-	rtnl_lock();
+	down_write(&net_rwsem);
 	llist_for_each_entry(net, net_kill_list, cleanup_list)
 		list_del_rcu(&net->list);
 	/* Cache last net. After we unlock rtnl, no one new net
@@ -499,7 +503,7 @@ static void cleanup_net(struct work_struct *work)
 	 * useless anyway, as netns_ids are destroyed there.
 	 */
 	last = list_last_entry(&net_namespace_list, struct net, list);
-	rtnl_unlock();
+	up_write(&net_rwsem);
 
 	llist_for_each_entry(net, net_kill_list, cleanup_list) {
 		unhash_nsid(net, last);
@@ -900,6 +904,9 @@ static int __register_pernet_operations(struct list_head *list,
 
 	list_add_tail(&ops->list, list);
 	if (ops->init || (ops->id && ops->size)) {
+		/* We hold write-locked pernet_ops_rwsem, so parallel
+		 * setup_net() and cleanup_net() are not possible.
+		 */
 		for_each_net(net) {
 			error = ops_init(ops, net);
 			if (error)
@@ -923,6 +930,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
 	LIST_HEAD(net_exit_list);
 
 	list_del(&ops->list);
+	/* See comment in __register_pernet_operations() */
 	for_each_net(net)
 		list_add_tail(&net->exit_list, &net_exit_list);
 	ops_exit_list(ops, &net_exit_list);
@@ -418,9 +418,11 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
 {
 	struct net *net;
 
+	down_read(&net_rwsem);
 	for_each_net(net) {
 		__rtnl_kill_links(net, ops);
 	}
+	up_read(&net_rwsem);
 	list_del(&ops->list);
 }
 EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
@@ -438,6 +440,9 @@ static void rtnl_lock_unregistering_all(void)
 	for (;;) {
 		unregistering = false;
 		rtnl_lock();
+		/* We hold write-locked pernet_ops_rwsem, so parallel
+		 * setup_net() and cleanup_net() are not possible.
+		 */
 		for_each_net(net) {
 			if (net->dev_unreg_count > 0) {
 				unregistering = true;
@@ -1764,12 +1764,14 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
 	struct net *net;
 
 	rtnl_lock();
+	down_read(&net_rwsem);
 	for_each_net(net) {
 		if (atomic_read(&net->ct.count) == 0)
 			continue;
 		__nf_ct_unconfirmed_destroy(net);
 		nf_queue_nf_hook_drop(net);
 	}
+	up_read(&net_rwsem);
 	rtnl_unlock();
 
 	/* Need to wait for netns cleanup worker to finish, if its
@@ -2364,8 +2364,10 @@ static void __net_exit ovs_exit_net(struct net *dnet)
 		__dp_destroy(dp);
 
 	rtnl_lock();
+	down_read(&net_rwsem);
 	for_each_net(net)
 		list_vports_from_net(net, dnet, &head);
+	up_read(&net_rwsem);
 	rtnl_unlock();
 
 	/* Detach all vports from given namespace. */
@@ -349,11 +349,13 @@ void wireless_nlevent_flush(void)
 
 	ASSERT_RTNL();
 
+	down_read(&net_rwsem);
 	for_each_net(net) {
 		while ((skb = skb_dequeue(&net->wext_nlevents)))
 			rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
 				    GFP_KERNEL);
 	}
+	up_read(&net_rwsem);
 }
 EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
 
@@ -48,8 +48,10 @@ static inline void selinux_xfrm_notify_policyload(void)
 	struct net *net;
 
 	rtnl_lock();
+	down_read(&net_rwsem);
 	for_each_net(net)
 		rt_genid_bump_all(net);
+	up_read(&net_rwsem);
 	rtnl_unlock();
 }
 #else