forked from mirrors/linux
		
	net: do not create fallback tunnels for non-default namespaces
Fallback tunnels (like tunl0, gre0, gretap0, erspan0, sit0, ip6tnl0, ip6gre0) are automatically created when the corresponding module is loaded.

These tunnels are also automatically created when a new network namespace is created, at a great cost.

In many cases, netns are used for isolation purposes, and these extra network devices are a waste of resources. We are using thousands of netns per host, and hit the netns creation/delete bottleneck a lot. (Many thanks to Kirill for recent work on this)

Add a new sysctl so that we can opt out of this automatic creation.

Note that these tunnels are still created for the initial namespace, to be the least intrusive for typical setups.

Tested:

lpk43:~# cat add_del_unshare.sh
for i in `seq 1 40`
do
 (for j in `seq 1 100` ; do unshare -n /bin/true >/dev/null ; done) &
done
wait

lpk43:~# echo 0 >/proc/sys/net/core/fb_tunnels_only_for_init_net
lpk43:~# time ./add_del_unshare.sh
real 0m37.521s
user 0m0.886s
sys 7m7.084s

lpk43:~# echo 1 >/proc/sys/net/core/fb_tunnels_only_for_init_net
lpk43:~# time ./add_del_unshare.sh
real 0m4.761s
user 0m0.851s
sys 1m8.343s
lpk43:~#

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									2b3905de8b
								
							
						
					
					
						commit
						79134e6ce2
					
				
					 8 changed files with 54 additions and 10 deletions
				
			
		| 
						 | 
					@ -270,6 +270,18 @@ optmem_max
 | 
				
			||||||
Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
 | 
					Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
 | 
				
			||||||
of struct cmsghdr structures with appended data.
 | 
					of struct cmsghdr structures with appended data.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fb_tunnels_only_for_init_net
 | 
				
			||||||
 | 
					----------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Controls if fallback tunnels (like tunl0, gre0, gretap0, erspan0,
 | 
				
			||||||
 | 
					sit0, ip6tnl0, ip6gre0) are automatically created when a new
 | 
				
			||||||
 | 
					network namespace is created, if the corresponding tunnel is present
 | 
				
			||||||
 | 
					in the initial network namespace.
 | 
				
			||||||
 | 
					If set to 1, these devices are not automatically created, and
 | 
				
			||||||
 | 
					user space is responsible for creating them if needed.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Default : 0  (for compatibility reasons)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
2. /proc/sys/net/unix - Parameters for Unix domain sockets
 | 
					2. /proc/sys/net/unix - Parameters for Unix domain sockets
 | 
				
			||||||
-------------------------------------------------------
 | 
					-------------------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -585,6 +585,13 @@ struct netdev_queue {
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
} ____cacheline_aligned_in_smp;
 | 
					} ____cacheline_aligned_in_smp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					extern int sysctl_fb_tunnels_only_for_init_net;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline bool net_has_fallback_tunnels(const struct net *net)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return net == &init_net || !sysctl_fb_tunnels_only_for_init_net;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
 | 
					static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
 | 
					#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -180,8 +180,10 @@ struct tnl_ptk_info {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct ip_tunnel_net {
 | 
					struct ip_tunnel_net {
 | 
				
			||||||
	struct net_device *fb_tunnel_dev;
 | 
						struct net_device *fb_tunnel_dev;
 | 
				
			||||||
 | 
						struct rtnl_link_ops *rtnl_link_ops;
 | 
				
			||||||
	struct hlist_head tunnels[IP_TNL_HASH_SIZE];
 | 
						struct hlist_head tunnels[IP_TNL_HASH_SIZE];
 | 
				
			||||||
	struct ip_tunnel __rcu *collect_md_tun;
 | 
						struct ip_tunnel __rcu *collect_md_tun;
 | 
				
			||||||
 | 
						int type;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
 | 
					static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -32,6 +32,9 @@ static int max_skb_frags = MAX_SKB_FRAGS;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int net_msg_warn;	/* Unused, but still a sysctl */
 | 
					static int net_msg_warn;	/* Unused, but still a sysctl */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
 | 
				
			||||||
 | 
					EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_RPS
 | 
					#ifdef CONFIG_RPS
 | 
				
			||||||
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
 | 
					static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
 | 
				
			||||||
				void __user *buffer, size_t *lenp, loff_t *ppos)
 | 
									void __user *buffer, size_t *lenp, loff_t *ppos)
 | 
				
			||||||
| 
						 | 
					@ -513,6 +516,15 @@ static struct ctl_table net_core_table[] = {
 | 
				
			||||||
		.proc_handler	= proc_dointvec_minmax,
 | 
							.proc_handler	= proc_dointvec_minmax,
 | 
				
			||||||
		.extra1		= &zero,
 | 
							.extra1		= &zero,
 | 
				
			||||||
	},
 | 
						},
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							.procname	= "fb_tunnels_only_for_init_net",
 | 
				
			||||||
 | 
							.data		= &sysctl_fb_tunnels_only_for_init_net,
 | 
				
			||||||
 | 
							.maxlen		= sizeof(int),
 | 
				
			||||||
 | 
							.mode		= 0644,
 | 
				
			||||||
 | 
							.proc_handler	= proc_dointvec_minmax,
 | 
				
			||||||
 | 
							.extra1		= &zero,
 | 
				
			||||||
 | 
							.extra2		= &one,
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
	{ }
 | 
						{ }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -347,8 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
 | 
				
			||||||
	struct net_device *dev;
 | 
						struct net_device *dev;
 | 
				
			||||||
	int t_hlen;
 | 
						int t_hlen;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	BUG_ON(!itn->fb_tunnel_dev);
 | 
						dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
 | 
				
			||||||
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
 | 
					 | 
				
			||||||
	if (IS_ERR(dev))
 | 
						if (IS_ERR(dev))
 | 
				
			||||||
		return ERR_CAST(dev);
 | 
							return ERR_CAST(dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -822,7 +821,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 | 
				
			||||||
	struct net *net = t->net;
 | 
						struct net *net = t->net;
 | 
				
			||||||
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
 | 
						struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	BUG_ON(!itn->fb_tunnel_dev);
 | 
					 | 
				
			||||||
	switch (cmd) {
 | 
						switch (cmd) {
 | 
				
			||||||
	case SIOCGETTUNNEL:
 | 
						case SIOCGETTUNNEL:
 | 
				
			||||||
		if (dev == itn->fb_tunnel_dev) {
 | 
							if (dev == itn->fb_tunnel_dev) {
 | 
				
			||||||
| 
						 | 
					@ -847,7 +845,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 | 
				
			||||||
				p->o_key = 0;
 | 
									p->o_key = 0;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 | 
							t = ip_tunnel_find(itn, p, itn->type);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (cmd == SIOCADDTUNNEL) {
 | 
							if (cmd == SIOCADDTUNNEL) {
 | 
				
			||||||
			if (!t) {
 | 
								if (!t) {
 | 
				
			||||||
| 
						 | 
					@ -991,10 +989,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
 | 
				
			||||||
	struct ip_tunnel_parm parms;
 | 
						struct ip_tunnel_parm parms;
 | 
				
			||||||
	unsigned int i;
 | 
						unsigned int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						itn->rtnl_link_ops = ops;
 | 
				
			||||||
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
 | 
						for (i = 0; i < IP_TNL_HASH_SIZE; i++)
 | 
				
			||||||
		INIT_HLIST_HEAD(&itn->tunnels[i]);
 | 
							INIT_HLIST_HEAD(&itn->tunnels[i]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!ops) {
 | 
						if (!ops || !net_has_fallback_tunnels(net)) {
 | 
				
			||||||
 | 
							struct ip_tunnel_net *it_init_net;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							it_init_net = net_generic(&init_net, ip_tnl_net_id);
 | 
				
			||||||
 | 
							itn->type = it_init_net->type;
 | 
				
			||||||
		itn->fb_tunnel_dev = NULL;
 | 
							itn->fb_tunnel_dev = NULL;
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -1012,6 +1015,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
 | 
				
			||||||
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
 | 
							itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
 | 
				
			||||||
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
 | 
							itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
 | 
				
			||||||
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
 | 
							ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
 | 
				
			||||||
 | 
							itn->type = itn->fb_tunnel_dev->type;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	rtnl_unlock();
 | 
						rtnl_unlock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1019,10 +1023,10 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
 | 
					EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
 | 
					static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
 | 
				
			||||||
 | 
								      struct list_head *head,
 | 
				
			||||||
			      struct rtnl_link_ops *ops)
 | 
								      struct rtnl_link_ops *ops)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct net *net = dev_net(itn->fb_tunnel_dev);
 | 
					 | 
				
			||||||
	struct net_device *dev, *aux;
 | 
						struct net_device *dev, *aux;
 | 
				
			||||||
	int h;
 | 
						int h;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1054,7 +1058,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
 | 
				
			||||||
	rtnl_lock();
 | 
						rtnl_lock();
 | 
				
			||||||
	list_for_each_entry(net, net_list, exit_list) {
 | 
						list_for_each_entry(net, net_list, exit_list) {
 | 
				
			||||||
		itn = net_generic(net, id);
 | 
							itn = net_generic(net, id);
 | 
				
			||||||
		ip_tunnel_destroy(itn, &list, ops);
 | 
							ip_tunnel_destroy(net, itn, &list, ops);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	unregister_netdevice_many(&list);
 | 
						unregister_netdevice_many(&list);
 | 
				
			||||||
	rtnl_unlock();
 | 
						rtnl_unlock();
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -236,7 +236,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
 | 
				
			||||||
		return t;
 | 
							return t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	dev = ign->fb_tunnel_dev;
 | 
						dev = ign->fb_tunnel_dev;
 | 
				
			||||||
	if (dev->flags & IFF_UP)
 | 
						if (dev && dev->flags & IFF_UP)
 | 
				
			||||||
		return netdev_priv(dev);
 | 
							return netdev_priv(dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return NULL;
 | 
						return NULL;
 | 
				
			||||||
| 
						 | 
					@ -1472,6 +1472,8 @@ static int __net_init ip6gre_init_net(struct net *net)
 | 
				
			||||||
	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
 | 
						struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
 | 
				
			||||||
	int err;
 | 
						int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!net_has_fallback_tunnels(net))
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
 | 
						ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
 | 
				
			||||||
					  NET_NAME_UNKNOWN,
 | 
										  NET_NAME_UNKNOWN,
 | 
				
			||||||
					  ip6gre_tunnel_setup);
 | 
										  ip6gre_tunnel_setup);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2205,6 +2205,8 @@ static int __net_init ip6_tnl_init_net(struct net *net)
 | 
				
			||||||
	ip6n->tnls[0] = ip6n->tnls_wc;
 | 
						ip6n->tnls[0] = ip6n->tnls_wc;
 | 
				
			||||||
	ip6n->tnls[1] = ip6n->tnls_r_l;
 | 
						ip6n->tnls[1] = ip6n->tnls_r_l;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!net_has_fallback_tunnels(net))
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
	err = -ENOMEM;
 | 
						err = -ENOMEM;
 | 
				
			||||||
	ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
 | 
						ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
 | 
				
			||||||
					NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
 | 
										NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
 | 
				
			||||||
#ifdef CONFIG_IPV6_SIT_6RD
 | 
					#ifdef CONFIG_IPV6_SIT_6RD
 | 
				
			||||||
	struct ip_tunnel *t = netdev_priv(dev);
 | 
						struct ip_tunnel *t = netdev_priv(dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (dev == sitn->fb_tunnel_dev) {
 | 
						if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) {
 | 
				
			||||||
		ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
 | 
							ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
 | 
				
			||||||
		t->ip6rd.relay_prefix = 0;
 | 
							t->ip6rd.relay_prefix = 0;
 | 
				
			||||||
		t->ip6rd.prefixlen = 16;
 | 
							t->ip6rd.prefixlen = 16;
 | 
				
			||||||
| 
						 | 
					@ -1835,6 +1835,9 @@ static int __net_init sit_init_net(struct net *net)
 | 
				
			||||||
	sitn->tunnels[2] = sitn->tunnels_r;
 | 
						sitn->tunnels[2] = sitn->tunnels_r;
 | 
				
			||||||
	sitn->tunnels[3] = sitn->tunnels_r_l;
 | 
						sitn->tunnels[3] = sitn->tunnels_r_l;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!net_has_fallback_tunnels(net))
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
 | 
						sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
 | 
				
			||||||
					   NET_NAME_UNKNOWN,
 | 
										   NET_NAME_UNKNOWN,
 | 
				
			||||||
					   ipip6_tunnel_setup);
 | 
										   ipip6_tunnel_setup);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue