mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	ipv6: give an IPv6 dev to blackhole_netdev
IPv6 addrconf notifiers want the loopback device to be the last device being dismantled at netns deletion. This caused many limitations and workarounds. Back in linux-5.3, Mahesh added a per-host blackhole_netdev that can be used whenever we need to make sure objects no longer refer to a disappearing device. If we attach an ip6_ptr to blackhole_netdev (i.e. allocate an idev for it), then we can use this special device (which is never freed) in place of the loopback_dev (which can be freed). This will permit improvements in netdev_run_todo() and other parts of the stack where we had steps to make sure loopback_dev was the last device to disappear. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Mahesh Bandewar <maheshb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									2d4feb2c1b
								
							
						
					
					
						commit
						e5f80fcf86
					
				
					 2 changed files with 40 additions and 59 deletions
				
			
		| 
						 | 
					@ -372,7 +372,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ASSERT_RTNL();
 | 
						ASSERT_RTNL();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (dev->mtu < IPV6_MIN_MTU)
 | 
						if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev)
 | 
				
			||||||
		return ERR_PTR(-EINVAL);
 | 
							return ERR_PTR(-EINVAL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
 | 
						ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
 | 
				
			||||||
| 
						 | 
					@ -400,21 +400,22 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 | 
				
			||||||
	/* We refer to the device */
 | 
						/* We refer to the device */
 | 
				
			||||||
	dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL);
 | 
						dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (snmp6_alloc_dev(ndev) < 0) {
 | 
						if (dev != blackhole_netdev) {
 | 
				
			||||||
		netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
 | 
							if (snmp6_alloc_dev(ndev) < 0) {
 | 
				
			||||||
			   __func__);
 | 
								netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
 | 
				
			||||||
		neigh_parms_release(&nd_tbl, ndev->nd_parms);
 | 
									   __func__);
 | 
				
			||||||
		dev_put_track(dev, &ndev->dev_tracker);
 | 
								neigh_parms_release(&nd_tbl, ndev->nd_parms);
 | 
				
			||||||
		kfree(ndev);
 | 
								dev_put_track(dev, &ndev->dev_tracker);
 | 
				
			||||||
		return ERR_PTR(err);
 | 
								kfree(ndev);
 | 
				
			||||||
	}
 | 
								return ERR_PTR(err);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (snmp6_register_dev(ndev) < 0) {
 | 
							if (snmp6_register_dev(ndev) < 0) {
 | 
				
			||||||
		netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
 | 
								netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
 | 
				
			||||||
			   __func__, dev->name);
 | 
									   __func__, dev->name);
 | 
				
			||||||
		goto err_release;
 | 
								goto err_release;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* One reference from device. */
 | 
						/* One reference from device. */
 | 
				
			||||||
	refcount_set(&ndev->refcnt, 1);
 | 
						refcount_set(&ndev->refcnt, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -445,25 +446,28 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ipv6_mc_init_dev(ndev);
 | 
						ipv6_mc_init_dev(ndev);
 | 
				
			||||||
	ndev->tstamp = jiffies;
 | 
						ndev->tstamp = jiffies;
 | 
				
			||||||
	err = addrconf_sysctl_register(ndev);
 | 
						if (dev != blackhole_netdev) {
 | 
				
			||||||
	if (err) {
 | 
							err = addrconf_sysctl_register(ndev);
 | 
				
			||||||
		ipv6_mc_destroy_dev(ndev);
 | 
							if (err) {
 | 
				
			||||||
		snmp6_unregister_dev(ndev);
 | 
								ipv6_mc_destroy_dev(ndev);
 | 
				
			||||||
		goto err_release;
 | 
								snmp6_unregister_dev(ndev);
 | 
				
			||||||
 | 
								goto err_release;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	/* protected by rtnl_lock */
 | 
						/* protected by rtnl_lock */
 | 
				
			||||||
	rcu_assign_pointer(dev->ip6_ptr, ndev);
 | 
						rcu_assign_pointer(dev->ip6_ptr, ndev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Join interface-local all-node multicast group */
 | 
						if (dev != blackhole_netdev) {
 | 
				
			||||||
	ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
 | 
							/* Join interface-local all-node multicast group */
 | 
				
			||||||
 | 
							ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Join all-node multicast group */
 | 
							/* Join all-node multicast group */
 | 
				
			||||||
	ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
 | 
							ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
 | 
				
			||||||
 | 
					 | 
				
			||||||
	/* Join all-router multicast group if forwarding is set */
 | 
					 | 
				
			||||||
	if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
 | 
					 | 
				
			||||||
		ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Join all-router multicast group if forwarding is set */
 | 
				
			||||||
 | 
							if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
 | 
				
			||||||
 | 
								ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	return ndev;
 | 
						return ndev;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
err_release:
 | 
					err_release:
 | 
				
			||||||
| 
						 | 
					@ -7233,26 +7237,8 @@ int __init addrconf_init(void)
 | 
				
			||||||
		goto out_nowq;
 | 
							goto out_nowq;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* The addrconf netdev notifier requires that loopback_dev
 | 
					 | 
				
			||||||
	 * has it's ipv6 private information allocated and setup
 | 
					 | 
				
			||||||
	 * before it can bring up and give link-local addresses
 | 
					 | 
				
			||||||
	 * to other devices which are up.
 | 
					 | 
				
			||||||
	 *
 | 
					 | 
				
			||||||
	 * Unfortunately, loopback_dev is not necessarily the first
 | 
					 | 
				
			||||||
	 * entry in the global dev_base list of net devices.  In fact,
 | 
					 | 
				
			||||||
	 * it is likely to be the very last entry on that list.
 | 
					 | 
				
			||||||
	 * So this causes the notifier registry below to try and
 | 
					 | 
				
			||||||
	 * give link-local addresses to all devices besides loopback_dev
 | 
					 | 
				
			||||||
	 * first, then loopback_dev, which cases all the non-loopback_dev
 | 
					 | 
				
			||||||
	 * devices to fail to get a link-local address.
 | 
					 | 
				
			||||||
	 *
 | 
					 | 
				
			||||||
	 * So, as a temporary fix, allocate the ipv6 structure for
 | 
					 | 
				
			||||||
	 * loopback_dev first by hand.
 | 
					 | 
				
			||||||
	 * Longer term, all of the dependencies ipv6 has upon the loopback
 | 
					 | 
				
			||||||
	 * device and it being up should be removed.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	rtnl_lock();
 | 
						rtnl_lock();
 | 
				
			||||||
	idev = ipv6_add_dev(init_net.loopback_dev);
 | 
						idev = ipv6_add_dev(blackhole_netdev);
 | 
				
			||||||
	rtnl_unlock();
 | 
						rtnl_unlock();
 | 
				
			||||||
	if (IS_ERR(idev)) {
 | 
						if (IS_ERR(idev)) {
 | 
				
			||||||
		err = PTR_ERR(idev);
 | 
							err = PTR_ERR(idev);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -156,14 +156,10 @@ void rt6_uncached_list_del(struct rt6_info *rt)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 | 
					static void rt6_uncached_list_flush_dev(struct net_device *dev)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct net_device *loopback_dev = net->loopback_dev;
 | 
					 | 
				
			||||||
	int cpu;
 | 
						int cpu;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (dev == loopback_dev)
 | 
					 | 
				
			||||||
		return;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for_each_possible_cpu(cpu) {
 | 
						for_each_possible_cpu(cpu) {
 | 
				
			||||||
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 | 
							struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 | 
				
			||||||
		struct rt6_info *rt;
 | 
							struct rt6_info *rt;
 | 
				
			||||||
| 
						 | 
					@ -174,7 +170,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 | 
				
			||||||
			struct net_device *rt_dev = rt->dst.dev;
 | 
								struct net_device *rt_dev = rt->dst.dev;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if (rt_idev->dev == dev) {
 | 
								if (rt_idev->dev == dev) {
 | 
				
			||||||
				rt->rt6i_idev = in6_dev_get(loopback_dev);
 | 
									rt->rt6i_idev = in6_dev_get(blackhole_netdev);
 | 
				
			||||||
				in6_dev_put(rt_idev);
 | 
									in6_dev_put(rt_idev);
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -371,13 +367,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct rt6_info *rt = (struct rt6_info *)dst;
 | 
						struct rt6_info *rt = (struct rt6_info *)dst;
 | 
				
			||||||
	struct inet6_dev *idev = rt->rt6i_idev;
 | 
						struct inet6_dev *idev = rt->rt6i_idev;
 | 
				
			||||||
	struct net_device *loopback_dev =
 | 
					 | 
				
			||||||
		dev_net(dev)->loopback_dev;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (idev && idev->dev != loopback_dev) {
 | 
						if (idev && idev->dev != blackhole_netdev) {
 | 
				
			||||||
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
 | 
							struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
 | 
				
			||||||
		if (loopback_idev) {
 | 
					
 | 
				
			||||||
			rt->rt6i_idev = loopback_idev;
 | 
							if (blackhole_idev) {
 | 
				
			||||||
 | 
								rt->rt6i_idev = blackhole_idev;
 | 
				
			||||||
			in6_dev_put(idev);
 | 
								in6_dev_put(idev);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -4892,7 +4887,7 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
 | 
				
			||||||
void rt6_disable_ip(struct net_device *dev, unsigned long event)
 | 
					void rt6_disable_ip(struct net_device *dev, unsigned long event)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	rt6_sync_down_dev(dev, event);
 | 
						rt6_sync_down_dev(dev, event);
 | 
				
			||||||
	rt6_uncached_list_flush_dev(dev_net(dev), dev);
 | 
						rt6_uncached_list_flush_dev(dev);
 | 
				
			||||||
	neigh_ifdown(&nd_tbl, dev);
 | 
						neigh_ifdown(&nd_tbl, dev);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue