mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	ipv6: Only create RTF_CACHE routes after encountering pmtu exception
This patch creates RTF_CACHE routes only after encountering a pmtu exception. After ip6_rt_update_pmtu() has inserted the RTF_CACHE route into the fib6 tree, rt->rt6i_node->fn_sernum is bumped, which causes ip6_dst_check() to fail and triggers a relookup. Signed-off-by: Martin KaFai Lau <kafai@fb.com> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org> Cc: Steffen Klassert <steffen.klassert@secunet.com> Cc: Julian Anastasov <ja@ssi.bg> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									8b9df26577
								
							
						
					
					
						commit
						45e4fd2668
					
				
					 3 changed files with 52 additions and 49 deletions
				
			
		| 
						 | 
					@ -202,7 +202,7 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (rt->rt6i_flags & RTF_GATEWAY)
 | 
						if (rt->rt6i_flags & RTF_GATEWAY)
 | 
				
			||||||
		return &rt->rt6i_gateway;
 | 
							return &rt->rt6i_gateway;
 | 
				
			||||||
	else if (rt->rt6i_flags & RTF_CACHE)
 | 
						else if (unlikely(rt->rt6i_flags & RTF_CACHE))
 | 
				
			||||||
		return &rt->rt6i_dst.addr;
 | 
							return &rt->rt6i_dst.addr;
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
		return daddr;
 | 
							return daddr;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -738,6 +738,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 | 
				
			||||||
					rt6_clean_expires(iter);
 | 
										rt6_clean_expires(iter);
 | 
				
			||||||
				else
 | 
									else
 | 
				
			||||||
					rt6_set_expires(iter, rt->dst.expires);
 | 
										rt6_set_expires(iter, rt->dst.expires);
 | 
				
			||||||
 | 
									iter->rt6i_pmtu = rt->rt6i_pmtu;
 | 
				
			||||||
				return -EEXIST;
 | 
									return -EEXIST;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			/* If we have the same destination and the same metric,
 | 
								/* If we have the same destination and the same metric,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -873,16 +873,13 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 | 
				
			||||||
				      struct flowi6 *fl6, int flags)
 | 
									      struct flowi6 *fl6, int flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct fib6_node *fn, *saved_fn;
 | 
						struct fib6_node *fn, *saved_fn;
 | 
				
			||||||
	struct rt6_info *rt, *nrt;
 | 
						struct rt6_info *rt;
 | 
				
			||||||
	int strict = 0;
 | 
						int strict = 0;
 | 
				
			||||||
	int attempts = 3;
 | 
					 | 
				
			||||||
	int err;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	strict |= flags & RT6_LOOKUP_F_IFACE;
 | 
						strict |= flags & RT6_LOOKUP_F_IFACE;
 | 
				
			||||||
	if (net->ipv6.devconf_all->forwarding == 0)
 | 
						if (net->ipv6.devconf_all->forwarding == 0)
 | 
				
			||||||
		strict |= RT6_LOOKUP_F_REACHABLE;
 | 
							strict |= RT6_LOOKUP_F_REACHABLE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
redo_fib6_lookup_lock:
 | 
					 | 
				
			||||||
	read_lock_bh(&table->tb6_lock);
 | 
						read_lock_bh(&table->tb6_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 | 
						fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 | 
				
			||||||
| 
						 | 
					@ -901,46 +898,12 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 | 
				
			||||||
			strict &= ~RT6_LOOKUP_F_REACHABLE;
 | 
								strict &= ~RT6_LOOKUP_F_REACHABLE;
 | 
				
			||||||
			fn = saved_fn;
 | 
								fn = saved_fn;
 | 
				
			||||||
			goto redo_rt6_select;
 | 
								goto redo_rt6_select;
 | 
				
			||||||
		} else {
 | 
					 | 
				
			||||||
			dst_hold(&rt->dst);
 | 
					 | 
				
			||||||
			read_unlock_bh(&table->tb6_lock);
 | 
					 | 
				
			||||||
			goto out2;
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	dst_hold(&rt->dst);
 | 
						dst_hold(&rt->dst);
 | 
				
			||||||
	read_unlock_bh(&table->tb6_lock);
 | 
						read_unlock_bh(&table->tb6_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (rt->rt6i_flags & RTF_CACHE)
 | 
					 | 
				
			||||||
		goto out2;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (!rt6_is_gw_or_nonexthop(rt) ||
 | 
					 | 
				
			||||||
	    !(rt->dst.flags & DST_HOST) || !(rt->rt6i_flags & RTF_LOCAL))
 | 
					 | 
				
			||||||
		nrt = ip6_rt_cache_alloc(rt, &fl6->daddr, &fl6->saddr);
 | 
					 | 
				
			||||||
	else
 | 
					 | 
				
			||||||
		goto out2;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	ip6_rt_put(rt);
 | 
					 | 
				
			||||||
	rt = nrt ? : net->ipv6.ip6_null_entry;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	dst_hold(&rt->dst);
 | 
					 | 
				
			||||||
	if (nrt) {
 | 
					 | 
				
			||||||
		err = ip6_ins_rt(nrt);
 | 
					 | 
				
			||||||
		if (!err)
 | 
					 | 
				
			||||||
			goto out2;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (--attempts <= 0)
 | 
					 | 
				
			||||||
		goto out2;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Race condition! In the gap, when table->tb6_lock was
 | 
					 | 
				
			||||||
	 * released someone could insert this route.  Relookup.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	ip6_rt_put(rt);
 | 
					 | 
				
			||||||
	goto redo_fib6_lookup_lock;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
out2:
 | 
					 | 
				
			||||||
	rt6_dst_from_metrics_check(rt);
 | 
						rt6_dst_from_metrics_check(rt);
 | 
				
			||||||
	rt->dst.lastuse = jiffies;
 | 
						rt->dst.lastuse = jiffies;
 | 
				
			||||||
	rt->dst.__use++;
 | 
						rt->dst.__use++;
 | 
				
			||||||
| 
						 | 
					@ -1113,24 +1076,63 @@ static void ip6_link_failure(struct sk_buff *skb)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 | 
					static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
 | 
				
			||||||
			       struct sk_buff *skb, u32 mtu)
 | 
					{
 | 
				
			||||||
 | 
						struct net *net = dev_net(rt->dst.dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						rt->rt6i_flags |= RTF_MODIFIED;
 | 
				
			||||||
 | 
						rt->rt6i_pmtu = mtu;
 | 
				
			||||||
 | 
						rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 | 
				
			||||||
 | 
									 const struct ipv6hdr *iph, u32 mtu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct rt6_info *rt6 = (struct rt6_info *)dst;
 | 
						struct rt6_info *rt6 = (struct rt6_info *)dst;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (rt6->rt6i_flags & RTF_LOCAL)
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	dst_confirm(dst);
 | 
						dst_confirm(dst);
 | 
				
			||||||
	if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
 | 
						mtu = max_t(u32, mtu, IPV6_MIN_MTU);
 | 
				
			||||||
		struct net *net = dev_net(dst->dev);
 | 
						if (mtu >= dst_mtu(dst))
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		rt6->rt6i_flags |= RTF_MODIFIED;
 | 
						if (rt6->rt6i_flags & RTF_CACHE) {
 | 
				
			||||||
		if (mtu < IPV6_MIN_MTU)
 | 
							rt6_do_update_pmtu(rt6, mtu);
 | 
				
			||||||
			mtu = IPV6_MIN_MTU;
 | 
						} else {
 | 
				
			||||||
 | 
							const struct in6_addr *daddr, *saddr;
 | 
				
			||||||
 | 
							struct rt6_info *nrt6;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		rt6->rt6i_pmtu = mtu;
 | 
							if (iph) {
 | 
				
			||||||
		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
 | 
								daddr = &iph->daddr;
 | 
				
			||||||
 | 
								saddr = &iph->saddr;
 | 
				
			||||||
 | 
							} else if (sk) {
 | 
				
			||||||
 | 
								daddr = &sk->sk_v6_daddr;
 | 
				
			||||||
 | 
								saddr = &inet6_sk(sk)->saddr;
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								return;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
 | 
				
			||||||
 | 
							if (nrt6) {
 | 
				
			||||||
 | 
								rt6_do_update_pmtu(nrt6, mtu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								/* ip6_ins_rt(nrt6) will bump the
 | 
				
			||||||
 | 
								 * rt6->rt6i_node->fn_sernum
 | 
				
			||||||
 | 
								 * which will fail the next rt6_check() and
 | 
				
			||||||
 | 
								 * invalidate the sk->sk_dst_cache.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								ip6_ins_rt(nrt6);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 | 
				
			||||||
 | 
								       struct sk_buff *skb, u32 mtu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 | 
					void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 | 
				
			||||||
		     int oif, u32 mark)
 | 
							     int oif, u32 mark)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -1147,7 +1149,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	dst = ip6_route_output(net, NULL, &fl6);
 | 
						dst = ip6_route_output(net, NULL, &fl6);
 | 
				
			||||||
	if (!dst->error)
 | 
						if (!dst->error)
 | 
				
			||||||
		ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
 | 
							__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
 | 
				
			||||||
	dst_release(dst);
 | 
						dst_release(dst);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
 | 
					EXPORT_SYMBOL_GPL(ip6_update_pmtu);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue