forked from mirrors/linux
		
	ipv4: lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu
Prior to the rework of PMTU information storage in commit 2c8cec5c10 ("ipv4: Cache learned PMTU information in inetpeer."), when a PMTU event advertising a PMTU smaller than net.ipv4.route.min_pmtu was received, we would disable setting the DF flag on packets by locking the MTU metric, and set the PMTU to net.ipv4.route.min_pmtu. Since then, we don't disable DF, and set PMTU to net.ipv4.route.min_pmtu, so the intermediate router that has this link with a small MTU will have to drop the packets. This patch reestablishes pre-2.6.39 behavior by splitting rtable->rt_pmtu into a bitfield with rt_mtu_locked and rt_pmtu. rt_mtu_locked indicates that we shouldn't set the DF bit on that path, and is checked in ip_dont_fragment(). One possible workaround is to set net.ipv4.route.min_pmtu to a value low enough to accommodate the lowest MTU encountered. Fixes: 2c8cec5c10 ("ipv4: Cache learned PMTU information in inetpeer.") Signed-off-by: Sabrina Dubroca <sd@queasysnail.net> Reviewed-by: Stefano Brivio <sbrivio@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									16c2e4db83
								
							
						
					
					
						commit
						d52e5a7e7c
					
				
					 5 changed files with 32 additions and 10 deletions
				
			
		|  | @ -328,6 +328,13 @@ int ip_decrease_ttl(struct iphdr *iph) | ||||||
| 	return --iph->ttl; | 	return --iph->ttl; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static inline int ip_mtu_locked(const struct dst_entry *dst) | ||||||
|  | { | ||||||
|  | 	const struct rtable *rt = (const struct rtable *)dst; | ||||||
|  | 
 | ||||||
|  | 	return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static inline | static inline | ||||||
| int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) | int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) | ||||||
| { | { | ||||||
|  | @ -335,7 +342,7 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst) | ||||||
| 
 | 
 | ||||||
| 	return  pmtudisc == IP_PMTUDISC_DO || | 	return  pmtudisc == IP_PMTUDISC_DO || | ||||||
| 		(pmtudisc == IP_PMTUDISC_WANT && | 		(pmtudisc == IP_PMTUDISC_WANT && | ||||||
| 		 !(dst_metric_locked(dst, RTAX_MTU))); | 		 !ip_mtu_locked(dst)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline bool ip_sk_accept_pmtu(const struct sock *sk) | static inline bool ip_sk_accept_pmtu(const struct sock *sk) | ||||||
|  | @ -361,7 +368,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, | ||||||
| 	struct net *net = dev_net(dst->dev); | 	struct net *net = dev_net(dst->dev); | ||||||
| 
 | 
 | ||||||
| 	if (net->ipv4.sysctl_ip_fwd_use_pmtu || | 	if (net->ipv4.sysctl_ip_fwd_use_pmtu || | ||||||
| 	    dst_metric_locked(dst, RTAX_MTU) || | 	    ip_mtu_locked(dst) || | ||||||
| 	    !forwarding) | 	    !forwarding) | ||||||
| 		return dst_mtu(dst); | 		return dst_mtu(dst); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -59,6 +59,7 @@ struct fib_nh_exception { | ||||||
| 	int				fnhe_genid; | 	int				fnhe_genid; | ||||||
| 	__be32				fnhe_daddr; | 	__be32				fnhe_daddr; | ||||||
| 	u32				fnhe_pmtu; | 	u32				fnhe_pmtu; | ||||||
|  | 	bool				fnhe_mtu_locked; | ||||||
| 	__be32				fnhe_gw; | 	__be32				fnhe_gw; | ||||||
| 	unsigned long			fnhe_expires; | 	unsigned long			fnhe_expires; | ||||||
| 	struct rtable __rcu		*fnhe_rth_input; | 	struct rtable __rcu		*fnhe_rth_input; | ||||||
|  |  | ||||||
|  | @ -63,7 +63,8 @@ struct rtable { | ||||||
| 	__be32			rt_gateway; | 	__be32			rt_gateway; | ||||||
| 
 | 
 | ||||||
| 	/* Miscellaneous cached information */ | 	/* Miscellaneous cached information */ | ||||||
| 	u32			rt_pmtu; | 	u32			rt_mtu_locked:1, | ||||||
|  | 				rt_pmtu:31; | ||||||
| 
 | 
 | ||||||
| 	u32			rt_table_id; | 	u32			rt_table_id; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -634,6 +634,7 @@ static inline u32 fnhe_hashfun(__be32 daddr) | ||||||
| static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) | static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) | ||||||
| { | { | ||||||
| 	rt->rt_pmtu = fnhe->fnhe_pmtu; | 	rt->rt_pmtu = fnhe->fnhe_pmtu; | ||||||
|  | 	rt->rt_mtu_locked = fnhe->fnhe_mtu_locked; | ||||||
| 	rt->dst.expires = fnhe->fnhe_expires; | 	rt->dst.expires = fnhe->fnhe_expires; | ||||||
| 
 | 
 | ||||||
| 	if (fnhe->fnhe_gw) { | 	if (fnhe->fnhe_gw) { | ||||||
|  | @ -644,7 +645,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | ||||||
| 				  u32 pmtu, unsigned long expires) | 				  u32 pmtu, bool lock, unsigned long expires) | ||||||
| { | { | ||||||
| 	struct fnhe_hash_bucket *hash; | 	struct fnhe_hash_bucket *hash; | ||||||
| 	struct fib_nh_exception *fnhe; | 	struct fib_nh_exception *fnhe; | ||||||
|  | @ -681,8 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | ||||||
| 			fnhe->fnhe_genid = genid; | 			fnhe->fnhe_genid = genid; | ||||||
| 		if (gw) | 		if (gw) | ||||||
| 			fnhe->fnhe_gw = gw; | 			fnhe->fnhe_gw = gw; | ||||||
| 		if (pmtu) | 		if (pmtu) { | ||||||
| 			fnhe->fnhe_pmtu = pmtu; | 			fnhe->fnhe_pmtu = pmtu; | ||||||
|  | 			fnhe->fnhe_mtu_locked = lock; | ||||||
|  | 		} | ||||||
| 		fnhe->fnhe_expires = max(1UL, expires); | 		fnhe->fnhe_expires = max(1UL, expires); | ||||||
| 		/* Update all cached dsts too */ | 		/* Update all cached dsts too */ | ||||||
| 		rt = rcu_dereference(fnhe->fnhe_rth_input); | 		rt = rcu_dereference(fnhe->fnhe_rth_input); | ||||||
|  | @ -706,6 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | ||||||
| 		fnhe->fnhe_daddr = daddr; | 		fnhe->fnhe_daddr = daddr; | ||||||
| 		fnhe->fnhe_gw = gw; | 		fnhe->fnhe_gw = gw; | ||||||
| 		fnhe->fnhe_pmtu = pmtu; | 		fnhe->fnhe_pmtu = pmtu; | ||||||
|  | 		fnhe->fnhe_mtu_locked = lock; | ||||||
| 		fnhe->fnhe_expires = expires; | 		fnhe->fnhe_expires = expires; | ||||||
| 
 | 
 | ||||||
| 		/* Exception created; mark the cached routes for the nexthop
 | 		/* Exception created; mark the cached routes for the nexthop
 | ||||||
|  | @ -787,7 +791,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow | ||||||
| 				struct fib_nh *nh = &FIB_RES_NH(res); | 				struct fib_nh *nh = &FIB_RES_NH(res); | ||||||
| 
 | 
 | ||||||
| 				update_or_create_fnhe(nh, fl4->daddr, new_gw, | 				update_or_create_fnhe(nh, fl4->daddr, new_gw, | ||||||
| 						0, jiffies + ip_rt_gc_timeout); | 						0, false, | ||||||
|  | 						jiffies + ip_rt_gc_timeout); | ||||||
| 			} | 			} | ||||||
| 			if (kill_route) | 			if (kill_route) | ||||||
| 				rt->dst.obsolete = DST_OBSOLETE_KILL; | 				rt->dst.obsolete = DST_OBSOLETE_KILL; | ||||||
|  | @ -1009,15 +1014,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) | ||||||
| { | { | ||||||
| 	struct dst_entry *dst = &rt->dst; | 	struct dst_entry *dst = &rt->dst; | ||||||
| 	struct fib_result res; | 	struct fib_result res; | ||||||
|  | 	bool lock = false; | ||||||
| 
 | 
 | ||||||
| 	if (dst_metric_locked(dst, RTAX_MTU)) | 	if (ip_mtu_locked(dst)) | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	if (ipv4_mtu(dst) < mtu) | 	if (ipv4_mtu(dst) < mtu) | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	if (mtu < ip_rt_min_pmtu) | 	if (mtu < ip_rt_min_pmtu) { | ||||||
|  | 		lock = true; | ||||||
| 		mtu = ip_rt_min_pmtu; | 		mtu = ip_rt_min_pmtu; | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	if (rt->rt_pmtu == mtu && | 	if (rt->rt_pmtu == mtu && | ||||||
| 	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) | 	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) | ||||||
|  | @ -1027,7 +1035,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) | ||||||
| 	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { | 	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { | ||||||
| 		struct fib_nh *nh = &FIB_RES_NH(res); | 		struct fib_nh *nh = &FIB_RES_NH(res); | ||||||
| 
 | 
 | ||||||
| 		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, | 		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, | ||||||
| 				      jiffies + ip_rt_mtu_expires); | 				      jiffies + ip_rt_mtu_expires); | ||||||
| 	} | 	} | ||||||
| 	rcu_read_unlock(); | 	rcu_read_unlock(); | ||||||
|  | @ -1280,7 +1288,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) | ||||||
| 
 | 
 | ||||||
| 	mtu = READ_ONCE(dst->dev->mtu); | 	mtu = READ_ONCE(dst->dev->mtu); | ||||||
| 
 | 
 | ||||||
| 	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { | 	if (unlikely(ip_mtu_locked(dst))) { | ||||||
| 		if (rt->rt_uses_gateway && mtu > 576) | 		if (rt->rt_uses_gateway && mtu > 576) | ||||||
| 			mtu = 576; | 			mtu = 576; | ||||||
| 	} | 	} | ||||||
|  | @ -1521,6 +1529,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, | ||||||
| 		rt->rt_is_input = 0; | 		rt->rt_is_input = 0; | ||||||
| 		rt->rt_iif = 0; | 		rt->rt_iif = 0; | ||||||
| 		rt->rt_pmtu = 0; | 		rt->rt_pmtu = 0; | ||||||
|  | 		rt->rt_mtu_locked = 0; | ||||||
| 		rt->rt_gateway = 0; | 		rt->rt_gateway = 0; | ||||||
| 		rt->rt_uses_gateway = 0; | 		rt->rt_uses_gateway = 0; | ||||||
| 		rt->rt_table_id = 0; | 		rt->rt_table_id = 0; | ||||||
|  | @ -2546,6 +2555,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or | ||||||
| 		rt->rt_is_input = ort->rt_is_input; | 		rt->rt_is_input = ort->rt_is_input; | ||||||
| 		rt->rt_iif = ort->rt_iif; | 		rt->rt_iif = ort->rt_iif; | ||||||
| 		rt->rt_pmtu = ort->rt_pmtu; | 		rt->rt_pmtu = ort->rt_pmtu; | ||||||
|  | 		rt->rt_mtu_locked = ort->rt_mtu_locked; | ||||||
| 
 | 
 | ||||||
| 		rt->rt_genid = rt_genid_ipv4(net); | 		rt->rt_genid = rt_genid_ipv4(net); | ||||||
| 		rt->rt_flags = ort->rt_flags; | 		rt->rt_flags = ort->rt_flags; | ||||||
|  | @ -2648,6 +2658,8 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id, | ||||||
| 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); | 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); | ||||||
| 	if (rt->rt_pmtu && expires) | 	if (rt->rt_pmtu && expires) | ||||||
| 		metrics[RTAX_MTU - 1] = rt->rt_pmtu; | 		metrics[RTAX_MTU - 1] = rt->rt_pmtu; | ||||||
|  | 	if (rt->rt_mtu_locked && expires) | ||||||
|  | 		metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU); | ||||||
| 	if (rtnetlink_put_metrics(skb, metrics) < 0) | 	if (rtnetlink_put_metrics(skb, metrics) < 0) | ||||||
| 		goto nla_put_failure; | 		goto nla_put_failure; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -100,6 +100,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | ||||||
| 	xdst->u.rt.rt_gateway = rt->rt_gateway; | 	xdst->u.rt.rt_gateway = rt->rt_gateway; | ||||||
| 	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; | 	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; | ||||||
| 	xdst->u.rt.rt_pmtu = rt->rt_pmtu; | 	xdst->u.rt.rt_pmtu = rt->rt_pmtu; | ||||||
|  | 	xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; | ||||||
| 	xdst->u.rt.rt_table_id = rt->rt_table_id; | 	xdst->u.rt.rt_table_id = rt->rt_table_id; | ||||||
| 	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); | 	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); | ||||||
| 	rt_add_uncached_list(&xdst->u.rt); | 	rt_add_uncached_list(&xdst->u.rt); | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Sabrina Dubroca
						Sabrina Dubroca