forked from mirrors/linux
		
	net: add support for ipv4 big tcp
Similar to Eric's IPv6 BIG TCP, this patch enables IPv4 BIG TCP. Firstly, allow sk->sk_gso_max_size to be set to a value greater than GSO_LEGACY_MAX_SIZE by not trimming gso_max_size in sk_trim_gso_size() for IPv4 TCP sockets. Then, on the TX path, set the IP header tot_len to 0 when skb->len > IP_MAX_MTU in __ip_local_out() to allow sending BIG TCP packets; this implies that skb->len is the length of an IPv4 packet. On the RX path, use skb->len as the length of the IPv4 packet when the IP header tot_len is 0 and skb->len > IP_MAX_MTU in ip_rcv_core(). As the APIs iph_set_totlen() and skb_ip_totlen() are used in __ip_local_out() and ip_rcv_core(), we only need to update these APIs. Also, in GRO receive, add the check for ETH_P_IP/IPPROTO_TCP and allow the merged packet size to be >= GRO_LEGACY_MAX_SIZE in skb_gro_receive(). In GRO complete, set the IP header tot_len to 0 in iph_set_totlen() when the merged packet size is greater than IP_MAX_MTU, so that it can be processed on the RX path. Note that checking skb_is_gso_tcp() in iph_totlen() makes it safe for this implementation to use iph->tot_len == 0 to indicate IPv4 BIG TCP packets. Signed-off-by: Xin Long <lucien.xin@gmail.com> Reviewed-by: David Ahern <dsahern@kernel.org> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
		
							parent
							
								
									9eefedd58a
								
							
						
					
					
						commit
						b1a78b9b98
					
				
					 5 changed files with 27 additions and 22 deletions
				
			
		| 
						 | 
					@ -162,16 +162,18 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 | 
				
			||||||
	struct sk_buff *lp;
 | 
						struct sk_buff *lp;
 | 
				
			||||||
	int segs;
 | 
						int segs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
 | 
						/* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */
 | 
				
			||||||
	gro_max_size = READ_ONCE(p->dev->gro_max_size);
 | 
						gro_max_size = p->protocol == htons(ETH_P_IPV6) ?
 | 
				
			||||||
 | 
								READ_ONCE(p->dev->gro_max_size) :
 | 
				
			||||||
 | 
									READ_ONCE(p->dev->gro_ipv4_max_size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
 | 
						if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
 | 
				
			||||||
		return -E2BIG;
 | 
							return -E2BIG;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) {
 | 
						if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) {
 | 
				
			||||||
		if (p->protocol != htons(ETH_P_IPV6) ||
 | 
							if (NAPI_GRO_CB(skb)->proto != IPPROTO_TCP ||
 | 
				
			||||||
		    skb_headroom(p) < sizeof(struct hop_jumbo_hdr) ||
 | 
							    (p->protocol == htons(ETH_P_IPV6) &&
 | 
				
			||||||
		    ipv6_hdr(p)->nexthdr != IPPROTO_TCP ||
 | 
							     skb_headroom(p) < sizeof(struct hop_jumbo_hdr)) ||
 | 
				
			||||||
		    p->encapsulation)
 | 
							    p->encapsulation)
 | 
				
			||||||
			return -E2BIG;
 | 
								return -E2BIG;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2373,17 +2373,22 @@ void sk_free_unlock_clone(struct sock *sk)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
 | 
					EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void sk_trim_gso_size(struct sock *sk)
 | 
					static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (sk->sk_gso_max_size <= GSO_LEGACY_MAX_SIZE)
 | 
						bool is_ipv6 = false;
 | 
				
			||||||
		return;
 | 
						u32 max_size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if IS_ENABLED(CONFIG_IPV6)
 | 
					#if IS_ENABLED(CONFIG_IPV6)
 | 
				
			||||||
	if (sk->sk_family == AF_INET6 &&
 | 
						is_ipv6 = (sk->sk_family == AF_INET6 &&
 | 
				
			||||||
	    sk_is_tcp(sk) &&
 | 
							   !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
 | 
				
			||||||
	    !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
 | 
					 | 
				
			||||||
		return;
 | 
					 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	sk->sk_gso_max_size = GSO_LEGACY_MAX_SIZE;
 | 
						/* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
 | 
				
			||||||
 | 
						max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
 | 
				
			||||||
 | 
								READ_ONCE(dst->dev->gso_ipv4_max_size);
 | 
				
			||||||
 | 
						if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
 | 
				
			||||||
 | 
							max_size = GSO_LEGACY_MAX_SIZE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return max_size - (MAX_TCP_HEADER + 1);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 | 
					void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 | 
				
			||||||
| 
						 | 
					@ -2403,10 +2408,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 | 
				
			||||||
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 | 
								sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
 | 
								sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
 | 
				
			||||||
			/* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
 | 
								sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
 | 
				
			||||||
			sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
 | 
					 | 
				
			||||||
			sk_trim_gso_size(sk);
 | 
					 | 
				
			||||||
			sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
 | 
					 | 
				
			||||||
			/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
 | 
								/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
 | 
				
			||||||
			max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
 | 
								max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1485,6 +1485,7 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 | 
				
			||||||
	if (unlikely(ip_fast_csum((u8 *)iph, 5)))
 | 
						if (unlikely(ip_fast_csum((u8 *)iph, 5)))
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						NAPI_GRO_CB(skb)->proto = proto;
 | 
				
			||||||
	id = ntohl(*(__be32 *)&iph->id);
 | 
						id = ntohl(*(__be32 *)&iph->id);
 | 
				
			||||||
	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
 | 
						flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
 | 
				
			||||||
	id >>= 16;
 | 
						id >>= 16;
 | 
				
			||||||
| 
						 | 
					@ -1618,9 +1619,9 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int inet_gro_complete(struct sk_buff *skb, int nhoff)
 | 
					int inet_gro_complete(struct sk_buff *skb, int nhoff)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	__be16 newlen = htons(skb->len - nhoff);
 | 
					 | 
				
			||||||
	struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
 | 
						struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
 | 
				
			||||||
	const struct net_offload *ops;
 | 
						const struct net_offload *ops;
 | 
				
			||||||
 | 
						__be16 totlen = iph->tot_len;
 | 
				
			||||||
	int proto = iph->protocol;
 | 
						int proto = iph->protocol;
 | 
				
			||||||
	int err = -ENOSYS;
 | 
						int err = -ENOSYS;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1629,8 +1630,8 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
 | 
				
			||||||
		skb_set_inner_network_header(skb, nhoff);
 | 
							skb_set_inner_network_header(skb, nhoff);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	csum_replace2(&iph->check, iph->tot_len, newlen);
 | 
						iph_set_totlen(iph, skb->len - nhoff);
 | 
				
			||||||
	iph->tot_len = newlen;
 | 
						csum_replace2(&iph->check, totlen, iph->tot_len);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ops = rcu_dereference(inet_offloads[proto]);
 | 
						ops = rcu_dereference(inet_offloads[proto]);
 | 
				
			||||||
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 | 
						if (WARN_ON(!ops || !ops->callbacks.gro_complete))
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -511,7 +511,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
 | 
				
			||||||
	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
 | 
						if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
 | 
				
			||||||
		goto csum_error;
 | 
							goto csum_error;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	len = ntohs(iph->tot_len);
 | 
						len = iph_totlen(skb, iph);
 | 
				
			||||||
	if (skb->len < len) {
 | 
						if (skb->len < len) {
 | 
				
			||||||
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
 | 
							drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
 | 
				
			||||||
		__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
 | 
							__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -100,7 +100,7 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct iphdr *iph = ip_hdr(skb);
 | 
						struct iphdr *iph = ip_hdr(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	iph->tot_len = htons(skb->len);
 | 
						iph_set_totlen(iph, skb->len);
 | 
				
			||||||
	ip_send_check(iph);
 | 
						ip_send_check(iph);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* if egress device is enslaved to an L3 master device pass the
 | 
						/* if egress device is enslaved to an L3 master device pass the
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue