udp: implement GRO for plain UDP sockets.
This is the RX counterpart of commit bec1f6f697 ("udp: generate gso
with UDP_SEGMENT"). When UDP_GRO is enabled, such a socket is also
eligible for GRO in the RX path: UDP segments directed to it are
assembled into a larger GSO_UDP_L4 packet.
The core UDP GRO support is enabled with setsockopt(UDP_GRO).
Initial benchmark numbers:
Before:
udp rx:   1079 MB/s   769065 calls/s
After:
udp rx:   1466 MB/s    24877 calls/s
This change introduces a side effect with respect to UDP tunnels:
after a UDP tunnel is created, the kernel now performs a socket lookup
for each ingress UDP packet, while before such a lookup happened only if
the ingress packet carried a valid internal header csum.
rfc v2 -> rfc v3:
 - fixed typos in macro name and comments
 - really enforce UDP_GRO_CNT_MAX, instead of UDP_GRO_CNT_MAX + 1
 - acquire socket lock in UDP_GRO setsockopt
rfc v1 -> rfc v2:
 - use a new option to enable UDP GRO
 - use static keys to protect the UDP GRO socket lookup
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
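From userspace, the new option is set like any other UDP-level socket option.
The receiver sketch below is an editorial illustration, not part of the patch:
the port (8000), the buffer size, and the fallback define are assumptions, and
UDP_GRO may be missing from older uapi headers, hence the #ifndef guard that
mirrors the value added in include/uapi/linux/udp.h.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef UDP_GRO
#define UDP_GRO 104	/* value added by this patch; older headers may lack it */
#endif

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(8000),		/* example port */
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	char buf[65535];
	int one = 1;
	ssize_t len;
	int fd;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("socket/bind");
		return 1;
	}

	/* Opt in to GRO: segments of one flow may now be delivered as a
	 * single large GSO_UDP_L4 buffer instead of one datagram per read.
	 */
	if (setsockopt(fd, IPPROTO_UDP, UDP_GRO, &one, sizeof(one)) < 0) {
		perror("setsockopt(UDP_GRO)");
		return 1;
	}

	len = recv(fd, buf, sizeof(buf), 0);
	if (len >= 0)
		printf("received %zd bytes in one call\n", len);

	close(fd);
	return 0;
}

With gro_enabled set, a single recv() can return the payload of many wire
segments, which is what drives the drop in calls/s (and the rise in MB/s)
in the benchmark numbers above.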
			
			
parent 60fb9567bf
commit e20cf8d3f1
5 changed files with 99 additions and 28 deletions
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -50,11 +50,12 @@ struct udp_sock {
 	__u8		 encap_type;	/* Is this an Encapsulation socket? */
 	unsigned char	 no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
 			 no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
-			 encap_enabled:1; /* This socket enabled encap
+			 encap_enabled:1, /* This socket enabled encap
 					   * processing; UDP tunnels and
 					   * different encapsulation layer set
 					   * this
 					   */
+			 gro_enabled:1;	/* Can accept GRO packets */
 	/*
 	 * Following member retains the information to create a UDP header
 	 * when the socket is uncorked.
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -33,6 +33,7 @@ struct udphdr {
 #define UDP_NO_CHECK6_TX 101	/* Disable sending checksum for UDP6X */
 #define UDP_NO_CHECK6_RX 102	/* Disable accpeting checksum for UDP6 */
 #define UDP_SEGMENT	103	/* Set GSO segmentation size */
+#define UDP_GRO		104	/* This socket can receive UDP GRO packets */
 
 /* UDP encapsulation types */
 #define UDP_ENCAP_ESPINUDP_NON_IKE	1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2473,6 +2473,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		up->gso_size = val;
 		break;
 
+	case UDP_GRO:
+		lock_sock(sk);
+		if (valbool)
+			udp_tunnel_encap_enable(sk->sk_socket);
+		up->gro_enabled = valbool;
+		release_sock(sk);
+		break;
+
 	/*
 	 * 	UDP-Lite's partial checksum coverage (RFC 3828).
 	 */
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -343,6 +343,54 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 	return segs;
 }
 
+#define UDP_GRO_CNT_MAX 64
+static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
+					       struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+	struct sk_buff *pp = NULL;
+	struct udphdr *uh2;
+	struct sk_buff *p;
+
+	/* requires non zero csum, for symmetry with GSO */
+	if (!uh->check) {
+		NAPI_GRO_CB(skb)->flush = 1;
+		return NULL;
+	}
+
+	/* pull encapsulating udp header */
+	skb_gro_pull(skb, sizeof(struct udphdr));
+	skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+
+	list_for_each_entry(p, head, list) {
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		uh2 = udp_hdr(p);
+
+		/* Match ports only, as csum is always non zero */
+		if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+
+		/* Terminate the flow on len mismatch or if it grow "too much".
+		 * Under small packet flood GRO count could elsewhere grow a lot
+		 * leading to execessive truesize values
+		 */
+		if (!skb_gro_receive(p, skb) &&
+		    NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
+			pp = p;
+		else if (uh->len != uh2->len)
+			pp = p;
+
+		return pp;
+	}
+
+	/* mismatch, but we never need to flush */
+	return NULL;
+}
+
 struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 				struct udphdr *uh, udp_lookup_t lookup)
 {
@@ -353,23 +401,27 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	int flush = 1;
 	struct sock *sk;
 
+	rcu_read_lock();
+	sk = (*lookup)(skb, uh->source, uh->dest);
+	if (!sk)
+		goto out_unlock;
+
+	if (udp_sk(sk)->gro_enabled) {
+		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+		rcu_read_unlock();
+		return pp;
+	}
+
 	if (NAPI_GRO_CB(skb)->encap_mark ||
 	    (skb->ip_summed != CHECKSUM_PARTIAL &&
 	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
-	     !NAPI_GRO_CB(skb)->csum_valid))
-		goto out;
+	     !NAPI_GRO_CB(skb)->csum_valid) ||
+	    !udp_sk(sk)->gro_receive)
+		goto out_unlock;
 
 	/* mark that this skb passed once through the tunnel gro layer */
 	NAPI_GRO_CB(skb)->encap_mark = 1;
 
-	rcu_read_lock();
-	sk = (*lookup)(skb, uh->source, uh->dest);
-
-	if (sk && udp_sk(sk)->gro_receive)
-		goto unflush;
-	goto out_unlock;
-
-unflush:
 	flush = 0;
 
 	list_for_each_entry(p, head, list) {
@@ -394,7 +446,6 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 
 out_unlock:
 	rcu_read_unlock();
-out:
 	skb_gro_flush_final(skb, pp, flush);
 	return pp;
 }
@@ -427,6 +478,19 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head,
 	return NULL;
 }
 
+static int udp_gro_complete_segment(struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	skb->csum_start = (unsigned char *)uh - skb->head;
+	skb->csum_offset = offsetof(struct udphdr, check);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+
+	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+	skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+	return 0;
+}
+
 int udp_gro_complete(struct sk_buff *skb, int nhoff,
 		     udp_lookup_t lookup)
 {
@@ -437,16 +501,21 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
 
 	uh->len = newlen;
 
-	/* Set encapsulation before calling into inner gro_complete() functions
-	 * to make them set up the inner offsets.
-	 */
-	skb->encapsulation = 1;
-
 	rcu_read_lock();
 	sk = (*lookup)(skb, uh->source, uh->dest);
-	if (sk && udp_sk(sk)->gro_complete)
+	if (sk && udp_sk(sk)->gro_enabled) {
+		err = udp_gro_complete_segment(skb);
+	} else if (sk && udp_sk(sk)->gro_complete) {
+		skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
+					: SKB_GSO_UDP_TUNNEL;
+
+		/* Set encapsulation before calling into inner gro_complete()
+		 * functions to make them set up the inner offsets.
+		 */
+		skb->encapsulation = 1;
 		err = udp_sk(sk)->gro_complete(sk, skb,
 				nhoff + sizeof(struct udphdr));
+	}
 	rcu_read_unlock();
 
 	if (skb->remcsum_offload)
@@ -461,13 +530,9 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (uh->check) {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+	if (uh->check)
 		uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
 					  iph->daddr, 0);
-	} else {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	}
 
 	return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
 }
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (uh->check) {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+	if (uh->check)
 		uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
 					  &ipv6h->daddr, 0);
-	} else {
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	}
 
 	return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
 }
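For completeness, the TX counterpart named in the commit message (UDP_SEGMENT,
commit bec1f6f697) generates the GSO packets that this receive path
re-aggregates. The sender sketch below is likewise only an illustration under
assumed values (loopback destination, port 8000 to match the receiver sketch,
1400-byte segments); it is not code from either commit.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103	/* from include/uapi/linux/udp.h */
#endif

int main(void)
{
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port = htons(8000),	/* matches the receiver sketch */
	};
	char payload[1400 * 16];	/* 16 segments of 1400 bytes each */
	int gso_size = 1400;
	int fd;

	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
	memset(payload, 'x', sizeof(payload));

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* Ask the stack to split each send into gso_size-byte datagrams */
	if (setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT,
		       &gso_size, sizeof(gso_size)) < 0) {
		perror("setsockopt(UDP_SEGMENT)");
		return 1;
	}

	if (sendto(fd, payload, sizeof(payload), 0,
		   (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("sendto");

	close(fd);
	return 0;
}

Whether the receiver actually sees the segments coalesced depends on the
receive path in use (GRO runs per NAPI instance), but the pair of sketches
exercises the TX and RX sides described above.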