forked from mirrors/linux
		
	Merge branch 'udp-msg_zerocopy'
Willem de Bruijn says:

====================
udp msg_zerocopy

Enable MSG_ZEROCOPY for udp sockets

Patch 1/3 is the main patch, a rework of RFC patch
  http://patchwork.ozlabs.org/patch/899630/
  more details in the patch commit message

Patch 2/3 is an optimization to remove a branch from the UDP hot path
  and refcount_inc/refcount_dec_and_test pair when zerocopy is used.
  This used to be included in the first patch in v2.

Patch 3/3 runs the already existing udp zerocopy tests
  as part of kselftest

See also recent Linux Plumbers presentation
  https://linuxplumbersconf.org/event/2/contributions/106/attachments/104/128/willemdebruijn-lpc2018-udpgso-presentation-20181113.pdf

Changes:
  v1 -> v2
    - Fixup reverse christmas tree violation
  v2 -> v3
    - Split refcount avoidance optimization into separate patch
    - Fix refcount leak on error in fragmented case
      (thanks to Paolo Abeni for pointing this one out!)
    - Fix refcount inc on zero
  v3 -> v4
    - Move skb_zcopy_set below the only kfree_skb that might cause
      a premature uarg destroy before skb_zerocopy_put_abort
      - Move the entire skb_shinfo assignment block, to keep that
        cacheline access in one place
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
						commit
						6e360f7331
					
				
					 9 changed files with 90 additions and 27 deletions
				
			
		| 
						 | 
					@ -481,10 +481,11 @@ static inline void sock_zerocopy_get(struct ubuf_info *uarg)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void sock_zerocopy_put(struct ubuf_info *uarg);
 | 
					void sock_zerocopy_put(struct ubuf_info *uarg);
 | 
				
			||||||
void sock_zerocopy_put_abort(struct ubuf_info *uarg);
 | 
					void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void sock_zerocopy_callback(struct ubuf_info *uarg, bool success);
 | 
					void sock_zerocopy_callback(struct ubuf_info *uarg, bool success);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len);
 | 
				
			||||||
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 | 
					int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 | 
				
			||||||
			     struct msghdr *msg, int len,
 | 
								     struct msghdr *msg, int len,
 | 
				
			||||||
			     struct ubuf_info *uarg);
 | 
								     struct ubuf_info *uarg);
 | 
				
			||||||
| 
						 | 
					@ -1325,9 +1326,13 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
 | 
				
			||||||
	return is_zcopy ? skb_uarg(skb) : NULL;
 | 
						return is_zcopy ? skb_uarg(skb) : NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
 | 
					static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg,
 | 
				
			||||||
 | 
									 bool *have_ref)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (skb && uarg && !skb_zcopy(skb)) {
 | 
						if (skb && uarg && !skb_zcopy(skb)) {
 | 
				
			||||||
 | 
							if (unlikely(have_ref && *have_ref))
 | 
				
			||||||
 | 
								*have_ref = false;
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
			sock_zerocopy_get(uarg);
 | 
								sock_zerocopy_get(uarg);
 | 
				
			||||||
		skb_shinfo(skb)->destructor_arg = uarg;
 | 
							skb_shinfo(skb)->destructor_arg = uarg;
 | 
				
			||||||
		skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
 | 
							skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
 | 
				
			||||||
| 
						 | 
					@ -1373,7 +1378,7 @@ static inline void skb_zcopy_abort(struct sk_buff *skb)
 | 
				
			||||||
	struct ubuf_info *uarg = skb_zcopy(skb);
 | 
						struct ubuf_info *uarg = skb_zcopy(skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (uarg) {
 | 
						if (uarg) {
 | 
				
			||||||
		sock_zerocopy_put_abort(uarg);
 | 
							sock_zerocopy_put_abort(uarg, false);
 | 
				
			||||||
		skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
 | 
							skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1089,7 +1089,7 @@ void sock_zerocopy_put(struct ubuf_info *uarg)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL_GPL(sock_zerocopy_put);
 | 
					EXPORT_SYMBOL_GPL(sock_zerocopy_put);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void sock_zerocopy_put_abort(struct ubuf_info *uarg)
 | 
					void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (uarg) {
 | 
						if (uarg) {
 | 
				
			||||||
		struct sock *sk = skb_from_uarg(uarg)->sk;
 | 
							struct sock *sk = skb_from_uarg(uarg)->sk;
 | 
				
			||||||
| 
						 | 
					@ -1097,6 +1097,7 @@ void sock_zerocopy_put_abort(struct ubuf_info *uarg)
 | 
				
			||||||
		atomic_dec(&sk->sk_zckey);
 | 
							atomic_dec(&sk->sk_zckey);
 | 
				
			||||||
		uarg->len--;
 | 
							uarg->len--;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (have_uref)
 | 
				
			||||||
			sock_zerocopy_put(uarg);
 | 
								sock_zerocopy_put(uarg);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -1105,6 +1106,12 @@ EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
 | 
				
			||||||
extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 | 
					extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 | 
				
			||||||
				   struct iov_iter *from, size_t length);
 | 
									   struct iov_iter *from, size_t length);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 | 
					int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 | 
				
			||||||
			     struct msghdr *msg, int len,
 | 
								     struct msghdr *msg, int len,
 | 
				
			||||||
			     struct ubuf_info *uarg)
 | 
								     struct ubuf_info *uarg)
 | 
				
			||||||
| 
						 | 
					@ -1131,7 +1138,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 | 
				
			||||||
		return err;
 | 
							return err;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	skb_zcopy_set(skb, uarg);
 | 
						skb_zcopy_set(skb, uarg, NULL);
 | 
				
			||||||
	return skb->len - orig_len;
 | 
						return skb->len - orig_len;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
 | 
					EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
 | 
				
			||||||
| 
						 | 
					@ -1151,7 +1158,7 @@ static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
 | 
				
			||||||
			if (skb_copy_ubufs(nskb, GFP_ATOMIC))
 | 
								if (skb_copy_ubufs(nskb, GFP_ATOMIC))
 | 
				
			||||||
				return -EIO;
 | 
									return -EIO;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		skb_zcopy_set(nskb, skb_uarg(orig));
 | 
							skb_zcopy_set(nskb, skb_uarg(orig), NULL);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1018,7 +1018,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	case SO_ZEROCOPY:
 | 
						case SO_ZEROCOPY:
 | 
				
			||||||
		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
 | 
							if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
 | 
				
			||||||
			if (sk->sk_protocol != IPPROTO_TCP)
 | 
								if (!((sk->sk_type == SOCK_STREAM &&
 | 
				
			||||||
 | 
								       sk->sk_protocol == IPPROTO_TCP) ||
 | 
				
			||||||
 | 
								      (sk->sk_type == SOCK_DGRAM &&
 | 
				
			||||||
 | 
								       sk->sk_protocol == IPPROTO_UDP)))
 | 
				
			||||||
				ret = -ENOTSUPP;
 | 
									ret = -ENOTSUPP;
 | 
				
			||||||
		} else if (sk->sk_family != PF_RDS) {
 | 
							} else if (sk->sk_family != PF_RDS) {
 | 
				
			||||||
			ret = -ENOTSUPP;
 | 
								ret = -ENOTSUPP;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -867,6 +867,7 @@ static int __ip_append_data(struct sock *sk,
 | 
				
			||||||
			    unsigned int flags)
 | 
								    unsigned int flags)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct inet_sock *inet = inet_sk(sk);
 | 
						struct inet_sock *inet = inet_sk(sk);
 | 
				
			||||||
 | 
						struct ubuf_info *uarg = NULL;
 | 
				
			||||||
	struct sk_buff *skb;
 | 
						struct sk_buff *skb;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct ip_options *opt = cork->opt;
 | 
						struct ip_options *opt = cork->opt;
 | 
				
			||||||
| 
						 | 
					@ -880,8 +881,8 @@ static int __ip_append_data(struct sock *sk,
 | 
				
			||||||
	int csummode = CHECKSUM_NONE;
 | 
						int csummode = CHECKSUM_NONE;
 | 
				
			||||||
	struct rtable *rt = (struct rtable *)cork->dst;
 | 
						struct rtable *rt = (struct rtable *)cork->dst;
 | 
				
			||||||
	unsigned int wmem_alloc_delta = 0;
 | 
						unsigned int wmem_alloc_delta = 0;
 | 
				
			||||||
 | 
						bool paged, extra_uref;
 | 
				
			||||||
	u32 tskey = 0;
 | 
						u32 tskey = 0;
 | 
				
			||||||
	bool paged;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	skb = skb_peek_tail(queue);
 | 
						skb = skb_peek_tail(queue);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -916,6 +917,20 @@ static int __ip_append_data(struct sock *sk,
 | 
				
			||||||
	    (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM)))
 | 
						    (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM)))
 | 
				
			||||||
		csummode = CHECKSUM_PARTIAL;
 | 
							csummode = CHECKSUM_PARTIAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
 | 
				
			||||||
 | 
							uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
 | 
				
			||||||
 | 
							if (!uarg)
 | 
				
			||||||
 | 
								return -ENOBUFS;
 | 
				
			||||||
 | 
							extra_uref = true;
 | 
				
			||||||
 | 
							if (rt->dst.dev->features & NETIF_F_SG &&
 | 
				
			||||||
 | 
							    csummode == CHECKSUM_PARTIAL) {
 | 
				
			||||||
 | 
								paged = true;
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								uarg->zerocopy = 0;
 | 
				
			||||||
 | 
								skb_zcopy_set(skb, uarg, &extra_uref);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	cork->length += length;
 | 
						cork->length += length;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* So, what's going on in the loop below?
 | 
						/* So, what's going on in the loop below?
 | 
				
			||||||
| 
						 | 
					@ -1001,12 +1016,6 @@ static int __ip_append_data(struct sock *sk,
 | 
				
			||||||
			skb->csum = 0;
 | 
								skb->csum = 0;
 | 
				
			||||||
			skb_reserve(skb, hh_len);
 | 
								skb_reserve(skb, hh_len);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			/* only the initial fragment is time stamped */
 | 
					 | 
				
			||||||
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
 | 
					 | 
				
			||||||
			cork->tx_flags = 0;
 | 
					 | 
				
			||||||
			skb_shinfo(skb)->tskey = tskey;
 | 
					 | 
				
			||||||
			tskey = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			/*
 | 
								/*
 | 
				
			||||||
			 *	Find where to start putting bytes.
 | 
								 *	Find where to start putting bytes.
 | 
				
			||||||
			 */
 | 
								 */
 | 
				
			||||||
| 
						 | 
					@ -1039,6 +1048,13 @@ static int __ip_append_data(struct sock *sk,
 | 
				
			||||||
			exthdrlen = 0;
 | 
								exthdrlen = 0;
 | 
				
			||||||
			csummode = CHECKSUM_NONE;
 | 
								csummode = CHECKSUM_NONE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								/* only the initial fragment is time stamped */
 | 
				
			||||||
 | 
								skb_shinfo(skb)->tx_flags = cork->tx_flags;
 | 
				
			||||||
 | 
								cork->tx_flags = 0;
 | 
				
			||||||
 | 
								skb_shinfo(skb)->tskey = tskey;
 | 
				
			||||||
 | 
								tskey = 0;
 | 
				
			||||||
 | 
								skb_zcopy_set(skb, uarg, &extra_uref);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if ((flags & MSG_CONFIRM) && !skb_prev)
 | 
								if ((flags & MSG_CONFIRM) && !skb_prev)
 | 
				
			||||||
				skb_set_dst_pending_confirm(skb, 1);
 | 
									skb_set_dst_pending_confirm(skb, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1068,7 +1084,7 @@ static int __ip_append_data(struct sock *sk,
 | 
				
			||||||
				err = -EFAULT;
 | 
									err = -EFAULT;
 | 
				
			||||||
				goto error;
 | 
									goto error;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		} else {
 | 
							} else if (!uarg || !uarg->zerocopy) {
 | 
				
			||||||
			int i = skb_shinfo(skb)->nr_frags;
 | 
								int i = skb_shinfo(skb)->nr_frags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			err = -ENOMEM;
 | 
								err = -ENOMEM;
 | 
				
			||||||
| 
						 | 
					@ -1098,6 +1114,10 @@ static int __ip_append_data(struct sock *sk,
 | 
				
			||||||
			skb->data_len += copy;
 | 
								skb->data_len += copy;
 | 
				
			||||||
			skb->truesize += copy;
 | 
								skb->truesize += copy;
 | 
				
			||||||
			wmem_alloc_delta += copy;
 | 
								wmem_alloc_delta += copy;
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								err = skb_zerocopy_iter_dgram(skb, from, copy);
 | 
				
			||||||
 | 
								if (err < 0)
 | 
				
			||||||
 | 
									goto error;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		offset += copy;
 | 
							offset += copy;
 | 
				
			||||||
		length -= copy;
 | 
							length -= copy;
 | 
				
			||||||
| 
						 | 
					@ -1110,6 +1130,7 @@ static int __ip_append_data(struct sock *sk,
 | 
				
			||||||
error_efault:
 | 
					error_efault:
 | 
				
			||||||
	err = -EFAULT;
 | 
						err = -EFAULT;
 | 
				
			||||||
error:
 | 
					error:
 | 
				
			||||||
 | 
						sock_zerocopy_put_abort(uarg, extra_uref);
 | 
				
			||||||
	cork->length -= length;
 | 
						cork->length -= length;
 | 
				
			||||||
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
 | 
						IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
 | 
				
			||||||
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
 | 
						refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1423,7 +1423,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 | 
				
			||||||
	if (copied + copied_syn)
 | 
						if (copied + copied_syn)
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
out_err:
 | 
					out_err:
 | 
				
			||||||
	sock_zerocopy_put_abort(uarg);
 | 
						sock_zerocopy_put_abort(uarg, true);
 | 
				
			||||||
	err = sk_stream_error(sk, flags, err);
 | 
						err = sk_stream_error(sk, flags, err);
 | 
				
			||||||
	/* make sure we wake any epoll edge trigger waiter */
 | 
						/* make sure we wake any epoll edge trigger waiter */
 | 
				
			||||||
	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
 | 
						if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1245,6 +1245,7 @@ static int __ip6_append_data(struct sock *sk,
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct sk_buff *skb, *skb_prev = NULL;
 | 
						struct sk_buff *skb, *skb_prev = NULL;
 | 
				
			||||||
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
 | 
						unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
 | 
				
			||||||
 | 
						struct ubuf_info *uarg = NULL;
 | 
				
			||||||
	int exthdrlen = 0;
 | 
						int exthdrlen = 0;
 | 
				
			||||||
	int dst_exthdrlen = 0;
 | 
						int dst_exthdrlen = 0;
 | 
				
			||||||
	int hh_len;
 | 
						int hh_len;
 | 
				
			||||||
| 
						 | 
					@ -1257,7 +1258,7 @@ static int __ip6_append_data(struct sock *sk,
 | 
				
			||||||
	int csummode = CHECKSUM_NONE;
 | 
						int csummode = CHECKSUM_NONE;
 | 
				
			||||||
	unsigned int maxnonfragsize, headersize;
 | 
						unsigned int maxnonfragsize, headersize;
 | 
				
			||||||
	unsigned int wmem_alloc_delta = 0;
 | 
						unsigned int wmem_alloc_delta = 0;
 | 
				
			||||||
	bool paged;
 | 
						bool paged, extra_uref;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	skb = skb_peek_tail(queue);
 | 
						skb = skb_peek_tail(queue);
 | 
				
			||||||
	if (!skb) {
 | 
						if (!skb) {
 | 
				
			||||||
| 
						 | 
					@ -1322,6 +1323,20 @@ static int __ip6_append_data(struct sock *sk,
 | 
				
			||||||
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
 | 
						    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
 | 
				
			||||||
		csummode = CHECKSUM_PARTIAL;
 | 
							csummode = CHECKSUM_PARTIAL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
 | 
				
			||||||
 | 
							uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
 | 
				
			||||||
 | 
							if (!uarg)
 | 
				
			||||||
 | 
								return -ENOBUFS;
 | 
				
			||||||
 | 
							extra_uref = true;
 | 
				
			||||||
 | 
							if (rt->dst.dev->features & NETIF_F_SG &&
 | 
				
			||||||
 | 
							    csummode == CHECKSUM_PARTIAL) {
 | 
				
			||||||
 | 
								paged = true;
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								uarg->zerocopy = 0;
 | 
				
			||||||
 | 
								skb_zcopy_set(skb, uarg, &extra_uref);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Let's try using as much space as possible.
 | 
						 * Let's try using as much space as possible.
 | 
				
			||||||
	 * Use MTU if total length of the message fits into the MTU.
 | 
						 * Use MTU if total length of the message fits into the MTU.
 | 
				
			||||||
| 
						 | 
					@ -1440,12 +1455,6 @@ static int __ip6_append_data(struct sock *sk,
 | 
				
			||||||
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
 | 
								skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
 | 
				
			||||||
				    dst_exthdrlen);
 | 
									    dst_exthdrlen);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			/* Only the initial fragment is time stamped */
 | 
					 | 
				
			||||||
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
 | 
					 | 
				
			||||||
			cork->tx_flags = 0;
 | 
					 | 
				
			||||||
			skb_shinfo(skb)->tskey = tskey;
 | 
					 | 
				
			||||||
			tskey = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			/*
 | 
								/*
 | 
				
			||||||
			 *	Find where to start putting bytes
 | 
								 *	Find where to start putting bytes
 | 
				
			||||||
			 */
 | 
								 */
 | 
				
			||||||
| 
						 | 
					@ -1477,6 +1486,13 @@ static int __ip6_append_data(struct sock *sk,
 | 
				
			||||||
			exthdrlen = 0;
 | 
								exthdrlen = 0;
 | 
				
			||||||
			dst_exthdrlen = 0;
 | 
								dst_exthdrlen = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								/* Only the initial fragment is time stamped */
 | 
				
			||||||
 | 
								skb_shinfo(skb)->tx_flags = cork->tx_flags;
 | 
				
			||||||
 | 
								cork->tx_flags = 0;
 | 
				
			||||||
 | 
								skb_shinfo(skb)->tskey = tskey;
 | 
				
			||||||
 | 
								tskey = 0;
 | 
				
			||||||
 | 
								skb_zcopy_set(skb, uarg, &extra_uref);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if ((flags & MSG_CONFIRM) && !skb_prev)
 | 
								if ((flags & MSG_CONFIRM) && !skb_prev)
 | 
				
			||||||
				skb_set_dst_pending_confirm(skb, 1);
 | 
									skb_set_dst_pending_confirm(skb, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1506,7 +1522,7 @@ static int __ip6_append_data(struct sock *sk,
 | 
				
			||||||
				err = -EFAULT;
 | 
									err = -EFAULT;
 | 
				
			||||||
				goto error;
 | 
									goto error;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		} else {
 | 
							} else if (!uarg || !uarg->zerocopy) {
 | 
				
			||||||
			int i = skb_shinfo(skb)->nr_frags;
 | 
								int i = skb_shinfo(skb)->nr_frags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			err = -ENOMEM;
 | 
								err = -ENOMEM;
 | 
				
			||||||
| 
						 | 
					@ -1536,6 +1552,10 @@ static int __ip6_append_data(struct sock *sk,
 | 
				
			||||||
			skb->data_len += copy;
 | 
								skb->data_len += copy;
 | 
				
			||||||
			skb->truesize += copy;
 | 
								skb->truesize += copy;
 | 
				
			||||||
			wmem_alloc_delta += copy;
 | 
								wmem_alloc_delta += copy;
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								err = skb_zerocopy_iter_dgram(skb, from, copy);
 | 
				
			||||||
 | 
								if (err < 0)
 | 
				
			||||||
 | 
									goto error;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		offset += copy;
 | 
							offset += copy;
 | 
				
			||||||
		length -= copy;
 | 
							length -= copy;
 | 
				
			||||||
| 
						 | 
					@ -1548,6 +1568,7 @@ static int __ip6_append_data(struct sock *sk,
 | 
				
			||||||
error_efault:
 | 
					error_efault:
 | 
				
			||||||
	err = -EFAULT;
 | 
						err = -EFAULT;
 | 
				
			||||||
error:
 | 
					error:
 | 
				
			||||||
 | 
						sock_zerocopy_put_abort(uarg, extra_uref);
 | 
				
			||||||
	cork->length -= length;
 | 
						cork->length -= length;
 | 
				
			||||||
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 | 
						IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 | 
				
			||||||
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
 | 
						refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -651,12 +651,13 @@ static void do_flush_datagram(int fd, int type)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void do_rx(int domain, int type, int protocol)
 | 
					static void do_rx(int domain, int type, int protocol)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						const int cfg_receiver_wait_ms = 400;
 | 
				
			||||||
	uint64_t tstop;
 | 
						uint64_t tstop;
 | 
				
			||||||
	int fd;
 | 
						int fd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	fd = do_setup_rx(domain, type, protocol);
 | 
						fd = do_setup_rx(domain, type, protocol);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	tstop = gettimeofday_ms() + cfg_runtime_ms;
 | 
						tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms;
 | 
				
			||||||
	do {
 | 
						do {
 | 
				
			||||||
		if (type == SOCK_STREAM)
 | 
							if (type == SOCK_STREAM)
 | 
				
			||||||
			do_flush_tcp(fd);
 | 
								do_flush_tcp(fd);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -25,6 +25,8 @@ readonly path_sysctl_mem="net.core.optmem_max"
 | 
				
			||||||
if [[ "$#" -eq "0" ]]; then
 | 
					if [[ "$#" -eq "0" ]]; then
 | 
				
			||||||
	$0 4 tcp -t 1
 | 
						$0 4 tcp -t 1
 | 
				
			||||||
	$0 6 tcp -t 1
 | 
						$0 6 tcp -t 1
 | 
				
			||||||
 | 
						$0 4 udp -t 1
 | 
				
			||||||
 | 
						$0 6 udp -t 1
 | 
				
			||||||
	echo "OK. All tests passed"
 | 
						echo "OK. All tests passed"
 | 
				
			||||||
	exit 0
 | 
						exit 0
 | 
				
			||||||
fi
 | 
					fi
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -35,6 +35,9 @@ run_udp() {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	echo "udp gso"
 | 
						echo "udp gso"
 | 
				
			||||||
	run_in_netns ${args} -S 0
 | 
						run_in_netns ${args} -S 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						echo "udp gso zerocopy"
 | 
				
			||||||
 | 
						run_in_netns ${args} -S 0 -z
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
run_tcp() {
 | 
					run_tcp() {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue