	mptcp: stop relying on tcp_tx_skb_cache
We want to revert the skb TX cache, but MPTCP is currently using it
unconditionally. Rework the MPTCP tx code, so that tcp_tx_skb_cache is
not needed anymore: do the whole coalescing check, skb allocation and
skb initialization/update inside mptcp_sendmsg_frag(), quite alike the
current TCP code.

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 04d8825c30
commit f70cad1085

1 changed file with 77 additions and 60 deletions
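A minimal sketch of the coalesce-or-allocate pattern the commit message describes, assuming kernel context; the helper name frag_append is hypothetical, while skb_can_coalesce(), skb_frag_size_add() and skb_fill_page_desc() are the skb helpers the diff below actually uses.

#include <linux/mm.h>
#include <linux/skbuff.h>

/* Hypothetical helper, for illustration only: try to attach 'copy' bytes
 * of page data to the tail skb, coalescing with its last fragment when
 * the new bytes are physically contiguous with it.
 */
static bool frag_append(struct sk_buff *skb, struct page *page,
			int offset, int copy)
{
	int i = skb_shinfo(skb)->nr_frags;

	if (skb_can_coalesce(skb, i, page, offset)) {
		/* contiguous with the last frag: grow it in place,
		 * no additional page reference is needed
		 */
		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
	} else {
		if (i >= sysctl_max_skb_frags)
			return false;	/* caller must start a new skb */
		get_page(page);		/* the new frag holds its own ref */
		skb_fill_page_desc(skb, i, page, offset, copy);
	}
	return true;
}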
				
			
@@ -1224,6 +1224,7 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
 		if (likely(__mptcp_add_ext(skb, gfp))) {
 			skb_reserve(skb, MAX_TCP_HEADER);
 			skb->reserved_tailroom = skb->end - skb->tail;
+			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
 			return skb;
 		}
 		__kfree_skb(skb);
@@ -1233,31 +1234,23 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
 	return NULL;
 }
 
-static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
+static struct sk_buff *__mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
 {
 	struct sk_buff *skb;
 
-	if (ssk->sk_tx_skb_cache) {
-		skb = ssk->sk_tx_skb_cache;
-		if (unlikely(!skb_ext_find(skb, SKB_EXT_MPTCP) &&
-			     !__mptcp_add_ext(skb, gfp)))
-			return false;
-		return true;
-	}
-
 	skb = __mptcp_do_alloc_tx_skb(sk, gfp);
 	if (!skb)
-		return false;
+		return NULL;
 
 	if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
-		ssk->sk_tx_skb_cache = skb;
-		return true;
+		tcp_skb_entail(ssk, skb);
+		return skb;
 	}
 	kfree_skb(skb);
-	return false;
+	return NULL;
 }
 
-static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
+static struct sk_buff *mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
 {
 	gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation;
 
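The hunk above flips the allocation helpers from a bool-plus-cache contract to returning the skb already entailed on the subflow write queue. A hedged caller-side sketch of the difference, simplified from mptcp_sendmsg_frag() below:

	/* old contract: the helper parked the skb in the now-removed
	 * ssk->sk_tx_skb_cache and returned a bool; TCP picked the cached
	 * skb up later in tcp_build_frag()
	 */
	if (!mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held))
		return 0;

	/* new contract: the helper entails the skb on the subflow write
	 * queue via tcp_skb_entail() and returns it for direct use
	 */
	skb = mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held);
	if (!skb)
		return -ENOMEM;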
@@ -1287,23 +1280,29 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 			      struct mptcp_sendmsg_info *info)
 {
 	u64 data_seq = dfrag->data_seq + info->sent;
+	int offset = dfrag->offset + info->sent;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	bool zero_window_probe = false;
 	struct mptcp_ext *mpext = NULL;
-	struct sk_buff *skb, *tail;
-	bool must_collapse = false;
-	int size_bias = 0;
-	int avail_size;
-	size_t ret = 0;
+	bool can_coalesce = false;
+	bool reuse_skb = true;
+	struct sk_buff *skb;
+	size_t copy;
+	int i;
 
 	pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u",
 		 msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent);
 
+	if (WARN_ON_ONCE(info->sent > info->limit ||
+			 info->limit > dfrag->data_len))
+		return 0;
+
 	/* compute send limit */
 	info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
-	avail_size = info->size_goal;
+	copy = info->size_goal;
+
 	skb = tcp_write_queue_tail(ssk);
-	if (skb) {
+	if (skb && copy > skb->len) {
 		/* Limit the write to the size available in the
 		 * current skb, if any, so that we create at most a new skb.
 		 * Explicitly tells TCP internals to avoid collapsing on later
@@ -1316,62 +1315,80 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 			goto alloc_skb;
 		}
 
-		must_collapse = (info->size_goal > skb->len) &&
-				(skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags);
-		if (must_collapse) {
-			size_bias = skb->len;
-			avail_size = info->size_goal - skb->len;
+		i = skb_shinfo(skb)->nr_frags;
+		can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
+		if (!can_coalesce && i >= sysctl_max_skb_frags) {
+			tcp_mark_push(tcp_sk(ssk), skb);
+			goto alloc_skb;
 		}
-	}
 
+		copy -= skb->len;
+	} else {
 alloc_skb:
-	if (!must_collapse &&
-	    !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held))
-		return 0;
+		skb = mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held);
+		if (!skb)
+			return -ENOMEM;
+
+		i = skb_shinfo(skb)->nr_frags;
+		reuse_skb = false;
+		mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+	}
 
 	/* Zero window and all data acked? Probe. */
-	avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size);
-	if (avail_size == 0) {
+	copy = mptcp_check_allowed_size(msk, data_seq, copy);
+	if (copy == 0) {
 		u64 snd_una = READ_ONCE(msk->snd_una);
 
-		if (skb || snd_una != msk->snd_nxt)
+		if (snd_una != msk->snd_nxt) {
+			tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
 			return 0;
+		}
 
 		zero_window_probe = true;
 		data_seq = snd_una - 1;
-		avail_size = 1;
-	}
+		copy = 1;
 
-	if (WARN_ON_ONCE(info->sent > info->limit ||
-			 info->limit > dfrag->data_len))
-		return 0;
+		/* all mptcp-level data is acked, no skbs should be present into the
+		 * ssk write queue
+		 */
+		WARN_ON_ONCE(reuse_skb);
+	}
 
-	ret = info->limit - info->sent;
-	tail = tcp_build_frag(ssk, avail_size + size_bias, info->flags,
-			      dfrag->page, dfrag->offset + info->sent, &ret);
-	if (!tail) {
-		tcp_remove_empty_skb(sk, tcp_write_queue_tail(ssk));
+	copy = min_t(size_t, copy, info->limit - info->sent);
+	if (!sk_wmem_schedule(ssk, copy)) {
+		tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
 		return -ENOMEM;
 	}
 
-	/* if the tail skb is still the cached one, collapsing really happened.
-	 */
-	if (skb == tail) {
-		TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH;
-		mpext->data_len += ret;
-		WARN_ON_ONCE(zero_window_probe);
+	if (can_coalesce) {
+		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+	} else {
+		get_page(dfrag->page);
+		skb_fill_page_desc(skb, i, dfrag->page, offset, copy);
+	}
+
+	skb->len += copy;
+	skb->data_len += copy;
+	skb->truesize += copy;
+	sk_wmem_queued_add(ssk, copy);
+	sk_mem_charge(ssk, copy);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	WRITE_ONCE(tcp_sk(ssk)->write_seq, tcp_sk(ssk)->write_seq + copy);
+	TCP_SKB_CB(skb)->end_seq += copy;
+	tcp_skb_pcount_set(skb, 0);
+
+	/* on skb reuse we just need to update the DSS len */
+	if (reuse_skb) {
+		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
+		mpext->data_len += copy;
+		WARN_ON_ONCE(zero_window_probe);
 		goto out;
 	}
 
-	mpext = skb_ext_find(tail, SKB_EXT_MPTCP);
-	if (WARN_ON_ONCE(!mpext)) {
-		/* should never reach here, stream corrupted */
-		return -EINVAL;
-	}
-
 	memset(mpext, 0, sizeof(*mpext));
 	mpext->data_seq = data_seq;
 	mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
-	mpext->data_len = ret;
+	mpext->data_len = copy;
 	mpext->use_map = 1;
 	mpext->dsn64 = 1;
 
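With tcp_build_frag() gone, the hunk above open-codes the skb geometry and socket memory accounting. A hedged consolidation of those steps; the helper name mptcp_account_appended is hypothetical:

#include <net/tcp.h>

/* Hypothetical helper: every 'copy' byte attached as page data must be
 * reflected in both the skb geometry and the socket memory accounting.
 */
static void mptcp_account_appended(struct sock *ssk, struct sk_buff *skb,
				   int copy)
{
	skb->len += copy;		/* total payload length */
	skb->data_len += copy;		/* the bytes live in page frags */
	skb->truesize += copy;		/* memory footprint for accounting */
	sk_wmem_queued_add(ssk, copy);	/* bytes queued on the subflow */
	sk_mem_charge(ssk, copy);	/* consume forward-allocated memory */
	skb->ip_summed = CHECKSUM_PARTIAL;	/* checksum filled at xmit */
	WRITE_ONCE(tcp_sk(ssk)->write_seq, tcp_sk(ssk)->write_seq + copy);
	TCP_SKB_CB(skb)->end_seq += copy;	/* subflow sequence space */
	tcp_skb_pcount_set(skb, 0);	/* recomputed at transmit time */
}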
@@ -1380,18 +1397,18 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 		 mpext->dsn64);
 
 	if (zero_window_probe) {
-		mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
+		mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
 		mpext->frozen = 1;
 		if (READ_ONCE(msk->csum_enabled))
-			mptcp_update_data_checksum(tail, ret);
+			mptcp_update_data_checksum(skb, copy);
 		tcp_push_pending_frames(ssk);
 		return 0;
 	}
out:
 	if (READ_ONCE(msk->csum_enabled))
-		mptcp_update_data_checksum(tail, ret);
-	mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
-	return ret;
+		mptcp_update_data_checksum(skb, copy);
+	mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
+	return copy;
 }
 
 #define MPTCP_SEND_BURST_SIZE		((1 << 16) - \
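For reference, the DSS mapping update in the rewritten mptcp_sendmsg_frag() splits on reuse_skb: appending to the tail skb only extends the mapping it already carries, while a fresh skb gets a full one. A hedged restatement with the field semantics spelled out:

	if (reuse_skb) {
		/* appended bytes extend the mapping the tail skb carries */
		mpext->data_len += copy;
	} else {
		/* a freshly allocated skb gets a complete new mapping */
		memset(mpext, 0, sizeof(*mpext));
		mpext->data_seq = data_seq;	/* MPTCP-level sequence */
		mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
						/* subflow-relative seq */
		mpext->data_len = copy;		/* bytes this mapping covers */
		mpext->use_map = 1;		/* DSS option carries a mapping */
		mpext->dsn64 = 1;		/* 64-bit data sequence number */
	}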