forked from mirrors/linux
		
	mptcp: handle pending data on closed subflow
The PM can close active subflow, e.g. due to ingress RM_ADDR option. Such subflow could carry data still unacked at the MPTCP-level, both in the write and the rtx_queue, which has never reached the other peer. Currently the mptcp-level retransmission will deliver such data, but at a very low rate (at most 1 DSM for each MPTCP rtx interval). We can speed-up the recovery a lot, moving all the unacked in the tcp write_queue, so that it will be pushed again via other subflows, at the speed allowed by them. Also make available the new helper for later patches. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/207 Signed-off-by: Paolo Abeni <pabeni@redhat.com> Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									71b7dec27f
								
							
						
					
					
						commit
						1e1d9d6f11
					
				
					 3 changed files with 82 additions and 8 deletions
				
			
		| 
						 | 
					@ -975,9 +975,11 @@ static void ack_update_msk(struct mptcp_sock *msk,
 | 
				
			||||||
	old_snd_una = msk->snd_una;
 | 
						old_snd_una = msk->snd_una;
 | 
				
			||||||
	new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
 | 
						new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* ACK for data not even sent yet? Ignore. */
 | 
						/* ACK for data not even sent yet and even above recovery bound? Ignore.*/
 | 
				
			||||||
	if (after64(new_snd_una, snd_nxt))
 | 
						if (unlikely(after64(new_snd_una, snd_nxt))) {
 | 
				
			||||||
 | 
							if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt))
 | 
				
			||||||
			new_snd_una = old_snd_una;
 | 
								new_snd_una = old_snd_una;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
 | 
						new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1055,8 +1055,14 @@ static void __mptcp_clean_una(struct sock *sk)
 | 
				
			||||||
		if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
 | 
							if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (WARN_ON_ONCE(dfrag == msk->first_pending))
 | 
							if (unlikely(dfrag == msk->first_pending)) {
 | 
				
			||||||
 | 
								/* in recovery mode can see ack after the current snd head */
 | 
				
			||||||
 | 
								if (WARN_ON_ONCE(!msk->recovery))
 | 
				
			||||||
				break;
 | 
									break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		dfrag_clear(sk, dfrag);
 | 
							dfrag_clear(sk, dfrag);
 | 
				
			||||||
		cleaned = true;
 | 
							cleaned = true;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -1065,8 +1071,14 @@ static void __mptcp_clean_una(struct sock *sk)
 | 
				
			||||||
	if (dfrag && after64(snd_una, dfrag->data_seq)) {
 | 
						if (dfrag && after64(snd_una, dfrag->data_seq)) {
 | 
				
			||||||
		u64 delta = snd_una - dfrag->data_seq;
 | 
							u64 delta = snd_una - dfrag->data_seq;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (WARN_ON_ONCE(delta > dfrag->already_sent))
 | 
							/* prevent wrap around in recovery mode */
 | 
				
			||||||
 | 
							if (unlikely(delta > dfrag->already_sent)) {
 | 
				
			||||||
 | 
								if (WARN_ON_ONCE(!msk->recovery))
 | 
				
			||||||
				goto out;
 | 
									goto out;
 | 
				
			||||||
 | 
								if (WARN_ON_ONCE(delta > dfrag->data_len))
 | 
				
			||||||
 | 
									goto out;
 | 
				
			||||||
 | 
								dfrag->already_sent += delta - dfrag->already_sent;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		dfrag->data_seq += delta;
 | 
							dfrag->data_seq += delta;
 | 
				
			||||||
		dfrag->offset += delta;
 | 
							dfrag->offset += delta;
 | 
				
			||||||
| 
						 | 
					@ -1077,6 +1089,10 @@ static void __mptcp_clean_una(struct sock *sk)
 | 
				
			||||||
		cleaned = true;
 | 
							cleaned = true;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* all retransmitted data acked, recovery completed */
 | 
				
			||||||
 | 
						if (unlikely(msk->recovery) && after64(msk->snd_una, msk->recovery_snd_nxt))
 | 
				
			||||||
 | 
							msk->recovery = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
	if (cleaned) {
 | 
						if (cleaned) {
 | 
				
			||||||
		if (tcp_under_memory_pressure(sk)) {
 | 
							if (tcp_under_memory_pressure(sk)) {
 | 
				
			||||||
| 
						 | 
					@ -1085,7 +1101,7 @@ static void __mptcp_clean_una(struct sock *sk)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (snd_una == READ_ONCE(msk->snd_nxt)) {
 | 
						if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) {
 | 
				
			||||||
		if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
 | 
							if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
 | 
				
			||||||
			mptcp_stop_timer(sk);
 | 
								mptcp_stop_timer(sk);
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
| 
						 | 
					@ -2148,6 +2164,50 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool __mptcp_retransmit_pending_data(struct sock *sk)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct mptcp_data_frag *cur, *rtx_head;
 | 
				
			||||||
 | 
						struct mptcp_sock *msk = mptcp_sk(sk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (__mptcp_check_fallback(mptcp_sk(sk)))
 | 
				
			||||||
 | 
							return false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (tcp_rtx_and_write_queues_empty(sk))
 | 
				
			||||||
 | 
							return false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* the closing socket has some data untransmitted and/or unacked:
 | 
				
			||||||
 | 
						 * some data in the mptcp rtx queue has not really xmitted yet.
 | 
				
			||||||
 | 
						 * keep it simple and re-inject the whole mptcp level rtx queue
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						mptcp_data_lock(sk);
 | 
				
			||||||
 | 
						__mptcp_clean_una_wakeup(sk);
 | 
				
			||||||
 | 
						rtx_head = mptcp_rtx_head(sk);
 | 
				
			||||||
 | 
						if (!rtx_head) {
 | 
				
			||||||
 | 
							mptcp_data_unlock(sk);
 | 
				
			||||||
 | 
							return false;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* will accept ack for reijected data before re-sending them */
 | 
				
			||||||
 | 
						if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt))
 | 
				
			||||||
 | 
							msk->recovery_snd_nxt = msk->snd_nxt;
 | 
				
			||||||
 | 
						msk->recovery = true;
 | 
				
			||||||
 | 
						mptcp_data_unlock(sk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						msk->first_pending = rtx_head;
 | 
				
			||||||
 | 
						msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq;
 | 
				
			||||||
 | 
						msk->snd_nxt = rtx_head->data_seq;
 | 
				
			||||||
 | 
						msk->snd_burst = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* be sure to clear the "sent status" on all re-injected fragments */
 | 
				
			||||||
 | 
						list_for_each_entry(cur, &msk->rtx_queue, list) {
 | 
				
			||||||
 | 
							if (!cur->already_sent)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							cur->already_sent = 0;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return true;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* subflow sockets can be either outgoing (connect) or incoming
 | 
					/* subflow sockets can be either outgoing (connect) or incoming
 | 
				
			||||||
 * (accept).
 | 
					 * (accept).
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
| 
						 | 
					@ -2160,6 +2220,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 | 
				
			||||||
			      struct mptcp_subflow_context *subflow)
 | 
								      struct mptcp_subflow_context *subflow)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct mptcp_sock *msk = mptcp_sk(sk);
 | 
						struct mptcp_sock *msk = mptcp_sk(sk);
 | 
				
			||||||
 | 
						bool need_push;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	list_del(&subflow->node);
 | 
						list_del(&subflow->node);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2171,6 +2232,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 | 
				
			||||||
	if (ssk->sk_socket)
 | 
						if (ssk->sk_socket)
 | 
				
			||||||
		sock_orphan(ssk);
 | 
							sock_orphan(ssk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						need_push = __mptcp_retransmit_pending_data(sk);
 | 
				
			||||||
	subflow->disposable = 1;
 | 
						subflow->disposable = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
 | 
						/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
 | 
				
			||||||
| 
						 | 
					@ -2198,6 +2260,9 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (msk->subflow && ssk == msk->subflow->sk)
 | 
						if (msk->subflow && ssk == msk->subflow->sk)
 | 
				
			||||||
		mptcp_dispose_initial_subflow(msk);
 | 
							mptcp_dispose_initial_subflow(msk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (need_push)
 | 
				
			||||||
 | 
							__mptcp_push_pending(sk, 0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 | 
					void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 | 
				
			||||||
| 
						 | 
					@ -2410,6 +2475,7 @@ static int __mptcp_init_sock(struct sock *sk)
 | 
				
			||||||
	msk->first = NULL;
 | 
						msk->first = NULL;
 | 
				
			||||||
	inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
 | 
						inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
 | 
				
			||||||
	WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
 | 
						WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
 | 
				
			||||||
 | 
						msk->recovery = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mptcp_pm_data_init(msk);
 | 
						mptcp_pm_data_init(msk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -230,12 +230,17 @@ struct mptcp_sock {
 | 
				
			||||||
	struct sock	*last_snd;
 | 
						struct sock	*last_snd;
 | 
				
			||||||
	int		snd_burst;
 | 
						int		snd_burst;
 | 
				
			||||||
	int		old_wspace;
 | 
						int		old_wspace;
 | 
				
			||||||
 | 
						u64		recovery_snd_nxt;	/* in recovery mode accept up to this seq;
 | 
				
			||||||
 | 
											 * recovery related fields are under data_lock
 | 
				
			||||||
 | 
											 * protection
 | 
				
			||||||
 | 
											 */
 | 
				
			||||||
	u64		snd_una;
 | 
						u64		snd_una;
 | 
				
			||||||
	u64		wnd_end;
 | 
						u64		wnd_end;
 | 
				
			||||||
	unsigned long	timer_ival;
 | 
						unsigned long	timer_ival;
 | 
				
			||||||
	u32		token;
 | 
						u32		token;
 | 
				
			||||||
	int		rmem_released;
 | 
						int		rmem_released;
 | 
				
			||||||
	unsigned long	flags;
 | 
						unsigned long	flags;
 | 
				
			||||||
 | 
						bool		recovery;		/* closing subflow write queue reinjected */
 | 
				
			||||||
	bool		can_ack;
 | 
						bool		can_ack;
 | 
				
			||||||
	bool		fully_established;
 | 
						bool		fully_established;
 | 
				
			||||||
	bool		rcv_data_fin;
 | 
						bool		rcv_data_fin;
 | 
				
			||||||
| 
						 | 
					@ -557,6 +562,7 @@ int mptcp_is_checksum_enabled(struct net *net);
 | 
				
			||||||
int mptcp_allow_join_id0(struct net *net);
 | 
					int mptcp_allow_join_id0(struct net *net);
 | 
				
			||||||
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
 | 
					void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
 | 
				
			||||||
				     struct mptcp_options_received *mp_opt);
 | 
									     struct mptcp_options_received *mp_opt);
 | 
				
			||||||
 | 
					bool __mptcp_retransmit_pending_data(struct sock *sk);
 | 
				
			||||||
bool mptcp_subflow_data_available(struct sock *sk);
 | 
					bool mptcp_subflow_data_available(struct sock *sk);
 | 
				
			||||||
void __init mptcp_subflow_init(void);
 | 
					void __init mptcp_subflow_init(void);
 | 
				
			||||||
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
 | 
					void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue