forked from mirrors/linux
		
	tcp: allow at most one TLP probe per flight
Previously, TLP could send multiple probes of new data in one flight. This happens when the sender is cwnd-limited: after the initial TLP containing new data is sent, the sender receives another ACK that acknowledges only part of the data in flight. It may then re-arm another TLP timer and send more if no further ACK returns before the next TLP timeout (PTO) expires. In theory, the sender could send a large number of TLP probes until the send queue is depleted. This only happens if the sender sees such an irregular, uncommon ACK pattern, but it is generally undesirable behavior, especially during congestion. The original TLP design restricts the sender to only one TLP probe per flight, as published in "Reducing Web Latency: the Virtue of Gentle Aggression", SIGCOMM 2013. This patch changes TLP to send at most one probe per flight. Note that if the sender is app-limited, TLP retransmits old data and does not have this issue. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									17ad73e941
								
							
						
					
					
						commit
						76be93fc07
					
				
					 3 changed files with 18 additions and 12 deletions
				
			
		|  | @ -220,7 +220,9 @@ struct tcp_sock { | ||||||
| 	} rack; | 	} rack; | ||||||
| 	u16	advmss;		/* Advertised MSS			*/ | 	u16	advmss;		/* Advertised MSS			*/ | ||||||
| 	u8	compressed_ack; | 	u8	compressed_ack; | ||||||
| 	u8	dup_ack_counter; | 	u8	dup_ack_counter:2, | ||||||
|  | 		tlp_retrans:1,	/* TLP is a retransmission */ | ||||||
|  | 		unused:5; | ||||||
| 	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */ | 	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */ | ||||||
| 	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */ | 	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */ | ||||||
| 	u8	chrono_type:2,	/* current chronograph type */ | 	u8	chrono_type:2,	/* current chronograph type */ | ||||||
|  | @ -243,7 +245,7 @@ struct tcp_sock { | ||||||
| 		save_syn:1,	/* Save headers of SYN packet */ | 		save_syn:1,	/* Save headers of SYN packet */ | ||||||
| 		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ | 		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ | ||||||
| 		syn_smc:1;	/* SYN includes SMC */ | 		syn_smc:1;	/* SYN includes SMC */ | ||||||
| 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */ | 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP */ | ||||||
| 
 | 
 | ||||||
| 	u32	tcp_tx_delay;	/* delay (in usec) added to TX packets */ | 	u32	tcp_tx_delay;	/* delay (in usec) added to TX packets */ | ||||||
| 	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */ | 	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */ | ||||||
|  |  | ||||||
|  | @ -3488,10 +3488,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* This routine deals with acks during a TLP episode.
 | /* This routine deals with acks during a TLP episode and ends an episode by
 | ||||||
|  * We mark the end of a TLP episode on receiving TLP dupack or when |  * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack | ||||||
|  * ack is after tlp_high_seq. |  | ||||||
|  * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. |  | ||||||
|  */ |  */ | ||||||
| static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) | static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) | ||||||
| { | { | ||||||
|  | @ -3500,7 +3498,10 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) | ||||||
| 	if (before(ack, tp->tlp_high_seq)) | 	if (before(ack, tp->tlp_high_seq)) | ||||||
| 		return; | 		return; | ||||||
| 
 | 
 | ||||||
| 	if (flag & FLAG_DSACKING_ACK) { | 	if (!tp->tlp_retrans) { | ||||||
|  | 		/* TLP of new data has been acknowledged */ | ||||||
|  | 		tp->tlp_high_seq = 0; | ||||||
|  | 	} else if (flag & FLAG_DSACKING_ACK) { | ||||||
| 		/* This DSACK means original and TLP probe arrived; no loss */ | 		/* This DSACK means original and TLP probe arrived; no loss */ | ||||||
| 		tp->tlp_high_seq = 0; | 		tp->tlp_high_seq = 0; | ||||||
| 	} else if (after(ack, tp->tlp_high_seq)) { | 	} else if (after(ack, tp->tlp_high_seq)) { | ||||||
|  |  | ||||||
|  | @ -2624,6 +2624,11 @@ void tcp_send_loss_probe(struct sock *sk) | ||||||
| 	int pcount; | 	int pcount; | ||||||
| 	int mss = tcp_current_mss(sk); | 	int mss = tcp_current_mss(sk); | ||||||
| 
 | 
 | ||||||
|  | 	/* At most one outstanding TLP */ | ||||||
|  | 	if (tp->tlp_high_seq) | ||||||
|  | 		goto rearm_timer; | ||||||
|  | 
 | ||||||
|  | 	tp->tlp_retrans = 0; | ||||||
| 	skb = tcp_send_head(sk); | 	skb = tcp_send_head(sk); | ||||||
| 	if (skb && tcp_snd_wnd_test(tp, skb, mss)) { | 	if (skb && tcp_snd_wnd_test(tp, skb, mss)) { | ||||||
| 		pcount = tp->packets_out; | 		pcount = tp->packets_out; | ||||||
|  | @ -2641,10 +2646,6 @@ void tcp_send_loss_probe(struct sock *sk) | ||||||
| 		return; | 		return; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	/* At most one outstanding TLP retransmission. */ |  | ||||||
| 	if (tp->tlp_high_seq) |  | ||||||
| 		goto rearm_timer; |  | ||||||
| 
 |  | ||||||
| 	if (skb_still_in_host_queue(sk, skb)) | 	if (skb_still_in_host_queue(sk, skb)) | ||||||
| 		goto rearm_timer; | 		goto rearm_timer; | ||||||
| 
 | 
 | ||||||
|  | @ -2666,10 +2667,12 @@ void tcp_send_loss_probe(struct sock *sk) | ||||||
| 	if (__tcp_retransmit_skb(sk, skb, 1)) | 	if (__tcp_retransmit_skb(sk, skb, 1)) | ||||||
| 		goto rearm_timer; | 		goto rearm_timer; | ||||||
| 
 | 
 | ||||||
|  | 	tp->tlp_retrans = 1; | ||||||
|  | 
 | ||||||
|  | probe_sent: | ||||||
| 	/* Record snd_nxt for loss detection. */ | 	/* Record snd_nxt for loss detection. */ | ||||||
| 	tp->tlp_high_seq = tp->snd_nxt; | 	tp->tlp_high_seq = tp->snd_nxt; | ||||||
| 
 | 
 | ||||||
| probe_sent: |  | ||||||
| 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); | 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); | ||||||
| 	/* Reset s.t. tcp_rearm_rto will restart timer from now */ | 	/* Reset s.t. tcp_rearm_rto will restart timer from now */ | ||||||
| 	inet_csk(sk)->icsk_pending = 0; | 	inet_csk(sk)->icsk_pending = 0; | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Yuchung Cheng
						Yuchung Cheng