forked from mirrors/linux
		
	tcp: adjust tail loss probe timeout
This patch adjusts the timeout formula used to schedule the TCP loss probe (TLP).

The previous formula uses 2*SRTT, or at least 1.5*RTT + DelayACKMax if only one packet is in flight. It also keeps a lower bound of 10 msec, which is too large for short-RTT connections (e.g. within a data center).

The new formula is 2*RTT + (inflight == 1 ? 200ms : 2 ticks), which performs better for short and fast connections.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									c4b2bf6b4a
								
							
						
					
					
						commit
						bb4d991a28
					
				
					 3 changed files with 12 additions and 10 deletions
				
			
		|  | @ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); | ||||||
| #endif | #endif | ||||||
| #define TCP_RTO_MAX	((unsigned)(120*HZ)) | #define TCP_RTO_MAX	((unsigned)(120*HZ)) | ||||||
| #define TCP_RTO_MIN	((unsigned)(HZ/5)) | #define TCP_RTO_MIN	((unsigned)(HZ/5)) | ||||||
|  | #define TCP_TIMEOUT_MIN	(2U) /* Min timeout for TCP timers in jiffies */ | ||||||
| #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))	/* RFC6298 2.1 initial RTO value	*/ | #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))	/* RFC6298 2.1 initial RTO value	*/ | ||||||
| #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))	/* RFC 1122 initial RTO value, now | #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))	/* RFC 1122 initial RTO value, now | ||||||
| 						 * used as a fallback RTO for the | 						 * used as a fallback RTO for the | ||||||
|  | @ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); | ||||||
| #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes | #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes | ||||||
| 					                 * for local resources. | 					                 * for local resources. | ||||||
| 					                 */ | 					                 */ | ||||||
| #define TCP_REO_TIMEOUT_MIN	(2000) /* Min RACK reordering timeout in usec */ |  | ||||||
| 
 |  | ||||||
| #define TCP_KEEPALIVE_TIME	(120*60*HZ)	/* two hours */ | #define TCP_KEEPALIVE_TIME	(120*60*HZ)	/* two hours */ | ||||||
| #define TCP_KEEPALIVE_PROBES	9		/* Max of 9 keepalive probes	*/ | #define TCP_KEEPALIVE_PROBES	9		/* Max of 9 keepalive probes	*/ | ||||||
| #define TCP_KEEPALIVE_INTVL	(75*HZ) | #define TCP_KEEPALIVE_INTVL	(75*HZ) | ||||||
|  |  | ||||||
|  | @ -2377,7 +2377,6 @@ bool tcp_schedule_loss_probe(struct sock *sk) | ||||||
| 	struct inet_connection_sock *icsk = inet_csk(sk); | 	struct inet_connection_sock *icsk = inet_csk(sk); | ||||||
| 	struct tcp_sock *tp = tcp_sk(sk); | 	struct tcp_sock *tp = tcp_sk(sk); | ||||||
| 	u32 timeout, tlp_time_stamp, rto_time_stamp; | 	u32 timeout, tlp_time_stamp, rto_time_stamp; | ||||||
| 	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); |  | ||||||
| 
 | 
 | ||||||
| 	/* No consecutive loss probes. */ | 	/* No consecutive loss probes. */ | ||||||
| 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { | 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { | ||||||
|  | @ -2406,15 +2405,19 @@ bool tcp_schedule_loss_probe(struct sock *sk) | ||||||
| 	     tcp_send_head(sk)) | 	     tcp_send_head(sk)) | ||||||
| 		return false; | 		return false; | ||||||
| 
 | 
 | ||||||
| 	/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
 | 	/* Probe timeout is 2*rtt. Add minimum RTO to account
 | ||||||
| 	 * for delayed ack when there's one outstanding packet. If no RTT | 	 * for delayed ack when there's one outstanding packet. If no RTT | ||||||
| 	 * sample is available then probe after TCP_TIMEOUT_INIT. | 	 * sample is available then probe after TCP_TIMEOUT_INIT. | ||||||
| 	 */ | 	 */ | ||||||
| 	timeout = rtt << 1 ? : TCP_TIMEOUT_INIT; | 	if (tp->srtt_us) { | ||||||
| 	if (tp->packets_out == 1) | 		timeout = usecs_to_jiffies(tp->srtt_us >> 2); | ||||||
| 		timeout = max_t(u32, timeout, | 		if (tp->packets_out == 1) | ||||||
| 				(rtt + (rtt >> 1) + TCP_DELACK_MAX)); | 			timeout += TCP_RTO_MIN; | ||||||
| 	timeout = max_t(u32, timeout, msecs_to_jiffies(10)); | 		else | ||||||
|  | 			timeout += TCP_TIMEOUT_MIN; | ||||||
|  | 	} else { | ||||||
|  | 		timeout = TCP_TIMEOUT_INIT; | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	/* If RTO is shorter, just schedule TLP in its place. */ | 	/* If RTO is shorter, just schedule TLP in its place. */ | ||||||
| 	tlp_time_stamp = tcp_jiffies32 + timeout; | 	tlp_time_stamp = tcp_jiffies32 + timeout; | ||||||
|  |  | ||||||
|  | @ -113,7 +113,7 @@ void tcp_rack_mark_lost(struct sock *sk) | ||||||
| 	tp->rack.advanced = 0; | 	tp->rack.advanced = 0; | ||||||
| 	tcp_rack_detect_loss(sk, &timeout); | 	tcp_rack_detect_loss(sk, &timeout); | ||||||
| 	if (timeout) { | 	if (timeout) { | ||||||
| 		timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN); | 		timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN; | ||||||
| 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, | 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, | ||||||
| 					  timeout, inet_csk(sk)->icsk_rto); | 					  timeout, inet_csk(sk)->icsk_rto); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Yuchung Cheng
						Yuchung Cheng