	tcp: refine pacing rate determination
When TCP pacing was added back in linux-3.12, we chose to apply a fixed
ratio of 200 % against the current rate, to allow probing for optimal
throughput even during the slow start phase, where cwnd can be doubled
every other RTT.

At Google, we found it was better to apply a different ratio while in
the Congestion Avoidance phase. This ratio was set to 120 %.

We've used the normal tcp_in_slow_start() helper for a while, then tuned
the condition to select the conservative ratio as soon as cwnd >= ssthresh/2:

- After cwnd reduction, it is safer to ramp up more slowly, as we
  approach the optimal cwnd.
- Initial ramp up (ssthresh == INFINITY) still allows doubling cwnd
  every other RTT.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

commit 43e122b014
parent 4ec3b28c27

4 changed files with 53 additions and 1 deletion
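
To make the resulting policy concrete, here is a small userspace sketch of the rate computation as the commit message describes it: ratio/100 of (cwnd * mss / srtt), with the ratio picked by comparing cwnd against ssthresh/2. This is an illustrative model only, not kernel code; the function name pacing_rate() and its plain integer arguments are invented for the example, whereas the kernel works on tcp_sock fields such as mss_cache, snd_cwnd, snd_ssthresh and srtt_us.

#include <stdint.h>
#include <stdio.h>

/* Illustrative model of the pacing policy described in the commit message. */
static uint64_t pacing_rate(uint32_t mss, uint32_t cwnd, uint32_t ssthresh,
			    uint32_t srtt_us)
{
	/* 200 % while cwnd < ssthresh/2, the conservative 120 % afterwards */
	unsigned int ratio = (cwnd < ssthresh / 2) ? 200 : 120;
	uint64_t bytes_per_rtt = (uint64_t)mss * cwnd;

	/* current rate is (cwnd * mss) / srtt; apply the ratio as a percentage */
	if (!srtt_us)
		return 0;
	return bytes_per_rtt * 1000000ULL / srtt_us * ratio / 100;
}

int main(void)
{
	/* mss = 1448 bytes, cwnd = 10 segments, srtt = 20 ms */
	printf("slow start          : %llu B/s\n",
	       (unsigned long long)pacing_rate(1448, 10, UINT32_MAX, 20000));
	printf("congestion avoidance: %llu B/s\n",
	       (unsigned long long)pacing_rate(1448, 10, 10, 20000));
	return 0;
}

For mss = 1448 bytes, cwnd = 10 and srtt = 20 ms this yields roughly 1.45 MB/s under the 200 % slow-start ratio and about 869 KB/s under the 120 % congestion-avoidance ratio.
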
@@ -586,6 +586,21 @@ tcp_min_tso_segs - INTEGER
 	if available window is too small.
 	Default: 2
 
+tcp_pacing_ss_ratio - INTEGER
+	sk->sk_pacing_rate is set by TCP stack using a ratio applied
+	to current rate. (current_rate = cwnd * mss / srtt)
+	If TCP is in slow start, tcp_pacing_ss_ratio is applied
+	to let TCP probe for bigger speeds, assuming cwnd can be
+	doubled every other RTT.
+	Default: 200
+
+tcp_pacing_ca_ratio - INTEGER
+	sk->sk_pacing_rate is set by TCP stack using a ratio applied
+	to current rate. (current_rate = cwnd * mss / srtt)
+	If TCP is in congestion avoidance phase, tcp_pacing_ca_ratio
+	is applied to conservatively probe for bigger throughput.
+	Default: 120
+
 tcp_tso_win_divisor - INTEGER
 	This allows control over what percentage of the congestion window
 	can be consumed by a single TSO frame.
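
Registered in the IPv4 sysctl table (see the hunk further below), the two knobs should show up as net.ipv4.tcp_pacing_ss_ratio and net.ipv4.tcp_pacing_ca_ratio, i.e. under /proc/sys/net/ipv4/. A minimal sketch for reading the current values from userspace, assuming a kernel that carries this patch (on older kernels the files simply do not exist):

#include <stdio.h>

/* Print one pacing-ratio sysctl, or note that it is absent. */
static void print_ratio(const char *path)
{
	FILE *f = fopen(path, "r");
	int val;

	if (f && fscanf(f, "%d", &val) == 1)
		printf("%s = %d\n", path, val);
	else
		printf("%s: not available\n", path);
	if (f)
		fclose(f);
}

int main(void)
{
	print_ratio("/proc/sys/net/ipv4/tcp_pacing_ss_ratio");
	print_ratio("/proc/sys/net/ipv4/tcp_pacing_ca_ratio");
	return 0;
}

Writing to the same files (or, for example, sysctl -w net.ipv4.tcp_pacing_ca_ratio=110) adjusts the ratios at runtime.
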

@@ -281,6 +281,8 @@ extern unsigned int sysctl_tcp_notsent_lowat;
 extern int sysctl_tcp_min_tso_segs;
 extern int sysctl_tcp_autocorking;
 extern int sysctl_tcp_invalid_ratelimit;
+extern int sysctl_tcp_pacing_ss_ratio;
+extern int sysctl_tcp_pacing_ca_ratio;
 
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;

@@ -29,6 +29,7 @@
 static int zero;
 static int one = 1;
 static int four = 4;
+static int thousand = 1000;
 static int gso_max_segs = GSO_MAX_SEGS;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
@@ -711,6 +712,24 @@ static struct ctl_table ipv4_table[] = {
 		.extra1		= &one,
 		.extra2		= &gso_max_segs,
 	},
+	{
+		.procname	= "tcp_pacing_ss_ratio",
+		.data		= &sysctl_tcp_pacing_ss_ratio,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &thousand,
+	},
+	{
+		.procname	= "tcp_pacing_ca_ratio",
+		.data		= &sysctl_tcp_pacing_ca_ratio,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &thousand,
+	},
 	{
 		.procname	= "tcp_autocorking",
 		.data		= &sysctl_tcp_autocorking,
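
Both entries use proc_dointvec_minmax with extra1 = &zero and extra2 = &thousand, so the tunables are plain integers constrained to the 0..1000 range (at most 10x the measured rate); as with other min/max sysctls, a write outside that range should simply be refused. The 0644 mode keeps them root-writable and world-readable.
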

@@ -753,13 +753,29 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
  * TCP pacing, to smooth the burst on large writes when packets
  * in flight is significantly lower than cwnd (or rwin)
  */
+int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
+int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
+
 static void tcp_update_pacing_rate(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	u64 rate;
 
 	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
-	rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
+	rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
+
+	/* current rate is (cwnd * mss) / srtt
+	 * In Slow Start [1], set sk_pacing_rate to 200 % the current rate.
+	 * In Congestion Avoidance phase, set it to 120 % the current rate.
+	 *
+	 * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
+	 *	 If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
+	 *	 end of slow start and should slow down.
+	 */
+	if (tp->snd_cwnd < tp->snd_ssthresh / 2)
+		rate *= sysctl_tcp_pacing_ss_ratio;
+	else
+		rate *= sysctl_tcp_pacing_ca_ratio;
 
 	rate *= max(tp->snd_cwnd, tp->packets_out);
 
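
One detail worth spelling out: the constant in the rewritten line folds the percent scaling into the existing fixed-point setup. The smoothed RTT is kept left-shifted by 3 (hence the << 3 in the scale factor), and per the comment and the documentation above, current rate is (cwnd * mss) / srtt, with the division done later in the function (not shown in this hunk). With the default 200 % slow-start ratio the new expression reproduces the previous hard-coded "* 2" exactly. A tiny userspace check of that equivalence, with USEC_PER_SEC redefined locally for the example:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define USEC_PER_SEC 1000000ULL	/* same value the kernel macro expands to */

int main(void)
{
	uint64_t mss = 1448;

	/* old: fixed 200 % factor baked in as "* 2" */
	uint64_t old_scale = mss * 2 * (USEC_PER_SEC << 3);
	/* new: per-percent scale, multiplied by the 200 % default ratio */
	uint64_t new_scale = mss * ((USEC_PER_SEC / 100) << 3) * 200;

	assert(old_scale == new_scale);
	printf("both scale factors: %llu\n", (unsigned long long)old_scale);
	return 0;
}

The behavioural change is therefore confined to the new branch: once snd_cwnd reaches snd_ssthresh / 2, the multiplier drops from 200 to the more conservative 120.
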