forked from mirrors/linux
		
	tcp: refine pacing rate determination
When TCP pacing was added back in linux-3.12, we chose
to apply a fixed ratio of 200 % against current rate,
to allow probing for optimal throughput even during
slow start phase, where cwnd can be doubled every other gRTT.

At Google, we found it was better applying a different ratio
while in Congestion Avoidance phase.
This ratio was set to 120 %.

We've used the normal tcp_in_slow_start() helper for a while,
then tuned the condition to select the conservative ratio
as soon as cwnd >= ssthresh/2 :

- After cwnd reduction, it is safer to ramp up more slowly,
  as we approach optimal cwnd.
- Initial ramp up (ssthresh == INFINITY) still allows doubling
  cwnd every other RTT.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									4ec3b28c27
								
							
						
					
					
						commit
						43e122b014
					
				
					 4 changed files with 53 additions and 1 deletion
				
			
		|  | @ -586,6 +586,21 @@ tcp_min_tso_segs - INTEGER | ||||||
| 	if available window is too small. | 	if available window is too small. | ||||||
| 	Default: 2 | 	Default: 2 | ||||||
| 
 | 
 | ||||||
|  | tcp_pacing_ss_ratio - INTEGER | ||||||
|  | 	sk->sk_pacing_rate is set by TCP stack using a ratio applied | ||||||
|  | 	to current rate. (current_rate = cwnd * mss / srtt) | ||||||
|  | 	If TCP is in slow start, tcp_pacing_ss_ratio is applied | ||||||
|  | 	to let TCP probe for bigger speeds, assuming cwnd can be | ||||||
|  | 	doubled every other RTT. | ||||||
|  | 	Default: 200 | ||||||
|  | 
 | ||||||
|  | tcp_pacing_ca_ratio - INTEGER | ||||||
|  | 	sk->sk_pacing_rate is set by TCP stack using a ratio applied | ||||||
|  | 	to current rate. (current_rate = cwnd * mss / srtt) | ||||||
|  | 	If TCP is in congestion avoidance phase, tcp_pacing_ca_ratio | ||||||
|  | 	is applied to conservatively probe for bigger throughput. | ||||||
|  | 	Default: 120 | ||||||
|  | 
 | ||||||
| tcp_tso_win_divisor - INTEGER | tcp_tso_win_divisor - INTEGER | ||||||
| 	This allows control over what percentage of the congestion window | 	This allows control over what percentage of the congestion window | ||||||
| 	can be consumed by a single TSO frame. | 	can be consumed by a single TSO frame. | ||||||
|  |  | ||||||
|  | @ -281,6 +281,8 @@ extern unsigned int sysctl_tcp_notsent_lowat; | ||||||
| extern int sysctl_tcp_min_tso_segs; | extern int sysctl_tcp_min_tso_segs; | ||||||
| extern int sysctl_tcp_autocorking; | extern int sysctl_tcp_autocorking; | ||||||
| extern int sysctl_tcp_invalid_ratelimit; | extern int sysctl_tcp_invalid_ratelimit; | ||||||
|  | extern int sysctl_tcp_pacing_ss_ratio; | ||||||
|  | extern int sysctl_tcp_pacing_ca_ratio; | ||||||
| 
 | 
 | ||||||
| extern atomic_long_t tcp_memory_allocated; | extern atomic_long_t tcp_memory_allocated; | ||||||
| extern struct percpu_counter tcp_sockets_allocated; | extern struct percpu_counter tcp_sockets_allocated; | ||||||
|  |  | ||||||
|  | @ -29,6 +29,7 @@ | ||||||
| static int zero; | static int zero; | ||||||
| static int one = 1; | static int one = 1; | ||||||
| static int four = 4; | static int four = 4; | ||||||
|  | static int thousand = 1000; | ||||||
| static int gso_max_segs = GSO_MAX_SEGS; | static int gso_max_segs = GSO_MAX_SEGS; | ||||||
| static int tcp_retr1_max = 255; | static int tcp_retr1_max = 255; | ||||||
| static int ip_local_port_range_min[] = { 1, 1 }; | static int ip_local_port_range_min[] = { 1, 1 }; | ||||||
|  | @ -711,6 +712,24 @@ static struct ctl_table ipv4_table[] = { | ||||||
| 		.extra1		= &one, | 		.extra1		= &one, | ||||||
| 		.extra2		= &gso_max_segs, | 		.extra2		= &gso_max_segs, | ||||||
| 	}, | 	}, | ||||||
|  | 	{ | ||||||
|  | 		.procname	= "tcp_pacing_ss_ratio", | ||||||
|  | 		.data		= &sysctl_tcp_pacing_ss_ratio, | ||||||
|  | 		.maxlen		= sizeof(int), | ||||||
|  | 		.mode		= 0644, | ||||||
|  | 		.proc_handler	= proc_dointvec_minmax, | ||||||
|  | 		.extra1		= &zero, | ||||||
|  | 		.extra2		= &thousand, | ||||||
|  | 	}, | ||||||
|  | 	{ | ||||||
|  | 		.procname	= "tcp_pacing_ca_ratio", | ||||||
|  | 		.data		= &sysctl_tcp_pacing_ca_ratio, | ||||||
|  | 		.maxlen		= sizeof(int), | ||||||
|  | 		.mode		= 0644, | ||||||
|  | 		.proc_handler	= proc_dointvec_minmax, | ||||||
|  | 		.extra1		= &zero, | ||||||
|  | 		.extra2		= &thousand, | ||||||
|  | 	}, | ||||||
| 	{ | 	{ | ||||||
| 		.procname	= "tcp_autocorking", | 		.procname	= "tcp_autocorking", | ||||||
| 		.data		= &sysctl_tcp_autocorking, | 		.data		= &sysctl_tcp_autocorking, | ||||||
|  |  | ||||||
|  | @ -753,13 +753,29 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) | ||||||
|  * TCP pacing, to smooth the burst on large writes when packets |  * TCP pacing, to smooth the burst on large writes when packets | ||||||
|  * in flight is significantly lower than cwnd (or rwin) |  * in flight is significantly lower than cwnd (or rwin) | ||||||
|  */ |  */ | ||||||
|  | int sysctl_tcp_pacing_ss_ratio __read_mostly = 200; | ||||||
|  | int sysctl_tcp_pacing_ca_ratio __read_mostly = 120; | ||||||
|  | 
 | ||||||
| static void tcp_update_pacing_rate(struct sock *sk) | static void tcp_update_pacing_rate(struct sock *sk) | ||||||
| { | { | ||||||
| 	const struct tcp_sock *tp = tcp_sk(sk); | 	const struct tcp_sock *tp = tcp_sk(sk); | ||||||
| 	u64 rate; | 	u64 rate; | ||||||
| 
 | 
 | ||||||
| 	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ | 	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ | ||||||
| 	rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3); | 	rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3); | ||||||
|  | 
 | ||||||
|  | 	/* current rate is (cwnd * mss) / srtt
 | ||||||
|  | 	 * In Slow Start [1], set sk_pacing_rate to 200 % the current rate. | ||||||
|  | 	 * In Congestion Avoidance phase, set it to 120 % the current rate. | ||||||
|  | 	 * | ||||||
|  | 	 * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh) | ||||||
|  | 	 *	 If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching | ||||||
|  | 	 *	 end of slow start and should slow down. | ||||||
|  | 	 */ | ||||||
|  | 	if (tp->snd_cwnd < tp->snd_ssthresh / 2) | ||||||
|  | 		rate *= sysctl_tcp_pacing_ss_ratio; | ||||||
|  | 	else | ||||||
|  | 		rate *= sysctl_tcp_pacing_ca_ratio; | ||||||
| 
 | 
 | ||||||
| 	rate *= max(tp->snd_cwnd, tp->packets_out); | 	rate *= max(tp->snd_cwnd, tp->packets_out); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Eric Dumazet
						Eric Dumazet