forked from mirrors/linux
		
	tcp: derive delack_max from rto_min
While BPF allows setting icsk->icsk_delack_max
and/or icsk->icsk_rto_min, we have an ip route
attribute (RTAX_RTO_MIN) to tune rto_min,
but nothing to adjust the max delayed ack accordingly,
which varies from 40 ms to 200 ms (TCP_DELACK_{MIN|MAX}).
This makes RTAX_RTO_MIN of almost no practical use,
unless customers are in big trouble.
Modern datacenter communications want to set
rto_min to ~5 ms, and the max delayed ack one jiffy
smaller to avoid spurious retransmits.
After this patch, an "rto_min 5" route attribute will
effectively lower max delayed ack timers to 4 ms.
Note "rto:6 ... ato:4" in the following ss output:
$ ss -temoi dst XXXXXX
State Recv-Q Send-Q           Local Address:Port       Peer Address:Port  Process
ESTAB 0      0        [2002:a05:6608:295::]:52950   [2002:a05:6608:297::]:41597
     ino:255134 sk:1001 <->
         skmem:(r0,rb1707063,t872,tb262144,f0,w0,o0,bl0,d0) ts sack
 cubic wscale:8,8 rto:6 rtt:0.02/0.002 ato:4 mss:4096 pmtu:4500
 rcvmss:536 advmss:4096 cwnd:10 bytes_sent:54823160 bytes_acked:54823121
 bytes_received:54823120 segs_out:1370582 segs_in:1370580
 data_segs_out:1370579 data_segs_in:1370578 send 16.4Gbps
 pacing_rate 32.6Gbps delivery_rate 1.72Gbps delivered:1370579
 busy:26920ms unacked:1 rcv_rtt:34.615 rcv_space:65920
 rcv_ssthresh:65535 minrtt:0.015 snd_wnd:65536
While we could argue this patch fixes a bug with RTAX_RTO_MIN,
I do not add a Fixes: tag, so that we can soak it a bit before
asking backports to stable branches.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
			
			
This commit is contained in:
		
							parent
							
								
									f68a181fcd
								
							
						
					
					
						commit
						bbf80d713f
					
				
					 3 changed files with 19 additions and 2 deletions
				
			
		|  | @ -718,6 +718,8 @@ static inline void tcp_fast_path_check(struct sock *sk) | ||||||
| 		tcp_fast_path_on(tp); | 		tcp_fast_path_on(tp); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | u32 tcp_delack_max(const struct sock *sk); | ||||||
|  | 
 | ||||||
| /* Compute the actual rto_min value */ | /* Compute the actual rto_min value */ | ||||||
| static inline u32 tcp_rto_min(const struct sock *sk) | static inline u32 tcp_rto_min(const struct sock *sk) | ||||||
| { | { | ||||||
|  |  | ||||||
|  | @ -3762,7 +3762,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) | ||||||
| 		info->tcpi_options |= TCPI_OPT_SYN_DATA; | 		info->tcpi_options |= TCPI_OPT_SYN_DATA; | ||||||
| 
 | 
 | ||||||
| 	info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); | 	info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); | ||||||
| 	info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); | 	info->tcpi_ato = jiffies_to_usecs(min(icsk->icsk_ack.ato, | ||||||
|  | 					      tcp_delack_max(sk))); | ||||||
| 	info->tcpi_snd_mss = tp->mss_cache; | 	info->tcpi_snd_mss = tp->mss_cache; | ||||||
| 	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; | 	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -3977,6 +3977,20 @@ int tcp_connect(struct sock *sk) | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(tcp_connect); | EXPORT_SYMBOL(tcp_connect); | ||||||
| 
 | 
 | ||||||
|  | u32 tcp_delack_max(const struct sock *sk) | ||||||
|  | { | ||||||
|  | 	const struct dst_entry *dst = __sk_dst_get(sk); | ||||||
|  | 	u32 delack_max = inet_csk(sk)->icsk_delack_max; | ||||||
|  | 
 | ||||||
|  | 	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) { | ||||||
|  | 		u32 rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); | ||||||
|  | 		u32 delack_from_rto_min = max_t(int, 1, rto_min - 1); | ||||||
|  | 
 | ||||||
|  | 		delack_max = min_t(u32, delack_max, delack_from_rto_min); | ||||||
|  | 	} | ||||||
|  | 	return delack_max; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /* Send out a delayed ack, the caller does the policy checking
 | /* Send out a delayed ack, the caller does the policy checking
 | ||||||
|  * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check() |  * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check() | ||||||
|  * for details. |  * for details. | ||||||
|  | @ -4012,7 +4026,7 @@ void tcp_send_delayed_ack(struct sock *sk) | ||||||
| 		ato = min(ato, max_ato); | 		ato = min(ato, max_ato); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	ato = min_t(u32, ato, inet_csk(sk)->icsk_delack_max); | 	ato = min_t(u32, ato, tcp_delack_max(sk)); | ||||||
| 
 | 
 | ||||||
| 	/* Stay within the limit we were given */ | 	/* Stay within the limit we were given */ | ||||||
| 	timeout = jiffies + ato; | 	timeout = jiffies + ato; | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Eric Dumazet
						Eric Dumazet