mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	tcp: switch rcv_rtt_est and rcvq_space to high resolution timestamps
Some devices or distributions use HZ=100 or HZ=250.

TCP receive buffer autotuning has poor behavior caused by this choice.
Since autotuning happens after 4 ms or 10 ms, short distance flows
get their receive buffer tuned to a very high value, but after an initial
period where it was frozen to (too small) initial value.

With tp->tcp_mstamp introduction, we can switch to high resolution
timestamps almost for free (at the expense of 8 additional bytes per
TCP structure).

Note that some TCP stacks use usec TCP timestamps where this
patch makes even more sense: many TCP flows have < 500 usec RTT.
Hopefully this finer TS option can be standardized soon.

Tested:
 HZ=100 kernel
 ./netperf -H lpaa24 -t TCP_RR -l 1000 -- -r 10000,10000 &

 Peer without patch :
 lpaa24:~# ss -tmi dst lpaa23
 ...
 skmem:(r0,rb8388608,...)
 rcv_rtt:10 rcv_space:3210000 minrtt:0.017

 Peer with the patch :
 lpaa23:~# ss -tmi dst lpaa24
 ...
 skmem:(r0,rb428800,...)
 rcv_rtt:0.069 rcv_space:30000 minrtt:0.017

We can see saner RCVBUF, and more precise rcv_rtt information.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									a6db50b81e
								
							
						
					
					
						commit
						645f4c6f2e
					
				
					 3 changed files with 24 additions and 18 deletions
				
			
		| 
						 | 
				
			
			@ -333,16 +333,16 @@ struct tcp_sock {
 | 
			
		|||
 | 
			
		||||
/* Receiver side RTT estimation */
 | 
			
		||||
	struct {
 | 
			
		||||
		u32	rtt;
 | 
			
		||||
		u32		rtt_us;
 | 
			
		||||
		u32		seq;
 | 
			
		||||
		u32	time;
 | 
			
		||||
		struct skb_mstamp time;
 | 
			
		||||
	} rcv_rtt_est;
 | 
			
		||||
 | 
			
		||||
/* Receiver queue space */
 | 
			
		||||
	struct {
 | 
			
		||||
		int		space;
 | 
			
		||||
		u32		seq;
 | 
			
		||||
		u32	time;
 | 
			
		||||
		struct skb_mstamp time;
 | 
			
		||||
	} rcvq_space;
 | 
			
		||||
 | 
			
		||||
/* TCP-specific MTU probe information. */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2853,7 +2853,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 | 
			
		|||
	info->tcpi_snd_ssthresh = tp->snd_ssthresh;
 | 
			
		||||
	info->tcpi_advmss = tp->advmss;
 | 
			
		||||
 | 
			
		||||
	info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
 | 
			
		||||
	info->tcpi_rcv_rtt = tp->rcv_rtt_est.rtt_us >> 3;
 | 
			
		||||
	info->tcpi_rcv_space = tp->rcvq_space.space;
 | 
			
		||||
 | 
			
		||||
	info->tcpi_total_retrans = tp->total_retrans;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -442,7 +442,8 @@ void tcp_init_buffer_space(struct sock *sk)
 | 
			
		|||
		tcp_sndbuf_expand(sk);
 | 
			
		||||
 | 
			
		||||
	tp->rcvq_space.space = tp->rcv_wnd;
 | 
			
		||||
	tp->rcvq_space.time = tcp_time_stamp;
 | 
			
		||||
	skb_mstamp_get(&tp->tcp_mstamp);
 | 
			
		||||
	tp->rcvq_space.time = tp->tcp_mstamp;
 | 
			
		||||
	tp->rcvq_space.seq = tp->copied_seq;
 | 
			
		||||
 | 
			
		||||
	maxwin = tcp_full_space(sk);
 | 
			
		||||
| 
						 | 
				
			
			@ -518,7 +519,7 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss);
 | 
			
		|||
 */
 | 
			
		||||
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 | 
			
		||||
{
 | 
			
		||||
	u32 new_sample = tp->rcv_rtt_est.rtt;
 | 
			
		||||
	u32 new_sample = tp->rcv_rtt_est.rtt_us;
 | 
			
		||||
	long m = sample;
 | 
			
		||||
 | 
			
		||||
	if (m == 0)
 | 
			
		||||
| 
						 | 
				
			
			@ -548,21 +549,23 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 | 
			
		|||
		new_sample = m << 3;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (tp->rcv_rtt_est.rtt != new_sample)
 | 
			
		||||
		tp->rcv_rtt_est.rtt = new_sample;
 | 
			
		||||
	tp->rcv_rtt_est.rtt_us = new_sample;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
 | 
			
		||||
{
 | 
			
		||||
	if (tp->rcv_rtt_est.time == 0)
 | 
			
		||||
	u32 delta_us;
 | 
			
		||||
 | 
			
		||||
	if (tp->rcv_rtt_est.time.v64 == 0)
 | 
			
		||||
		goto new_measure;
 | 
			
		||||
	if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
 | 
			
		||||
		return;
 | 
			
		||||
	tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
 | 
			
		||||
	delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time);
 | 
			
		||||
	tcp_rcv_rtt_update(tp, delta_us, 1);
 | 
			
		||||
 | 
			
		||||
new_measure:
 | 
			
		||||
	tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
 | 
			
		||||
	tp->rcv_rtt_est.time = tcp_time_stamp;
 | 
			
		||||
	tp->rcv_rtt_est.time = tp->tcp_mstamp;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 | 
			
		||||
| 
						 | 
				
			
			@ -572,7 +575,10 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 | 
			
		|||
	if (tp->rx_opt.rcv_tsecr &&
 | 
			
		||||
	    (TCP_SKB_CB(skb)->end_seq -
 | 
			
		||||
	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
 | 
			
		||||
		tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
 | 
			
		||||
		tcp_rcv_rtt_update(tp,
 | 
			
		||||
				   jiffies_to_usecs(tcp_time_stamp -
 | 
			
		||||
						    tp->rx_opt.rcv_tsecr),
 | 
			
		||||
				   0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			@ -585,8 +591,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
 | 
			
		|||
	int time;
 | 
			
		||||
	int copied;
 | 
			
		||||
 | 
			
		||||
	time = tcp_time_stamp - tp->rcvq_space.time;
 | 
			
		||||
	if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
 | 
			
		||||
	time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time);
 | 
			
		||||
	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	/* Number of bytes copied to user in last RTT */
 | 
			
		||||
| 
						 | 
				
			
			@ -642,7 +648,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 | 
			
		|||
 | 
			
		||||
new_measure:
 | 
			
		||||
	tp->rcvq_space.seq = tp->copied_seq;
 | 
			
		||||
	tp->rcvq_space.time = tcp_time_stamp;
 | 
			
		||||
	tp->rcvq_space.time = tp->tcp_mstamp;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* There is something which you must keep in mind when you analyze the
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue