forked from mirrors/linux
		
	tcp: prefer packet timing to TS-ECR for RTT
Prefer packet timings to TS-ECR for RTT measurements when both sources are available. That's because broken middle-boxes and remote peers can return packets with corrupted TS-ECR fields. Similarly, most congestion controls that require RTT signals favor timing-based sources as well. Also check for bad TS-ECR values to avoid RTT blow-ups; such blow-ups have happened on production Web servers. Signed-off-by: Yuchung Cheng <ycheng@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									375fe02c91
								
							
						
					
					
						commit
						5b08e47caf
					
				
					 2 changed files with 18 additions and 50 deletions
				
			
		| 
						 | 
					@ -591,7 +591,6 @@ extern void tcp_initialize_rcv_mss(struct sock *sk);
 | 
				
			||||||
extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
 | 
					extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
 | 
				
			||||||
extern int tcp_mss_to_mtu(struct sock *sk, int mss);
 | 
					extern int tcp_mss_to_mtu(struct sock *sk, int mss);
 | 
				
			||||||
extern void tcp_mtup_init(struct sock *sk);
 | 
					extern void tcp_mtup_init(struct sock *sk);
 | 
				
			||||||
extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
 | 
					 | 
				
			||||||
extern void tcp_init_buffer_space(struct sock *sk);
 | 
					extern void tcp_init_buffer_space(struct sock *sk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void tcp_bound_rto(const struct sock *sk)
 | 
					static inline void tcp_bound_rto(const struct sock *sk)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2792,65 +2792,36 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 | 
				
			||||||
	tcp_xmit_retransmit_queue(sk);
 | 
						tcp_xmit_retransmit_queue(sk);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
 | 
					static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
 | 
				
			||||||
 | 
									      s32 seq_rtt)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	tcp_rtt_estimator(sk, seq_rtt);
 | 
						const struct tcp_sock *tp = tcp_sk(sk);
 | 
				
			||||||
	tcp_set_rto(sk);
 | 
					
 | 
				
			||||||
	inet_csk(sk)->icsk_backoff = 0;
 | 
						/* Prefer RTT measured from ACK's timing to TS-ECR. This is because
 | 
				
			||||||
}
 | 
						 * broken middle-boxes or peers may corrupt TS-ECR fields. But
 | 
				
			||||||
EXPORT_SYMBOL(tcp_valid_rtt_meas);
 | 
						 * Karn's algorithm forbids taking RTT if some retransmitted data
 | 
				
			||||||
 | 
						 * is acked (RFC6298).
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (flag & FLAG_RETRANS_DATA_ACKED)
 | 
				
			||||||
 | 
							seq_rtt = -1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Read draft-ietf-tcplw-high-performance before mucking
 | 
					 | 
				
			||||||
 * with this code. (Supersedes RFC1323)
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	/* RTTM Rule: A TSecr value received in a segment is used to
 | 
						/* RTTM Rule: A TSecr value received in a segment is used to
 | 
				
			||||||
	 * update the averaged RTT measurement only if the segment
 | 
						 * update the averaged RTT measurement only if the segment
 | 
				
			||||||
	 * acknowledges some new data, i.e., only if it advances the
 | 
						 * acknowledges some new data, i.e., only if it advances the
 | 
				
			||||||
	 * left edge of the send window.
 | 
						 * left edge of the send window.
 | 
				
			||||||
	 *
 | 
					 | 
				
			||||||
	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
 | 
						 * See draft-ietf-tcplw-high-performance-00, section 3.3.
 | 
				
			||||||
	 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
 | 
					 | 
				
			||||||
	 *
 | 
					 | 
				
			||||||
	 * Changed: reset backoff as soon as we see the first valid sample.
 | 
					 | 
				
			||||||
	 * If we do not, we get strongly overestimated rto. With timestamps
 | 
					 | 
				
			||||||
	 * samples are accepted even from very old segments: f.e., when rtt=1
 | 
					 | 
				
			||||||
	 * increases to 8, we retransmit 5 times and after 8 seconds delayed
 | 
					 | 
				
			||||||
	 * answer arrives rto becomes 120 seconds! If at least one of segments
 | 
					 | 
				
			||||||
	 * in window is lost... Voila.	 			--ANK (010210)
 | 
					 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	struct tcp_sock *tp = tcp_sk(sk);
 | 
						if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
 | 
				
			||||||
 | 
							seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
 | 
						if (seq_rtt < 0)
 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	/* We don't have a timestamp. Can only use
 | 
					 | 
				
			||||||
	 * packets that are not retransmitted to determine
 | 
					 | 
				
			||||||
	 * rtt estimates. Also, we must not reset the
 | 
					 | 
				
			||||||
	 * backoff for rto until we get a non-retransmitted
 | 
					 | 
				
			||||||
	 * packet. This allows us to deal with a situation
 | 
					 | 
				
			||||||
	 * where the network delay has increased suddenly.
 | 
					 | 
				
			||||||
	 * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (flag & FLAG_RETRANS_DATA_ACKED)
 | 
					 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	tcp_valid_rtt_meas(sk, seq_rtt);
 | 
						tcp_rtt_estimator(sk, seq_rtt);
 | 
				
			||||||
}
 | 
						tcp_set_rto(sk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
 | 
						/* RFC6298: only reset backoff on valid RTT measurement. */
 | 
				
			||||||
				      const s32 seq_rtt)
 | 
						inet_csk(sk)->icsk_backoff = 0;
 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	const struct tcp_sock *tp = tcp_sk(sk);
 | 
					 | 
				
			||||||
	/* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
 | 
					 | 
				
			||||||
	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
 | 
					 | 
				
			||||||
		tcp_ack_saw_tstamp(sk, flag);
 | 
					 | 
				
			||||||
	else if (seq_rtt >= 0)
 | 
					 | 
				
			||||||
		tcp_ack_no_tstamp(sk, seq_rtt, flag);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
 | 
					/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
 | 
				
			||||||
| 
						 | 
					@ -2989,8 +2960,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 | 
				
			||||||
			if (sacked & TCPCB_SACKED_RETRANS)
 | 
								if (sacked & TCPCB_SACKED_RETRANS)
 | 
				
			||||||
				tp->retrans_out -= acked_pcount;
 | 
									tp->retrans_out -= acked_pcount;
 | 
				
			||||||
			flag |= FLAG_RETRANS_DATA_ACKED;
 | 
								flag |= FLAG_RETRANS_DATA_ACKED;
 | 
				
			||||||
			ca_seq_rtt = -1;
 | 
					 | 
				
			||||||
			seq_rtt = -1;
 | 
					 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			ca_seq_rtt = now - scb->when;
 | 
								ca_seq_rtt = now - scb->when;
 | 
				
			||||||
			last_ackt = skb->tstamp;
 | 
								last_ackt = skb->tstamp;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue