mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	tcp: remove early retransmit
This patch removes the support of RFC5827 early retransmit (i.e., fast recovery on small inflight with <3 dupacks) because it is subsumed by the new RACK loss detection. More specifically when RACK receives DUPACKs, it'll arm a reordering timer to start fast recovery after a quarter of (min)RTT, hence it covers the early retransmit except RACK does not limit itself to specific inflight or dupack numbers.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									840a3cbe89
								
							
						
					
					
						commit
						bec41a11dd
					
				
					 12 changed files with 12 additions and 111 deletions
				
			
		| 
						 | 
				
			
			@ -246,21 +246,12 @@ tcp_dsack - BOOLEAN
 | 
			
		|||
	Allows TCP to send "duplicate" SACKs.
 | 
			
		||||
 | 
			
		||||
tcp_early_retrans - INTEGER
 | 
			
		||||
	Enable Early Retransmit (ER), per RFC 5827. ER lowers the threshold
 | 
			
		||||
	for triggering fast retransmit when the amount of outstanding data is
 | 
			
		||||
	small and when no previously unsent data can be transmitted (such
 | 
			
		||||
	that limited transmit could be used). Also controls the use of
 | 
			
		||||
	Tail loss probe (TLP) that converts RTOs occurring due to tail
 | 
			
		||||
	losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01).
 | 
			
		||||
	Tail loss probe (TLP) converts RTOs occurring due to tail
 | 
			
		||||
	losses into fast recovery (draft-ietf-tcpm-rack). Note that
 | 
			
		||||
	TLP requires RACK to function properly (see tcp_recovery below)
 | 
			
		||||
	Possible values:
 | 
			
		||||
		0 disables ER
 | 
			
		||||
		1 enables ER
 | 
			
		||||
		2 enables ER but delays fast recovery and fast retransmit
 | 
			
		||||
		  by a fourth of RTT. This mitigates connection falsely
 | 
			
		||||
		  recovers when network has a small degree of reordering
 | 
			
		||||
		  (less than 3 packets).
 | 
			
		||||
		3 enables delayed ER and TLP.
 | 
			
		||||
		4 enables TLP only.
 | 
			
		||||
		0 disables TLP
 | 
			
		||||
		3 or 4 enables TLP
 | 
			
		||||
	Default: 3
 | 
			
		||||
 | 
			
		||||
tcp_ecn - INTEGER
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -224,8 +224,7 @@ struct tcp_sock {
 | 
			
		|||
		repair      : 1,
 | 
			
		||||
		frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */
 | 
			
		||||
	u8	repair_queue;
 | 
			
		||||
	u8	do_early_retrans:1,/* Enable RFC5827 early-retransmit  */
 | 
			
		||||
		syn_data:1,	/* SYN includes data */
 | 
			
		||||
	u8	syn_data:1,	/* SYN includes data */
 | 
			
		||||
		syn_fastopen:1,	/* SYN includes Fast Open option */
 | 
			
		||||
		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
 | 
			
		||||
		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -565,7 +565,6 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 | 
			
		|||
			     const struct sk_buff *next_skb);
 | 
			
		||||
 | 
			
		||||
/* tcp_input.c */
 | 
			
		||||
void tcp_resume_early_retransmit(struct sock *sk);
 | 
			
		||||
void tcp_rearm_rto(struct sock *sk);
 | 
			
		||||
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
 | 
			
		||||
void tcp_reset(struct sock *sk);
 | 
			
		||||
| 
						 | 
				
			
			@ -1037,24 +1036,6 @@ static inline void tcp_enable_fack(struct tcp_sock *tp)
 | 
			
		|||
	tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* TCP early-retransmit (ER) is similar to but more conservative than
 | 
			
		||||
 * the thin-dupack feature.  Enable ER only if thin-dupack is disabled.
 | 
			
		||||
 */
 | 
			
		||||
static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
 | 
			
		||||
{
 | 
			
		||||
	struct net *net = sock_net((struct sock *)tp);
 | 
			
		||||
 | 
			
		||||
	tp->do_early_retrans = sysctl_tcp_early_retrans &&
 | 
			
		||||
		sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack &&
 | 
			
		||||
		!(sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) &&
 | 
			
		||||
		net->ipv4.sysctl_tcp_reordering == 3;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
 | 
			
		||||
{
 | 
			
		||||
	tp->do_early_retrans = 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
 | 
			
		||||
{
 | 
			
		||||
	return tp->sacked_out + tp->lost_out;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -215,7 +215,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
 | 
			
		||||
	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 | 
			
		||||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 | 
			
		||||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 | 
			
		||||
		r->idiag_timer = 1;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -406,7 +406,6 @@ void tcp_init_sock(struct sock *sk)
 | 
			
		|||
	tp->mss_cache = TCP_MSS_DEFAULT;
 | 
			
		||||
 | 
			
		||||
	tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
 | 
			
		||||
	tcp_enable_early_retrans(tp);
 | 
			
		||||
	tcp_assign_congestion_control(sk);
 | 
			
		||||
 | 
			
		||||
	tp->tsoffset = 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -2477,8 +2476,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 | 
			
		|||
			err = -EINVAL;
 | 
			
		||||
		else {
 | 
			
		||||
			tp->thin_dupack = val;
 | 
			
		||||
			if (tp->thin_dupack)
 | 
			
		||||
				tcp_disable_early_retrans(tp);
 | 
			
		||||
		}
 | 
			
		||||
		break;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -904,8 +904,6 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 | 
			
		|||
		tcp_disable_fack(tp);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (metric > 0)
 | 
			
		||||
		tcp_disable_early_retrans(tp);
 | 
			
		||||
	tp->rack.reord = 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2054,30 +2052,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
 | 
			
		|||
	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
 | 
			
		||||
{
 | 
			
		||||
	struct tcp_sock *tp = tcp_sk(sk);
 | 
			
		||||
	unsigned long delay;
 | 
			
		||||
 | 
			
		||||
	/* Delay early retransmit and entering fast recovery for
 | 
			
		||||
	 * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
 | 
			
		||||
	 * available, or RTO is scheduled to fire first.
 | 
			
		||||
	 */
 | 
			
		||||
	if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
 | 
			
		||||
	    (flag & FLAG_ECE) || !tp->srtt_us)
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
 | 
			
		||||
		    msecs_to_jiffies(2));
 | 
			
		||||
 | 
			
		||||
	if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
 | 
			
		||||
				  TCP_RTO_MAX);
 | 
			
		||||
	return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Linux NewReno/SACK/FACK/ECN state machine.
 | 
			
		||||
 * --------------------------------------
 | 
			
		||||
 *
 | 
			
		||||
| 
						 | 
				
			
			@ -2221,16 +2195,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
 | 
			
		|||
	    tcp_is_sack(tp) && !tcp_send_head(sk))
 | 
			
		||||
		return true;
 | 
			
		||||
 | 
			
		||||
	/* Trick#6: TCP early retransmit, per RFC5827.  To avoid spurious
 | 
			
		||||
	 * retransmissions due to small network reorderings, we implement
 | 
			
		||||
	 * Mitigation A.3 in the RFC and delay the retransmission for a short
 | 
			
		||||
	 * interval if appropriate.
 | 
			
		||||
	 */
 | 
			
		||||
	if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
 | 
			
		||||
	    (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
 | 
			
		||||
	    !tcp_may_send_now(sk))
 | 
			
		||||
		return !tcp_pause_early_retransmit(sk, flag);
 | 
			
		||||
 | 
			
		||||
	return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -3050,8 +3014,7 @@ void tcp_rearm_rto(struct sock *sk)
 | 
			
		|||
	} else {
 | 
			
		||||
		u32 rto = inet_csk(sk)->icsk_rto;
 | 
			
		||||
		/* Offset the time elapsed after installing regular RTO */
 | 
			
		||||
		if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 | 
			
		||||
		    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 | 
			
		||||
		if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 | 
			
		||||
		    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 | 
			
		||||
			struct sk_buff *skb = tcp_write_queue_head(sk);
 | 
			
		||||
			const u32 rto_time_stamp =
 | 
			
		||||
| 
						 | 
				
			
			@ -3068,24 +3031,6 @@ void tcp_rearm_rto(struct sock *sk)
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* This function is called when the delayed ER timer fires. TCP enters
 | 
			
		||||
 * fast recovery and performs fast-retransmit.
 | 
			
		||||
 */
 | 
			
		||||
void tcp_resume_early_retransmit(struct sock *sk)
 | 
			
		||||
{
 | 
			
		||||
	struct tcp_sock *tp = tcp_sk(sk);
 | 
			
		||||
 | 
			
		||||
	tcp_rearm_rto(sk);
 | 
			
		||||
 | 
			
		||||
	/* Stop if ER is disabled after the delayed ER timer is scheduled */
 | 
			
		||||
	if (!tp->do_early_retrans)
 | 
			
		||||
		return;
 | 
			
		||||
 | 
			
		||||
	tcp_enter_recovery(sk, false);
 | 
			
		||||
	tcp_update_scoreboard(sk, 1);
 | 
			
		||||
	tcp_xmit_retransmit_queue(sk);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* If we get here, the whole TSO packet has not been acked. */
 | 
			
		||||
static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -3651,8 +3596,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 | 
			
		|||
 | 
			
		||||
	skb_mstamp_get(&sack_state.ack_time);
 | 
			
		||||
 | 
			
		||||
	if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 | 
			
		||||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 | 
			
		||||
	if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 | 
			
		||||
		tcp_rearm_rto(sk);
 | 
			
		||||
 | 
			
		||||
	if (after(ack, prior_snd_una)) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2229,7 +2229,6 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 | 
			
		|||
	int state;
 | 
			
		||||
 | 
			
		||||
	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
 | 
			
		||||
	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 | 
			
		||||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 | 
			
		||||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 | 
			
		||||
		timer_active	= 1;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -522,7 +522,6 @@ void tcp_init_metrics(struct sock *sk)
 | 
			
		|||
	val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
 | 
			
		||||
	if (val && tp->reordering != val) {
 | 
			
		||||
		tcp_disable_fack(tp);
 | 
			
		||||
		tcp_disable_early_retrans(tp);
 | 
			
		||||
		tp->reordering = val;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -468,7 +468,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 | 
			
		|||
		newtp->sacked_out = 0;
 | 
			
		||||
		newtp->fackets_out = 0;
 | 
			
		||||
		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 | 
			
		||||
		tcp_enable_early_retrans(newtp);
 | 
			
		||||
		newtp->tlp_high_seq = 0;
 | 
			
		||||
		newtp->lsndtime = treq->snt_synack.stamp_jiffies;
 | 
			
		||||
		newsk->sk_txhash = treq->txhash;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -76,10 +76,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 | 
			
		|||
	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 | 
			
		||||
 | 
			
		||||
	tp->packets_out += tcp_skb_pcount(skb);
 | 
			
		||||
	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 | 
			
		||||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 | 
			
		||||
	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 | 
			
		||||
		tcp_rearm_rto(sk);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
 | 
			
		||||
		      tcp_skb_pcount(skb));
 | 
			
		||||
| 
						 | 
				
			
			@ -2289,8 +2287,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 | 
			
		|||
	u32 timeout, tlp_time_stamp, rto_time_stamp;
 | 
			
		||||
	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 | 
			
		||||
 | 
			
		||||
	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
 | 
			
		||||
		return false;
 | 
			
		||||
	/* No consecutive loss probes. */
 | 
			
		||||
	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
 | 
			
		||||
		tcp_rearm_rto(sk);
 | 
			
		||||
| 
						 | 
				
			
			@ -2309,8 +2305,9 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 | 
			
		|||
	/* Schedule a loss probe in 2*RTT for SACK capable connections
 | 
			
		||||
	 * in Open state, that are either limited by cwnd or application.
 | 
			
		||||
	 */
 | 
			
		||||
	if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
 | 
			
		||||
	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 | 
			
		||||
	if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
 | 
			
		||||
	    !tp->packets_out || !tcp_is_sack(tp) ||
 | 
			
		||||
	    icsk->icsk_ca_state != TCP_CA_Open)
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -566,9 +566,6 @@ void tcp_write_timer_handler(struct sock *sk)
 | 
			
		|||
	case ICSK_TIME_REO_TIMEOUT:
 | 
			
		||||
		tcp_rack_reo_timeout(sk);
 | 
			
		||||
		break;
 | 
			
		||||
	case ICSK_TIME_EARLY_RETRANS:
 | 
			
		||||
		tcp_resume_early_retransmit(sk);
 | 
			
		||||
		break;
 | 
			
		||||
	case ICSK_TIME_LOSS_PROBE:
 | 
			
		||||
		tcp_send_loss_probe(sk);
 | 
			
		||||
		break;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1745,7 +1745,6 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 | 
			
		|||
	srcp  = ntohs(inet->inet_sport);
 | 
			
		||||
 | 
			
		||||
	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
 | 
			
		||||
	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 | 
			
		||||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 | 
			
		||||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 | 
			
		||||
		timer_active	= 1;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue