forked from mirrors/linux
		
	tcp: allow for bigger reordering level
While testing the upcoming Yaogong patch (converting the out-of-order queue into an RB tree), I hit the max reordering level of the Linux TCP stack. The reordering level was limited to 127 for no good reason, and some network setups [1] can easily reach this limit and get limited throughput. Allow a new max limit of 300, and add a sysctl so that admins can set even bigger (or lower) values if needed. [1] Aggregation of links, per-packet load balancing, fabrics not doing deep packet inspection, alternative TCP congestion modules... Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Yaogong Wang <wygivan@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									7aef06db0f
								
							
						
					
					
						commit
						dca145ffaa
					
				
					 6 changed files with 23 additions and 12 deletions
				
			
		|  | @ -2230,11 +2230,8 @@ balance-rr: This mode is the only mode that will permit a single | |||
| 
 | ||||
| 	It is possible to adjust TCP/IP's congestion limits by | ||||
| 	altering the net.ipv4.tcp_reordering sysctl parameter.  The | ||||
| 	usual default value is 3, and the maximum useful value is 127. | ||||
| 	For a four interface balance-rr bond, expect that a single | ||||
| 	TCP/IP stream will utilize no more than approximately 2.3 | ||||
| 	interface's worth of throughput, even after adjusting | ||||
| 	tcp_reordering. | ||||
| 	usual default value is 3. But keep in mind that the TCP stack is | ||||
| 	able to automatically increase this when it detects reordering. | ||||
| 
 | ||||
| 	Note that the fraction of packets that will be delivered out of | ||||
| 	order is highly variable, and is unlikely to be zero.  The level | ||||
|  |  | |||
|  | @ -376,9 +376,17 @@ tcp_orphan_retries - INTEGER | |||
| 	may consume significant resources. Cf. tcp_max_orphans. | ||||
| 
 | ||||
| tcp_reordering - INTEGER | ||||
| 	Maximal reordering of packets in a TCP stream. | ||||
| 	Initial reordering level of packets in a TCP stream. | ||||
| 	The TCP stack can then dynamically adjust the flow reordering level | ||||
| 	between this initial value and tcp_max_reordering. | ||||
| 	Default: 3 | ||||
| 
 | ||||
| tcp_max_reordering - INTEGER | ||||
| 	Maximal reordering level of packets in a TCP stream. | ||||
| 	300 is a fairly conservative value, but you might increase it | ||||
| 	if paths are using per-packet load balancing (like bonding rr mode). | ||||
| 	Default: 300 | ||||
| 
 | ||||
| tcp_retrans_collapse - BOOLEAN | ||||
| 	Bug-to-bug compatibility with some broken printers. | ||||
| 	On retransmit try to send bigger packets to work around bugs in | ||||
|  |  | |||
|  | @ -204,10 +204,10 @@ struct tcp_sock { | |||
| 
 | ||||
| 	u16	urg_data;	/* Saved octet of OOB data and control flags */ | ||||
| 	u8	ecn_flags;	/* ECN status bits.			*/ | ||||
| 	u8	reordering;	/* Packet reordering metric.		*/ | ||||
| 	u8	keepalive_probes; /* num of allowed keep alive probes	*/ | ||||
| 	u32	reordering;	/* Packet reordering metric.		*/ | ||||
| 	u32	snd_up;		/* Urgent pointer		*/ | ||||
| 
 | ||||
| 	u8	keepalive_probes; /* num of allowed keep alive probes	*/ | ||||
| /*
 | ||||
|  *      Options received (usually on last packet, some only on SYN packets). | ||||
|  */ | ||||
|  |  | |||
|  | @ -70,9 +70,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); | |||
| /* After receiving this amount of duplicate ACKs fast retransmit starts. */ | ||||
| #define TCP_FASTRETRANS_THRESH 3 | ||||
| 
 | ||||
| /* Maximal reordering. */ | ||||
| #define TCP_MAX_REORDERING	127 | ||||
| 
 | ||||
| /* Maximal number of ACKs sent quickly to accelerate slow-start. */ | ||||
| #define TCP_MAX_QUICKACKS	16U | ||||
| 
 | ||||
|  | @ -252,6 +249,7 @@ extern int sysctl_tcp_abort_on_overflow; | |||
| extern int sysctl_tcp_max_orphans; | ||||
| extern int sysctl_tcp_fack; | ||||
| extern int sysctl_tcp_reordering; | ||||
| extern int sysctl_tcp_max_reordering; | ||||
| extern int sysctl_tcp_dsack; | ||||
| extern long sysctl_tcp_mem[3]; | ||||
| extern int sysctl_tcp_wmem[3]; | ||||
|  |  | |||
|  | @ -495,6 +495,13 @@ static struct ctl_table ipv4_table[] = { | |||
| 		.mode		= 0644, | ||||
| 		.proc_handler	= proc_dointvec | ||||
| 	}, | ||||
| 	{ | ||||
| 		.procname	= "tcp_max_reordering", | ||||
| 		.data		= &sysctl_tcp_max_reordering, | ||||
| 		.maxlen		= sizeof(int), | ||||
| 		.mode		= 0644, | ||||
| 		.proc_handler	= proc_dointvec | ||||
| 	}, | ||||
| 	{ | ||||
| 		.procname	= "tcp_dsack", | ||||
| 		.data		= &sysctl_tcp_dsack, | ||||
|  |  | |||
|  | @ -81,6 +81,7 @@ int sysctl_tcp_window_scaling __read_mostly = 1; | |||
| int sysctl_tcp_sack __read_mostly = 1; | ||||
| int sysctl_tcp_fack __read_mostly = 1; | ||||
| int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; | ||||
| int sysctl_tcp_max_reordering __read_mostly = 300; | ||||
| EXPORT_SYMBOL(sysctl_tcp_reordering); | ||||
| int sysctl_tcp_dsack __read_mostly = 1; | ||||
| int sysctl_tcp_app_win __read_mostly = 31; | ||||
|  | @ -833,7 +834,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, | |||
| 	if (metric > tp->reordering) { | ||||
| 		int mib_idx; | ||||
| 
 | ||||
| 		tp->reordering = min(TCP_MAX_REORDERING, metric); | ||||
| 		tp->reordering = min(sysctl_tcp_max_reordering, metric); | ||||
| 
 | ||||
| 		/* This exciting event is worth to be remembered. 8) */ | ||||
| 		if (ts) | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Eric Dumazet
						Eric Dumazet