mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	[TCP]: sysctl to allow TCP window > 32767 sans wscale
Back in the dark ages, we had to be conservative and only allow 15-bit window fields if the window scale option was not negotiated. Some ancient stacks used a signed 16-bit quantity for the window field of the TCP header and would get confused. Those days are long gone, so we can use the full 16 bits by default now. There is a sysctl added so that we can still interact with such old stacks.

Signed-off-by: Rick Jones <rick.jones2@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									c1b1bce852
								
							
						
					
					
						commit
						15d99e02ba
					
				
					 5 changed files with 34 additions and 7 deletions
				
			
		| 
						 | 
					@ -355,6 +355,13 @@ somaxconn - INTEGER
 | 
				
			||||||
	Defaults to 128.  See also tcp_max_syn_backlog for additional tuning
 | 
						Defaults to 128.  See also tcp_max_syn_backlog for additional tuning
 | 
				
			||||||
	for TCP sockets.
 | 
						for TCP sockets.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					tcp_workaround_signed_windows - BOOLEAN
 | 
				
			||||||
 | 
						If set, assume no receipt of a window scaling option means the
 | 
				
			||||||
 | 
						remote TCP is broken and treats the window as a signed quantity.
 | 
				
			||||||
 | 
						If unset, assume the remote TCP is not broken even if we do
 | 
				
			||||||
 | 
						not receive a window scaling option from them.
 | 
				
			||||||
 | 
						Default: 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
IP Variables:
 | 
					IP Variables:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ip_local_port_range - 2 INTEGERS
 | 
					ip_local_port_range - 2 INTEGERS
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -402,6 +402,7 @@ enum
 | 
				
			||||||
	NET_IPV4_IPFRAG_MAX_DIST=112,
 | 
						NET_IPV4_IPFRAG_MAX_DIST=112,
 | 
				
			||||||
 	NET_TCP_MTU_PROBING=113,
 | 
					 	NET_TCP_MTU_PROBING=113,
 | 
				
			||||||
	NET_TCP_BASE_MSS=114,
 | 
						NET_TCP_BASE_MSS=114,
 | 
				
			||||||
 | 
						NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum {
 | 
					enum {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -224,6 +224,7 @@ extern int sysctl_tcp_tso_win_divisor;
 | 
				
			||||||
extern int sysctl_tcp_abc;
 | 
					extern int sysctl_tcp_abc;
 | 
				
			||||||
extern int sysctl_tcp_mtu_probing;
 | 
					extern int sysctl_tcp_mtu_probing;
 | 
				
			||||||
extern int sysctl_tcp_base_mss;
 | 
					extern int sysctl_tcp_base_mss;
 | 
				
			||||||
 | 
					extern int sysctl_tcp_workaround_signed_windows;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern atomic_t tcp_memory_allocated;
 | 
					extern atomic_t tcp_memory_allocated;
 | 
				
			||||||
extern atomic_t tcp_sockets_allocated;
 | 
					extern atomic_t tcp_sockets_allocated;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -680,7 +680,14 @@ ctl_table ipv4_table[] = {
 | 
				
			||||||
		.mode		= 0644,
 | 
							.mode		= 0644,
 | 
				
			||||||
		.proc_handler	= &proc_dointvec,
 | 
							.proc_handler	= &proc_dointvec,
 | 
				
			||||||
	},
 | 
						},
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
							.ctl_name	= NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS,
 | 
				
			||||||
 | 
							.procname	= "tcp_workaround_signed_windows",
 | 
				
			||||||
 | 
							.data		= &sysctl_tcp_workaround_signed_windows,
 | 
				
			||||||
 | 
							.maxlen		= sizeof(int),
 | 
				
			||||||
 | 
							.mode		= 0644,
 | 
				
			||||||
 | 
							.proc_handler	= &proc_dointvec
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
	{ .ctl_name = 0 }
 | 
						{ .ctl_name = 0 }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -45,6 +45,11 @@
 | 
				
			||||||
/* People can turn this off for buggy TCP's found in printers etc. */
 | 
					/* People can turn this off for buggy TCP's found in printers etc. */
 | 
				
			||||||
int sysctl_tcp_retrans_collapse = 1;
 | 
					int sysctl_tcp_retrans_collapse = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* People can turn this on to  work with those rare, broken TCPs that
 | 
				
			||||||
 | 
					 * interpret the window field as a signed quantity.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					int sysctl_tcp_workaround_signed_windows = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* This limits the percentage of the congestion window which we
 | 
					/* This limits the percentage of the congestion window which we
 | 
				
			||||||
 * will allow a single TSO frame to consume.  Building TSO frames
 | 
					 * will allow a single TSO frame to consume.  Building TSO frames
 | 
				
			||||||
 * which are too large can cause TCP streams to be bursty.
 | 
					 * which are too large can cause TCP streams to be bursty.
 | 
				
			||||||
| 
						 | 
					@ -177,12 +182,18 @@ void tcp_select_initial_window(int __space, __u32 mss,
 | 
				
			||||||
		space = (space / mss) * mss;
 | 
							space = (space / mss) * mss;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* NOTE: offering an initial window larger than 32767
 | 
						/* NOTE: offering an initial window larger than 32767
 | 
				
			||||||
	 * will break some buggy TCP stacks. We try to be nice.
 | 
						 * will break some buggy TCP stacks. If the admin tells us
 | 
				
			||||||
	 * If we are not window scaling, then this truncates
 | 
						 * it is likely we could be speaking with such a buggy stack
 | 
				
			||||||
	 * our initial window offering to 32k. There should also
 | 
						 * we will truncate our initial window offering to 32K-1
 | 
				
			||||||
	 * be a sysctl option to stop being nice.
 | 
						 * unless the remote has sent us a window scaling option,
 | 
				
			||||||
 | 
						 * which we interpret as a sign the remote TCP is not
 | 
				
			||||||
 | 
						 * misinterpreting the window field as a signed quantity.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
 | 
						if (sysctl_tcp_workaround_signed_windows)
 | 
				
			||||||
 | 
							(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							(*rcv_wnd) = space;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	(*rcv_wscale) = 0;
 | 
						(*rcv_wscale) = 0;
 | 
				
			||||||
	if (wscale_ok) {
 | 
						if (wscale_ok) {
 | 
				
			||||||
		/* Set window scaling on max possible window
 | 
							/* Set window scaling on max possible window
 | 
				
			||||||
| 
						 | 
					@ -241,7 +252,7 @@ static u16 tcp_select_window(struct sock *sk)
 | 
				
			||||||
	/* Make sure we do not exceed the maximum possible
 | 
						/* Make sure we do not exceed the maximum possible
 | 
				
			||||||
	 * scaled window.
 | 
						 * scaled window.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (!tp->rx_opt.rcv_wscale)
 | 
						if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
 | 
				
			||||||
		new_win = min(new_win, MAX_TCP_WINDOW);
 | 
							new_win = min(new_win, MAX_TCP_WINDOW);
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
 | 
							new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue