mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	net: Introduce net.ipv4.tcp_migrate_req.
This commit adds a new sysctl option: net.ipv4.tcp_migrate_req. If this option is enabled or an eBPF program is attached, we will be able to migrate child sockets from one listener to another in the same reuseport group after close() or shutdown() syscalls. Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Benjamin Herrenschmidt <benh@amazon.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Acked-by: Martin KaFai Lau <kafai@fb.com> Link: https://lore.kernel.org/bpf/20210612123224.12525-2-kuniyu@amazon.co.jp
This commit is contained in:
		
							parent
							
								
									bbf29d3a2e
								
							
						
					
					
						commit
						f9ac779f88
					
				
					 3 changed files with 35 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -761,6 +761,31 @@ tcp_syncookies - INTEGER
 | 
			
		|||
	network connections you can set this knob to 2 to enable
 | 
			
		||||
	unconditional generation of syncookies.
 | 
			
		||||
 | 
			
		||||
tcp_migrate_req - BOOLEAN
 | 
			
		||||
	The incoming connection is tied to a specific listening socket when
 | 
			
		||||
	the initial SYN packet is received during the three-way handshake.
 | 
			
		||||
	When a listener is closed, in-flight request sockets during the
 | 
			
		||||
	handshake and established sockets in the accept queue are aborted.
 | 
			
		||||
 | 
			
		||||
	If the listener has SO_REUSEPORT enabled, other listeners on the
 | 
			
		||||
	same port should have been able to accept such connections. This
 | 
			
		||||
	option makes it possible to migrate such child sockets to another
 | 
			
		||||
	listener after close() or shutdown().
 | 
			
		||||
 | 
			
		||||
	The BPF_SK_REUSEPORT_SELECT_OR_MIGRATE type of eBPF program should
 | 
			
		||||
	usually be used to define the policy to pick an alive listener.
 | 
			
		||||
	Otherwise, the kernel will randomly pick an alive listener only if
 | 
			
		||||
	this option is enabled.
 | 
			
		||||
 | 
			
		||||
	Note that migration between listeners with different settings may
 | 
			
		||||
	crash applications. Let's say migration happens from listener A to
 | 
			
		||||
	B, and only B has TCP_SAVE_SYN enabled. B cannot read SYN data from
 | 
			
		||||
	the requests migrated from A. To avoid such a situation, cancel
 | 
			
		||||
	migration by returning SK_DROP in that type of eBPF program, or
 | 
			
		||||
	disable this option.
 | 
			
		||||
 | 
			
		||||
	Default: 0
 | 
			
		||||
 | 
			
		||||
tcp_fastopen - INTEGER
 | 
			
		||||
	Enable TCP Fast Open (RFC7413) to send and accept data in the opening
 | 
			
		||||
	SYN packet.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -126,6 +126,7 @@ struct netns_ipv4 {
 | 
			
		|||
	u8 sysctl_tcp_syn_retries;
 | 
			
		||||
	u8 sysctl_tcp_synack_retries;
 | 
			
		||||
	u8 sysctl_tcp_syncookies;
 | 
			
		||||
	u8 sysctl_tcp_migrate_req;
 | 
			
		||||
	int sysctl_tcp_reordering;
 | 
			
		||||
	u8 sysctl_tcp_retries1;
 | 
			
		||||
	u8 sysctl_tcp_retries2;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -960,6 +960,15 @@ static struct ctl_table ipv4_net_table[] = {
 | 
			
		|||
		.proc_handler	= proc_dou8vec_minmax,
 | 
			
		||||
	},
 | 
			
		||||
#endif
 | 
			
		||||
	{
 | 
			
		||||
		.procname	= "tcp_migrate_req",
 | 
			
		||||
		.data		= &init_net.ipv4.sysctl_tcp_migrate_req,
 | 
			
		||||
		.maxlen		= sizeof(u8),
 | 
			
		||||
		.mode		= 0644,
 | 
			
		||||
		.proc_handler	= proc_dou8vec_minmax,
 | 
			
		||||
		.extra1		= SYSCTL_ZERO,
 | 
			
		||||
		.extra2		= SYSCTL_ONE
 | 
			
		||||
	},
 | 
			
		||||
	{
 | 
			
		||||
		.procname	= "tcp_reordering",
 | 
			
		||||
		.data		= &init_net.ipv4.sysctl_tcp_reordering,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue