mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	net: Introduce net.ipv4.tcp_migrate_req.
This commit adds a new sysctl option: net.ipv4.tcp_migrate_req. If this option is enabled or an eBPF program is attached, we will be able to migrate child sockets from one listener to another in the same reuseport group after close() or shutdown() syscalls. Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Benjamin Herrenschmidt <benh@amazon.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Acked-by: Martin KaFai Lau <kafai@fb.com> Link: https://lore.kernel.org/bpf/20210612123224.12525-2-kuniyu@amazon.co.jp
This commit is contained in:
		
							parent
							
								
									bbf29d3a2e
								
							
						
					
					
						commit
						f9ac779f88
					
				
					 3 changed files with 35 additions and 0 deletions
				
			
		| 
						 | 
					@ -761,6 +761,31 @@ tcp_syncookies - INTEGER
 | 
				
			||||||
	network connections you can set this knob to 2 to enable
 | 
						network connections you can set this knob to 2 to enable
 | 
				
			||||||
	unconditional generation of syncookies.
 | 
						unconditional generation of syncookies.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					tcp_migrate_req - BOOLEAN
 | 
				
			||||||
 | 
						The incoming connection is tied to a specific listening socket when
 | 
				
			||||||
 | 
						the initial SYN packet is received during the three-way handshake.
 | 
				
			||||||
 | 
						When a listener is closed, in-flight request sockets during the
 | 
				
			||||||
 | 
						handshake and established sockets in the accept queue are aborted.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						If the listener has SO_REUSEPORT enabled, other listeners on the
 | 
				
			||||||
 | 
						same port should have been able to accept such connections. This
 | 
				
			||||||
 | 
						option makes it possible to migrate such child sockets to another
 | 
				
			||||||
 | 
						listener after close() or shutdown().
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						The BPF_SK_REUSEPORT_SELECT_OR_MIGRATE type of eBPF program should
 | 
				
			||||||
 | 
						usually be used to define the policy to pick an alive listener.
 | 
				
			||||||
 | 
						Otherwise, the kernel will randomly pick an alive listener only if
 | 
				
			||||||
 | 
						this option is enabled.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Note that migration between listeners with different settings may
 | 
				
			||||||
 | 
						crash applications. Let's say migration happens from listener A to
 | 
				
			||||||
 | 
						B, and only B has TCP_SAVE_SYN enabled. B cannot read SYN data from
 | 
				
			||||||
 | 
						the requests migrated from A. To avoid such a situation, cancel
 | 
				
			||||||
 | 
						migration by returning SK_DROP in that type of eBPF program, or
 | 
				
			||||||
 | 
						disable this option.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						Default: 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
tcp_fastopen - INTEGER
 | 
					tcp_fastopen - INTEGER
 | 
				
			||||||
	Enable TCP Fast Open (RFC7413) to send and accept data in the opening
 | 
						Enable TCP Fast Open (RFC7413) to send and accept data in the opening
 | 
				
			||||||
	SYN packet.
 | 
						SYN packet.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -126,6 +126,7 @@ struct netns_ipv4 {
 | 
				
			||||||
	u8 sysctl_tcp_syn_retries;
 | 
						u8 sysctl_tcp_syn_retries;
 | 
				
			||||||
	u8 sysctl_tcp_synack_retries;
 | 
						u8 sysctl_tcp_synack_retries;
 | 
				
			||||||
	u8 sysctl_tcp_syncookies;
 | 
						u8 sysctl_tcp_syncookies;
 | 
				
			||||||
 | 
						u8 sysctl_tcp_migrate_req;
 | 
				
			||||||
	int sysctl_tcp_reordering;
 | 
						int sysctl_tcp_reordering;
 | 
				
			||||||
	u8 sysctl_tcp_retries1;
 | 
						u8 sysctl_tcp_retries1;
 | 
				
			||||||
	u8 sysctl_tcp_retries2;
 | 
						u8 sysctl_tcp_retries2;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -960,6 +960,15 @@ static struct ctl_table ipv4_net_table[] = {
 | 
				
			||||||
		.proc_handler	= proc_dou8vec_minmax,
 | 
							.proc_handler	= proc_dou8vec_minmax,
 | 
				
			||||||
	},
 | 
						},
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							.procname	= "tcp_migrate_req",
 | 
				
			||||||
 | 
							.data		= &init_net.ipv4.sysctl_tcp_migrate_req,
 | 
				
			||||||
 | 
							.maxlen		= sizeof(u8),
 | 
				
			||||||
 | 
							.mode		= 0644,
 | 
				
			||||||
 | 
							.proc_handler	= proc_dou8vec_minmax,
 | 
				
			||||||
 | 
							.extra1		= SYSCTL_ZERO,
 | 
				
			||||||
 | 
							.extra2		= SYSCTL_ONE
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		.procname	= "tcp_reordering",
 | 
							.procname	= "tcp_reordering",
 | 
				
			||||||
		.data		= &init_net.ipv4.sysctl_tcp_reordering,
 | 
							.data		= &init_net.ipv4.sysctl_tcp_reordering,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue