mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	tcp: Don't allocate tcp_death_row outside of struct netns_ipv4.
We will soon introduce an optional per-netns ehash and access hash tables via net->ipv4.tcp_death_row->hashinfo instead of &tcp_hashinfo in most places. It could harm the fast path because dereferences of two fields in net and tcp_death_row might incur two extra cache line misses. To save one dereference, let's place tcp_death_row back in netns_ipv4 and fetch hashinfo via net->ipv4.tcp_death_row"."hashinfo. Note tcp_death_row was initially placed in netns_ipv4, and commitfbb8295248("tcp: allocate tcp_death_row outside of struct netns_ipv4") changed it to a pointer so that we can fire TIME_WAIT timers after freeing net. However, we don't do so after commit04c494e68a("Revert "tcp/dccp: get rid of inet_twsk_purge()""), so we need not define tcp_death_row as a pointer. Also, we move refcount_dec_and_test(&tw_refcount) from tcp_sk_exit() to tcp_sk_exit_batch() as a debug check. Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
		
							parent
							
								
									08eaef9040
								
							
						
					
					
						commit
						e9bd0cca09
					
				
					 7 changed files with 15 additions and 25 deletions
				
			
		| 
						 | 
				
			
			@ -34,6 +34,7 @@ struct inet_hashinfo;
 | 
			
		|||
struct inet_timewait_death_row {
 | 
			
		||||
	refcount_t		tw_refcount;
 | 
			
		||||
 | 
			
		||||
	/* Padding to avoid false sharing, tw_refcount can be often written */
 | 
			
		||||
	struct inet_hashinfo 	*hashinfo ____cacheline_aligned_in_smp;
 | 
			
		||||
	int			sysctl_max_tw_buckets;
 | 
			
		||||
};
 | 
			
		||||
| 
						 | 
				
			
			@ -41,7 +42,7 @@ struct inet_timewait_death_row {
 | 
			
		|||
struct tcp_fastopen_context;
 | 
			
		||||
 | 
			
		||||
struct netns_ipv4 {
 | 
			
		||||
	struct inet_timewait_death_row *tcp_death_row;
 | 
			
		||||
	struct inet_timewait_death_row tcp_death_row;
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_SYSCTL
 | 
			
		||||
	struct ctl_table_header	*forw_hdr;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -59,9 +59,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
 | 
			
		|||
	inet_twsk_bind_unhash(tw, hashinfo);
 | 
			
		||||
	spin_unlock(&bhead->lock);
 | 
			
		||||
 | 
			
		||||
	if (refcount_dec_and_test(&tw->tw_dr->tw_refcount))
 | 
			
		||||
		kfree(tw->tw_dr);
 | 
			
		||||
 | 
			
		||||
	refcount_dec(&tw->tw_dr->tw_refcount);
 | 
			
		||||
	inet_twsk_put(tw);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -59,7 +59,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 | 
			
		|||
	socket_seq_show(seq);
 | 
			
		||||
	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
 | 
			
		||||
		   sock_prot_inuse_get(net, &tcp_prot), orphans,
 | 
			
		||||
		   refcount_read(&net->ipv4.tcp_death_row->tw_refcount) - 1,
 | 
			
		||||
		   refcount_read(&net->ipv4.tcp_death_row.tw_refcount) - 1,
 | 
			
		||||
		   sockets, proto_memory_allocated(&tcp_prot));
 | 
			
		||||
	seq_printf(seq, "UDP: inuse %d mem %ld\n",
 | 
			
		||||
		   sock_prot_inuse_get(net, &udp_prot),
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -530,10 +530,9 @@ static struct ctl_table ipv4_table[] = {
 | 
			
		|||
};
 | 
			
		||||
 | 
			
		||||
static struct ctl_table ipv4_net_table[] = {
 | 
			
		||||
	/* tcp_max_tw_buckets must be first in this table. */
 | 
			
		||||
	{
 | 
			
		||||
		.procname	= "tcp_max_tw_buckets",
 | 
			
		||||
/*		.data		= &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, */
 | 
			
		||||
		.data		= &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
 | 
			
		||||
		.maxlen		= sizeof(int),
 | 
			
		||||
		.mode		= 0644,
 | 
			
		||||
		.proc_handler	= proc_dointvec
 | 
			
		||||
| 
						 | 
				
			
			@ -1361,8 +1360,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
 | 
			
		|||
		if (!table)
 | 
			
		||||
			goto err_alloc;
 | 
			
		||||
 | 
			
		||||
		/* skip first entry (sysctl_max_tw_buckets) */
 | 
			
		||||
		for (i = 1; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
 | 
			
		||||
		for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
 | 
			
		||||
			if (table[i].data) {
 | 
			
		||||
				/* Update the variables to point into
 | 
			
		||||
				 * the current struct net
 | 
			
		||||
| 
						 | 
				
			
			@ -1377,8 +1375,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
 | 
			
		|||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	table[0].data = &net->ipv4.tcp_death_row->sysctl_max_tw_buckets;
 | 
			
		||||
 | 
			
		||||
	net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
 | 
			
		||||
	if (!net->ipv4.ipv4_hdr)
 | 
			
		||||
		goto err_reg;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -292,7 +292,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 | 
			
		|||
	 * complete initialization after this.
 | 
			
		||||
	 */
 | 
			
		||||
	tcp_set_state(sk, TCP_SYN_SENT);
 | 
			
		||||
	tcp_death_row = net->ipv4.tcp_death_row;
 | 
			
		||||
	tcp_death_row = &net->ipv4.tcp_death_row;
 | 
			
		||||
	err = inet_hash_connect(tcp_death_row, sk);
 | 
			
		||||
	if (err)
 | 
			
		||||
		goto failure;
 | 
			
		||||
| 
						 | 
				
			
			@ -3091,13 +3091,9 @@ EXPORT_SYMBOL(tcp_prot);
 | 
			
		|||
 | 
			
		||||
static void __net_exit tcp_sk_exit(struct net *net)
 | 
			
		||||
{
 | 
			
		||||
	struct inet_timewait_death_row *tcp_death_row = net->ipv4.tcp_death_row;
 | 
			
		||||
 | 
			
		||||
	if (net->ipv4.tcp_congestion_control)
 | 
			
		||||
		bpf_module_put(net->ipv4.tcp_congestion_control,
 | 
			
		||||
			       net->ipv4.tcp_congestion_control->owner);
 | 
			
		||||
	if (refcount_dec_and_test(&tcp_death_row->tw_refcount))
 | 
			
		||||
		kfree(tcp_death_row);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int __net_init tcp_sk_init(struct net *net)
 | 
			
		||||
| 
						 | 
				
			
			@ -3129,13 +3125,10 @@ static int __net_init tcp_sk_init(struct net *net)
 | 
			
		|||
	net->ipv4.sysctl_tcp_tw_reuse = 2;
 | 
			
		||||
	net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
 | 
			
		||||
 | 
			
		||||
	net->ipv4.tcp_death_row = kzalloc(sizeof(struct inet_timewait_death_row), GFP_KERNEL);
 | 
			
		||||
	if (!net->ipv4.tcp_death_row)
 | 
			
		||||
		return -ENOMEM;
 | 
			
		||||
	refcount_set(&net->ipv4.tcp_death_row->tw_refcount, 1);
 | 
			
		||||
	refcount_set(&net->ipv4.tcp_death_row.tw_refcount, 1);
 | 
			
		||||
	cnt = tcp_hashinfo.ehash_mask + 1;
 | 
			
		||||
	net->ipv4.tcp_death_row->sysctl_max_tw_buckets = cnt / 2;
 | 
			
		||||
	net->ipv4.tcp_death_row->hashinfo = &tcp_hashinfo;
 | 
			
		||||
	net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
 | 
			
		||||
	net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
 | 
			
		||||
 | 
			
		||||
	net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
 | 
			
		||||
	net->ipv4.sysctl_tcp_sack = 1;
 | 
			
		||||
| 
						 | 
				
			
			@ -3201,8 +3194,10 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 | 
			
		|||
 | 
			
		||||
	inet_twsk_purge(&tcp_hashinfo, AF_INET);
 | 
			
		||||
 | 
			
		||||
	list_for_each_entry(net, net_exit_list, exit_list)
 | 
			
		||||
	list_for_each_entry(net, net_exit_list, exit_list) {
 | 
			
		||||
		WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount));
 | 
			
		||||
		tcp_fastopen_ctx_destroy(net);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct pernet_operations __net_initdata tcp_sk_ops = {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -250,7 +250,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 | 
			
		|||
	struct net *net = sock_net(sk);
 | 
			
		||||
	struct inet_timewait_sock *tw;
 | 
			
		||||
 | 
			
		||||
	tw = inet_twsk_alloc(sk, net->ipv4.tcp_death_row, state);
 | 
			
		||||
	tw = inet_twsk_alloc(sk, &net->ipv4.tcp_death_row, state);
 | 
			
		||||
 | 
			
		||||
	if (tw) {
 | 
			
		||||
		struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -325,7 +325,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 | 
			
		|||
	inet->inet_dport = usin->sin6_port;
 | 
			
		||||
 | 
			
		||||
	tcp_set_state(sk, TCP_SYN_SENT);
 | 
			
		||||
	tcp_death_row = net->ipv4.tcp_death_row;
 | 
			
		||||
	tcp_death_row = &net->ipv4.tcp_death_row;
 | 
			
		||||
	err = inet6_hash_connect(tcp_death_row, sk);
 | 
			
		||||
	if (err)
 | 
			
		||||
		goto late_failure;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue