mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	net: Use a percpu_counter for orphan_count
Instead of using one atomic_t per protocol, use a percpu_counter for "orphan_count" to reduce cache line contention on heavy-duty network servers.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									1748376b66
								
							
						
					
					
						commit
						dd24c00191
					
				
					 8 changed files with 24 additions and 18 deletions
				
			
		| 
						 | 
					@ -666,7 +666,7 @@ struct proto {
 | 
				
			||||||
	unsigned int		obj_size;
 | 
						unsigned int		obj_size;
 | 
				
			||||||
	int			slab_flags;
 | 
						int			slab_flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	atomic_t		*orphan_count;
 | 
						struct percpu_counter	*orphan_count;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct request_sock_ops	*rsk_prot;
 | 
						struct request_sock_ops	*rsk_prot;
 | 
				
			||||||
	struct timewait_sock_ops *twsk_prot;
 | 
						struct timewait_sock_ops *twsk_prot;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -46,7 +46,7 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern struct inet_hashinfo tcp_hashinfo;
 | 
					extern struct inet_hashinfo tcp_hashinfo;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern atomic_t tcp_orphan_count;
 | 
					extern struct percpu_counter tcp_orphan_count;
 | 
				
			||||||
extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 | 
					extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define MAX_TCP_HEADER	(128 + MAX_HEADER)
 | 
					#define MAX_TCP_HEADER	(128 + MAX_HEADER)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -49,7 +49,7 @@ extern int dccp_debug;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern struct inet_hashinfo dccp_hashinfo;
 | 
					extern struct inet_hashinfo dccp_hashinfo;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern atomic_t dccp_orphan_count;
 | 
					extern struct percpu_counter dccp_orphan_count;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 | 
					extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -40,8 +40,7 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
EXPORT_SYMBOL_GPL(dccp_statistics);
 | 
					EXPORT_SYMBOL_GPL(dccp_statistics);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
atomic_t dccp_orphan_count = ATOMIC_INIT(0);
 | 
					struct percpu_counter dccp_orphan_count;
 | 
				
			||||||
 | 
					 | 
				
			||||||
EXPORT_SYMBOL_GPL(dccp_orphan_count);
 | 
					EXPORT_SYMBOL_GPL(dccp_orphan_count);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct inet_hashinfo dccp_hashinfo;
 | 
					struct inet_hashinfo dccp_hashinfo;
 | 
				
			||||||
| 
						 | 
					@ -1000,7 +999,7 @@ void dccp_close(struct sock *sk, long timeout)
 | 
				
			||||||
	state = sk->sk_state;
 | 
						state = sk->sk_state;
 | 
				
			||||||
	sock_hold(sk);
 | 
						sock_hold(sk);
 | 
				
			||||||
	sock_orphan(sk);
 | 
						sock_orphan(sk);
 | 
				
			||||||
	atomic_inc(sk->sk_prot->orphan_count);
 | 
						percpu_counter_inc(sk->sk_prot->orphan_count);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * It is the last release_sock in its life. It will remove backlog.
 | 
						 * It is the last release_sock in its life. It will remove backlog.
 | 
				
			||||||
| 
						 | 
					@ -1064,18 +1063,21 @@ static int __init dccp_init(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long goal;
 | 
						unsigned long goal;
 | 
				
			||||||
	int ehash_order, bhash_order, i;
 | 
						int ehash_order, bhash_order, i;
 | 
				
			||||||
	int rc = -ENOBUFS;
 | 
						int rc;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
 | 
						BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
 | 
				
			||||||
		     FIELD_SIZEOF(struct sk_buff, cb));
 | 
							     FIELD_SIZEOF(struct sk_buff, cb));
 | 
				
			||||||
 | 
						rc = percpu_counter_init(&dccp_orphan_count, 0);
 | 
				
			||||||
 | 
						if (rc)
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						rc = -ENOBUFS;
 | 
				
			||||||
	inet_hashinfo_init(&dccp_hashinfo);
 | 
						inet_hashinfo_init(&dccp_hashinfo);
 | 
				
			||||||
	dccp_hashinfo.bind_bucket_cachep =
 | 
						dccp_hashinfo.bind_bucket_cachep =
 | 
				
			||||||
		kmem_cache_create("dccp_bind_bucket",
 | 
							kmem_cache_create("dccp_bind_bucket",
 | 
				
			||||||
				  sizeof(struct inet_bind_bucket), 0,
 | 
									  sizeof(struct inet_bind_bucket), 0,
 | 
				
			||||||
				  SLAB_HWCACHE_ALIGN, NULL);
 | 
									  SLAB_HWCACHE_ALIGN, NULL);
 | 
				
			||||||
	if (!dccp_hashinfo.bind_bucket_cachep)
 | 
						if (!dccp_hashinfo.bind_bucket_cachep)
 | 
				
			||||||
		goto out;
 | 
							goto out_free_percpu;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Size and allocate the main established and bind bucket
 | 
						 * Size and allocate the main established and bind bucket
 | 
				
			||||||
| 
						 | 
					@ -1168,6 +1170,8 @@ static int __init dccp_init(void)
 | 
				
			||||||
out_free_bind_bucket_cachep:
 | 
					out_free_bind_bucket_cachep:
 | 
				
			||||||
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
 | 
						kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
 | 
				
			||||||
	dccp_hashinfo.bind_bucket_cachep = NULL;
 | 
						dccp_hashinfo.bind_bucket_cachep = NULL;
 | 
				
			||||||
 | 
					out_free_percpu:
 | 
				
			||||||
 | 
						percpu_counter_destroy(&dccp_orphan_count);
 | 
				
			||||||
	goto out;
 | 
						goto out;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -561,7 +561,7 @@ void inet_csk_destroy_sock(struct sock *sk)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sk_refcnt_debug_release(sk);
 | 
						sk_refcnt_debug_release(sk);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	atomic_dec(sk->sk_prot->orphan_count);
 | 
						percpu_counter_dec(sk->sk_prot->orphan_count);
 | 
				
			||||||
	sock_put(sk);
 | 
						sock_put(sk);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -641,7 +641,7 @@ void inet_csk_listen_stop(struct sock *sk)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		sock_orphan(child);
 | 
							sock_orphan(child);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		atomic_inc(sk->sk_prot->orphan_count);
 | 
							percpu_counter_inc(sk->sk_prot->orphan_count);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		inet_csk_destroy_sock(child);
 | 
							inet_csk_destroy_sock(child);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -54,7 +54,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 | 
				
			||||||
	socket_seq_show(seq);
 | 
						socket_seq_show(seq);
 | 
				
			||||||
	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
 | 
						seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
 | 
				
			||||||
		   sock_prot_inuse_get(net, &tcp_prot),
 | 
							   sock_prot_inuse_get(net, &tcp_prot),
 | 
				
			||||||
		   atomic_read(&tcp_orphan_count),
 | 
							   (int)percpu_counter_sum_positive(&tcp_orphan_count),
 | 
				
			||||||
		   tcp_death_row.tw_count,
 | 
							   tcp_death_row.tw_count,
 | 
				
			||||||
		   (int)percpu_counter_sum_positive(&tcp_sockets_allocated),
 | 
							   (int)percpu_counter_sum_positive(&tcp_sockets_allocated),
 | 
				
			||||||
		   atomic_read(&tcp_memory_allocated));
 | 
							   atomic_read(&tcp_memory_allocated));
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -277,8 +277,7 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 | 
					int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
atomic_t tcp_orphan_count = ATOMIC_INIT(0);
 | 
					struct percpu_counter tcp_orphan_count;
 | 
				
			||||||
 | 
					 | 
				
			||||||
EXPORT_SYMBOL_GPL(tcp_orphan_count);
 | 
					EXPORT_SYMBOL_GPL(tcp_orphan_count);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int sysctl_tcp_mem[3] __read_mostly;
 | 
					int sysctl_tcp_mem[3] __read_mostly;
 | 
				
			||||||
| 
						 | 
					@ -1837,7 +1836,7 @@ void tcp_close(struct sock *sk, long timeout)
 | 
				
			||||||
	state = sk->sk_state;
 | 
						state = sk->sk_state;
 | 
				
			||||||
	sock_hold(sk);
 | 
						sock_hold(sk);
 | 
				
			||||||
	sock_orphan(sk);
 | 
						sock_orphan(sk);
 | 
				
			||||||
	atomic_inc(sk->sk_prot->orphan_count);
 | 
						percpu_counter_inc(sk->sk_prot->orphan_count);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* It is the last release_sock in its life. It will remove backlog. */
 | 
						/* It is the last release_sock in its life. It will remove backlog. */
 | 
				
			||||||
	release_sock(sk);
 | 
						release_sock(sk);
 | 
				
			||||||
| 
						 | 
					@ -1888,9 +1887,11 @@ void tcp_close(struct sock *sk, long timeout)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (sk->sk_state != TCP_CLOSE) {
 | 
						if (sk->sk_state != TCP_CLOSE) {
 | 
				
			||||||
 | 
							int orphan_count = percpu_counter_read_positive(
 | 
				
			||||||
 | 
											sk->sk_prot->orphan_count);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		sk_mem_reclaim(sk);
 | 
							sk_mem_reclaim(sk);
 | 
				
			||||||
		if (tcp_too_many_orphans(sk,
 | 
							if (tcp_too_many_orphans(sk, orphan_count)) {
 | 
				
			||||||
				atomic_read(sk->sk_prot->orphan_count))) {
 | 
					 | 
				
			||||||
			if (net_ratelimit())
 | 
								if (net_ratelimit())
 | 
				
			||||||
				printk(KERN_INFO "TCP: too many of orphaned "
 | 
									printk(KERN_INFO "TCP: too many of orphaned "
 | 
				
			||||||
				       "sockets\n");
 | 
									       "sockets\n");
 | 
				
			||||||
| 
						 | 
					@ -2689,6 +2690,7 @@ void __init tcp_init(void)
 | 
				
			||||||
	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 | 
						BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	percpu_counter_init(&tcp_sockets_allocated, 0);
 | 
						percpu_counter_init(&tcp_sockets_allocated, 0);
 | 
				
			||||||
 | 
						percpu_counter_init(&tcp_orphan_count, 0);
 | 
				
			||||||
	tcp_hashinfo.bind_bucket_cachep =
 | 
						tcp_hashinfo.bind_bucket_cachep =
 | 
				
			||||||
		kmem_cache_create("tcp_bind_bucket",
 | 
							kmem_cache_create("tcp_bind_bucket",
 | 
				
			||||||
				  sizeof(struct inet_bind_bucket), 0,
 | 
									  sizeof(struct inet_bind_bucket), 0,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -65,7 +65,7 @@ static void tcp_write_err(struct sock *sk)
 | 
				
			||||||
static int tcp_out_of_resources(struct sock *sk, int do_reset)
 | 
					static int tcp_out_of_resources(struct sock *sk, int do_reset)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct tcp_sock *tp = tcp_sk(sk);
 | 
						struct tcp_sock *tp = tcp_sk(sk);
 | 
				
			||||||
	int orphans = atomic_read(&tcp_orphan_count);
 | 
						int orphans = percpu_counter_read_positive(&tcp_orphan_count);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* If peer does not open window for long time, or did not transmit
 | 
						/* If peer does not open window for long time, or did not transmit
 | 
				
			||||||
	 * anything for long time, penalize it. */
 | 
						 * anything for long time, penalize it. */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue