Mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 10:40:15 +02:00)
	net: add sk->sk_drop_counters
Some sockets suffer from heavy false sharing on sk->sk_drops, and fields in the same cache line.

Add sk->sk_drop_counters to:

- move the drop counter(s) to dedicated cache lines.
- Add basic NUMA awareness to these drop counter(s).

Following patches will use this infrastructure for UDP and RAW sockets.

sk_clone_lock() is not yet ready, it would need to properly set newsk->sk_drop_counters if we plan to use this for TCP sockets.

v2: used Paolo suggestion from https://lore.kernel.org/netdev/8f09830a-d83d-43c9-b36b-88ba0a23e9b2@redhat.com/

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250826125031.1578842-4-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
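For reference, here is a minimal user-space C sketch of the pattern this patch introduces: two counters kept on separate cache lines, with each writer picking one by NUMA node parity, so concurrent drop updates from different nodes do not bounce a single cache line. The 64-byte line size and the getcpu()-based node lookup are illustrative assumptions, not part of the patch.

/*
 * Sketch only (user space, not kernel code): split one hot counter into
 * two cache-line-aligned counters and spread writers across them by
 * NUMA node parity; readers sum both halves.
 */
#define _GNU_SOURCE
#include <sched.h>          /* getcpu(), glibc >= 2.29 (assumption) */
#include <stdatomic.h>
#include <stdio.h>

#define CACHE_LINE_SIZE 64  /* assumed line size for illustration */

struct drop_counters {
	_Alignas(CACHE_LINE_SIZE) atomic_int drops0;
	_Alignas(CACHE_LINE_SIZE) atomic_int drops1;
};

static void drops_add(struct drop_counters *dc, int segs)
{
	unsigned int cpu = 0, node = 0;

	getcpu(&cpu, &node);            /* current CPU and NUMA node */
	if (node % 2)                   /* pick one line by node parity */
		atomic_fetch_add(&dc->drops1, segs);
	else
		atomic_fetch_add(&dc->drops0, segs);
}

static int drops_read(const struct drop_counters *dc)
{
	/* A read must fold both halves back into one total. */
	return atomic_load(&dc->drops0) + atomic_load(&dc->drops1);
}

int main(void)
{
	struct drop_counters dc = { 0 };

	drops_add(&dc, 3);
	printf("drops=%d\n", drops_read(&dc));
	return 0;
}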
parent cb4d5a6eb6
commit c51613fa27

2 changed files with 33 additions and 1 deletion
include/net/sock.h

@@ -102,6 +102,11 @@ struct net;
 typedef __u32 __bitwise __portpair;
 typedef __u64 __bitwise __addrpair;
 
+struct socket_drop_counters {
+	atomic_t	drops0 ____cacheline_aligned_in_smp;
+	atomic_t	drops1 ____cacheline_aligned_in_smp;
+};
+
 /**
  *	struct sock_common - minimal network layer representation of sockets
  *	@skc_daddr: Foreign IPv4 addr
@@ -282,6 +287,7 @@ struct sk_filter;
  *	@sk_err_soft: errors that don't cause failure but are the cause of a
  *		      persistent failure not just 'timed out'
  *	@sk_drops: raw/udp drops counter
+ *	@sk_drop_counters: optional pointer to socket_drop_counters
  *	@sk_ack_backlog: current listen backlog
  *	@sk_max_ack_backlog: listen backlog set in listen()
  *	@sk_uid: user id of owner
@@ -449,6 +455,7 @@ struct sock {
 #ifdef CONFIG_XFRM
 	struct xfrm_policy __rcu *sk_policy[2];
 #endif
+	struct socket_drop_counters *sk_drop_counters;
 	__cacheline_group_end(sock_read_rxtx);
 
 	__cacheline_group_begin(sock_write_rxtx);
@@ -2684,7 +2691,18 @@ struct sock_skb_cb {
 
 static inline void sk_drops_add(struct sock *sk, int segs)
 {
-	atomic_add(segs, &sk->sk_drops);
+	struct socket_drop_counters *sdc = sk->sk_drop_counters;
+
+	if (sdc) {
+		int n = numa_node_id() % 2;
+
+		if (n)
+			atomic_add(segs, &sdc->drops1);
+		else
+			atomic_add(segs, &sdc->drops0);
+	} else {
+		atomic_add(segs, &sk->sk_drops);
+	}
 }
 
 static inline void sk_drops_inc(struct sock *sk)
@@ -2694,11 +2712,23 @@ static inline void sk_drops_inc(struct sock *sk)
 
 static inline int sk_drops_read(const struct sock *sk)
 {
+	const struct socket_drop_counters *sdc = sk->sk_drop_counters;
+
+	if (sdc) {
+		DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops));
+		return atomic_read(&sdc->drops0) + atomic_read(&sdc->drops1);
+	}
 	return atomic_read(&sk->sk_drops);
 }
 
 static inline void sk_drops_reset(struct sock *sk)
 {
+	struct socket_drop_counters *sdc = sk->sk_drop_counters;
+
+	if (sdc) {
+		atomic_set(&sdc->drops0, 0);
+		atomic_set(&sdc->drops1, 0);
+	}
 	atomic_set(&sk->sk_drops, 0);
 }
 
net/core/sock.c

@@ -2505,6 +2505,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 	newsk->sk_wmem_queued	= 0;
 	newsk->sk_forward_alloc = 0;
 	newsk->sk_reserved_mem  = 0;
+	DEBUG_NET_WARN_ON_ONCE(newsk->sk_drop_counters);
 	sk_drops_reset(newsk);
 	newsk->sk_send_head	= NULL;
 	newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
@@ -4457,6 +4458,7 @@ static int __init sock_struct_check(void)
 #ifdef CONFIG_MEMCG
 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg);
 #endif
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_drop_counters);
 
 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock);
 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem);
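Following patches wire this up for UDP and RAW sockets. As a rough sketch of how a protocol could use the infrastructure (the struct and function names below are hypothetical; only struct socket_drop_counters and sk->sk_drop_counters come from this patch), the protocol embeds the counters in its per-socket structure and points sk->sk_drop_counters at them at init time:

/*
 * Sketch only, assuming the definitions added by this patch.  The
 * my_proto_* names are placeholders, not the actual follow-up patches.
 */
struct my_proto_sock {
	struct sock			sk;		/* must come first */
	struct socket_drop_counters	drop_counters;	/* dedicated cache lines */
};

static void my_proto_init_sock(struct sock *sk)
{
	struct my_proto_sock *mp = (struct my_proto_sock *)sk;

	/*
	 * Once set, sk_drops_add()/sk_drops_read()/sk_drops_reset() use
	 * the NUMA-spread counters instead of sk->sk_drops.
	 */
	sk->sk_drop_counters = &mp->drop_counters;
}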