forked from mirrors/linux
		
	The hash table of AF_UNIX sockets is protected by a single lock.  This
patch replaces it with per-hash locks.
The effect is noticeable when we handle multiple sockets simultaneously.
Here is a test result on an EC2 c5.24xlarge instance.  It shows the latency
(under 10us only) of unix_insert_unbound_socket() while 64 CPUs each create
1024 sockets in parallel.
  Without this patch:
     nsec          : count     distribution
        0          : 179      |                                        |
        500        : 3021     |*********                               |
        1000       : 6271     |*******************                     |
        1500       : 6318     |*******************                     |
        2000       : 5828     |*****************                       |
        2500       : 5124     |***************                         |
        3000       : 4426     |*************                           |
        3500       : 3672     |***********                             |
        4000       : 3138     |*********                               |
        4500       : 2811     |********                                |
        5000       : 2384     |*******                                 |
        5500       : 2023     |******                                  |
        6000       : 1954     |*****                                   |
        6500       : 1737     |*****                                   |
        7000       : 1749     |*****                                   |
        7500       : 1520     |****                                    |
        8000       : 1469     |****                                    |
        8500       : 1394     |****                                    |
        9000       : 1232     |***                                     |
        9500       : 1138     |***                                     |
        10000      : 994      |***                                     |
  With this patch:
     nsec          : count     distribution
        0          : 1634     |****                                    |
        500        : 13170    |****************************************|
        1000       : 13156    |*************************************** |
        1500       : 9010     |***************************             |
        2000       : 6363     |*******************                     |
        2500       : 4443     |*************                           |
        3000       : 3240     |*********                               |
        3500       : 2549     |*******                                 |
        4000       : 1872     |*****                                   |
        4500       : 1504     |****                                    |
        5000       : 1247     |***                                     |
        5500       : 1035     |***                                     |
        6000       : 889      |**                                      |
        6500       : 744      |**                                      |
        7000       : 634      |*                                       |
        7500       : 498      |*                                       |
        8000       : 433      |*                                       |
        8500       : 355      |*                                       |
        9000       : 336      |*                                       |
        9500       : 284      |                                        |
        10000      : 243      |                                        |
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
		
	
			
		
			
				
	
	
		
			110 lines
		
	
	
	
		
			2.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			110 lines
		
	
	
	
		
			2.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* SPDX-License-Identifier: GPL-2.0 */
 | 
						|
#ifndef __LINUX_NET_AFUNIX_H
 | 
						|
#define __LINUX_NET_AFUNIX_H
 | 
						|
 | 
						|
#include <linux/socket.h>
 | 
						|
#include <linux/un.h>
 | 
						|
#include <linux/mutex.h>
 | 
						|
#include <linux/refcount.h>
 | 
						|
#include <net/sock.h>
 | 
						|
 | 
						|
/*
 * Entry points implemented outside this header.  Only the prototypes are
 * visible here; consult the defining source files for exact semantics.
 */

/* In-flight accounting for a file on behalf of @user (inferred from names). */
void unix_inflight(struct user_struct *user, struct file *fp);
void unix_notinflight(struct user_struct *user, struct file *fp);

/* NOTE(review): presumably the skb destructor for SCM data -- confirm. */
void unix_destruct_scm(struct sk_buff *skb);

/* Garbage-collector entry points; neither takes arguments nor returns a value. */
void unix_gc(void);
void wait_for_unix_gc(void);

/* Lookups returning a struct sock pointer for a file / for a socket's peer. */
struct sock *unix_get_socket(struct file *filp);
struct sock *unix_peer_get(struct sock *sk);
 | 
						|
 | 
						|
/*
 * Hash table geometry.
 *
 * UNIX_HASH_SIZE is derived from UNIX_HASH_BITS so the two constants cannot
 * drift apart; the resulting values are unchanged (8 bits, 256 entries).
 * The expansion is parenthesized so that expressions such as
 * 2 * UNIX_HASH_SIZE and UNIX_HASH_SIZE - 1 keep their meaning.
 */
#define UNIX_HASH_BITS	8
#define UNIX_HASH_SIZE	(1 << UNIX_HASH_BITS)
 | 
						|
 | 
						|
extern unsigned int unix_tot_inflight;
 | 
						|
extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
 | 
						|
extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 | 
						|
 | 
						|
struct unix_address {
 | 
						|
	refcount_t	refcnt;
 | 
						|
	int		len;
 | 
						|
	struct sockaddr_un name[];
 | 
						|
};
 | 
						|
 | 
						|
struct unix_skb_parms {
 | 
						|
	struct pid		*pid;		/* Skb credentials	*/
 | 
						|
	kuid_t			uid;
 | 
						|
	kgid_t			gid;
 | 
						|
	struct scm_fp_list	*fp;		/* Passed files		*/
 | 
						|
#ifdef CONFIG_SECURITY_NETWORK
 | 
						|
	u32			secid;		/* Security ID		*/
 | 
						|
#endif
 | 
						|
	u32			consumed;
 | 
						|
} __randomize_layout;
 | 
						|
 | 
						|
struct scm_stat {
 | 
						|
	atomic_t nr_fds;
 | 
						|
};
 | 
						|
 | 
						|
/* Access the cb member of @skb as an lvalue of type struct unix_skb_parms. */
#define UNIXCB(skb)	(*(struct unix_skb_parms *)&((skb)->cb))
 | 
						|
 | 
						|
/*
 * Helpers for the spinlock stored in struct unix_sock (member lock).
 * @s is a struct sock pointer; it is converted with unix_sk().
 * The _nested variant passes SINGLE_DEPTH_NESTING to spin_lock_nested().
 */
#define unix_state_lock(s)		spin_lock(&unix_sk(s)->lock)
#define unix_state_unlock(s)		spin_unlock(&unix_sk(s)->lock)
#define unix_state_lock_nested(s)					\
	spin_lock_nested(&unix_sk(s)->lock, SINGLE_DEPTH_NESTING)
 | 
						|
 | 
						|
/* The AF_UNIX socket */
 | 
						|
struct unix_sock {
 | 
						|
	/* WARNING: sk has to be the first member */
 | 
						|
	struct sock		sk;
 | 
						|
	struct unix_address	*addr;
 | 
						|
	struct path		path;
 | 
						|
	struct mutex		iolock, bindlock;
 | 
						|
	struct sock		*peer;
 | 
						|
	struct list_head	link;
 | 
						|
	atomic_long_t		inflight;
 | 
						|
	spinlock_t		lock;
 | 
						|
	unsigned long		gc_flags;
 | 
						|
#define UNIX_GC_CANDIDATE	0
 | 
						|
#define UNIX_GC_MAYBE_CYCLE	1
 | 
						|
	struct socket_wq	peer_wq;
 | 
						|
	wait_queue_entry_t	peer_wake;
 | 
						|
	struct scm_stat		scm_stat;
 | 
						|
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
 | 
						|
	struct sk_buff		*oob_skb;
 | 
						|
#endif
 | 
						|
};
 | 
						|
 | 
						|
/*
 * Convert a struct sock pointer to a struct unix_sock pointer.
 *
 * This is a plain pointer cast (which also drops the const qualifier); it
 * relies on struct sock being the first member of struct unix_sock.
 */
static inline struct unix_sock *unix_sk(const struct sock *sk)
{
	struct unix_sock *u = (struct unix_sock *)sk;

	return u;
}
 | 
						|
 | 
						|
/* Shorthand: u->peer_wait expands to u->peer_wq.wait. */
#define peer_wait peer_wq.wait
 | 
						|
 | 
						|
/*
 * Queue-length queries and receive helpers implemented outside this header.
 * NOTE(review): "inq"/"outq" presumably mean input/output queue -- confirm
 * against the definitions.
 */
long unix_inq_len(struct sock *sk);
long unix_outq_len(struct sock *sk);

int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg,
			 size_t size, int flags);
int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
			  size_t size, int flags);
 | 
						|
/*
 * Sysctl registration hooks.  With CONFIG_SYSCTL the real functions are
 * defined elsewhere; without it, inline stubs are provided: registration
 * returns 0 and unregistration does nothing.
 */
#ifdef CONFIG_SYSCTL
int unix_sysctl_register(struct net *net);
void unix_sysctl_unregister(struct net *net);
#else
static inline int unix_sysctl_register(struct net *net)
{
	return 0;
}

static inline void unix_sysctl_unregister(struct net *net)
{
}
#endif
 | 
						|
 | 
						|
#ifdef CONFIG_BPF_SYSCALL
 | 
						|
extern struct proto unix_dgram_proto;
 | 
						|
extern struct proto unix_stream_proto;
 | 
						|
 | 
						|
int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
 | 
						|
int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
 | 
						|
void __init unix_bpf_build_proto(void);
 | 
						|
#else
 | 
						|
static inline void __init unix_bpf_build_proto(void)
 | 
						|
{}
 | 
						|
#endif
 | 
						|
#endif /* __LINUX_NET_AFUNIX_H */
 |