forked from mirrors/linux
		
	af_unix: don't send SCM_CREDENTIALS by default
Since commit 7361c36c52 (af_unix: Allow credentials to work across
user and pid namespaces) af_unix performance dropped a lot.
This is because we now take a reference on pid and cred in each write()
and release them in read(), which is usually done by another process,
possibly on another cpu. This triggers false sharing.
# Events: 154K cycles
#
# Overhead  Command       Shared Object        Symbol
# ........  .......  ..................  .........................
#
    10.40%  hackbench  [kernel.kallsyms]   [k] put_pid
     8.60%  hackbench  [kernel.kallsyms]   [k] unix_stream_recvmsg
     7.87%  hackbench  [kernel.kallsyms]   [k] unix_stream_sendmsg
     6.11%  hackbench  [kernel.kallsyms]   [k] do_raw_spin_lock
     4.95%  hackbench  [kernel.kallsyms]   [k] unix_scm_to_skb
     4.87%  hackbench  [kernel.kallsyms]   [k] pid_nr_ns
     4.34%  hackbench  [kernel.kallsyms]   [k] cred_to_ucred
     2.39%  hackbench  [kernel.kallsyms]   [k] unix_destruct_scm
     2.24%  hackbench  [kernel.kallsyms]   [k] sub_preempt_count
     1.75%  hackbench  [kernel.kallsyms]   [k] fget_light
     1.51%  hackbench  [kernel.kallsyms]   [k] __mutex_lock_interruptible_slowpath
     1.42%  hackbench  [kernel.kallsyms]   [k] sock_alloc_send_pskb
This patch includes SCM_CREDENTIALS information in an af_unix message/skb
only if requested by the sender (see man 7 unix for details on how to
include ancillary data using the sendmsg() system call).
Note: This might break buggy applications that expect SCM_CREDENTIALS
from a plain write() system call while the receiver does not set the
SO_PASSCRED socket option.
If SOCK_PASSCRED is set on the source or destination socket, we still
include credentials for mere write() syscalls.
Performance boost in hackbench: more than 50% gain on a 16-thread
machine (2 quad-core cpus, 2 threads per core)
hackbench 20 thread 2000
4.228 sec instead of 9.102 sec
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
			
			
This commit is contained in:
		
							parent
							
								
									a9e9fd7182
								
							
						
					
					
						commit
						16e5726269
					
				
					 4 changed files with 33 additions and 11 deletions
				
			
		|  | @ -49,7 +49,7 @@ static __inline__ void scm_set_cred(struct scm_cookie *scm, | ||||||
| 				    struct pid *pid, const struct cred *cred) | 				    struct pid *pid, const struct cred *cred) | ||||||
| { | { | ||||||
| 	scm->pid  = get_pid(pid); | 	scm->pid  = get_pid(pid); | ||||||
| 	scm->cred = get_cred(cred); | 	scm->cred = cred ? get_cred(cred) : NULL; | ||||||
| 	cred_to_ucred(pid, cred, &scm->creds); | 	cred_to_ucred(pid, cred, &scm->creds); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -73,8 +73,7 @@ static __inline__ void scm_destroy(struct scm_cookie *scm) | ||||||
| static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, | static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, | ||||||
| 			       struct scm_cookie *scm) | 			       struct scm_cookie *scm) | ||||||
| { | { | ||||||
| 	scm_set_cred(scm, task_tgid(current), current_cred()); | 	memset(scm, 0, sizeof(*scm)); | ||||||
| 	scm->fp = NULL; |  | ||||||
| 	unix_get_peersec_dgram(sock, scm); | 	unix_get_peersec_dgram(sock, scm); | ||||||
| 	if (msg->msg_controllen <= 0) | 	if (msg->msg_controllen <= 0) | ||||||
| 		return 0; | 		return 0; | ||||||
|  |  | ||||||
|  | @ -173,7 +173,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) | ||||||
| 			if (err) | 			if (err) | ||||||
| 				goto error; | 				goto error; | ||||||
| 
 | 
 | ||||||
| 			if (pid_vnr(p->pid) != p->creds.pid) { | 			if (!p->pid || pid_vnr(p->pid) != p->creds.pid) { | ||||||
| 				struct pid *pid; | 				struct pid *pid; | ||||||
| 				err = -ESRCH; | 				err = -ESRCH; | ||||||
| 				pid = find_get_pid(p->creds.pid); | 				pid = find_get_pid(p->creds.pid); | ||||||
|  | @ -183,7 +183,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) | ||||||
| 				p->pid = pid; | 				p->pid = pid; | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
| 			if ((p->cred->euid != p->creds.uid) || | 			if (!p->cred || | ||||||
|  | 			    (p->cred->euid != p->creds.uid) || | ||||||
| 			    (p->cred->egid != p->creds.gid)) { | 			    (p->cred->egid != p->creds.gid)) { | ||||||
| 				struct cred *cred; | 				struct cred *cred; | ||||||
| 				err = -ENOMEM; | 				err = -ENOMEM; | ||||||
|  | @ -193,6 +194,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) | ||||||
| 
 | 
 | ||||||
| 				cred->uid = cred->euid = p->creds.uid; | 				cred->uid = cred->euid = p->creds.uid; | ||||||
| 				cred->gid = cred->egid = p->creds.gid; | 				cred->gid = cred->egid = p->creds.gid; | ||||||
|  | 				if (p->cred) | ||||||
| 					put_cred(p->cred); | 					put_cred(p->cred); | ||||||
| 				p->cred = cred; | 				p->cred = cred; | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
|  | @ -1324,10 +1324,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, | ||||||
| 	if (msg->msg_flags&MSG_OOB) | 	if (msg->msg_flags&MSG_OOB) | ||||||
| 		return -EOPNOTSUPP; | 		return -EOPNOTSUPP; | ||||||
| 
 | 
 | ||||||
| 	if (NULL == siocb->scm) { | 	if (NULL == siocb->scm) | ||||||
| 		siocb->scm = &scm; | 		siocb->scm = &scm; | ||||||
| 		memset(&scm, 0, sizeof(scm)); | 
 | ||||||
| 	} |  | ||||||
| 	err = scm_send(sock, msg, siocb->scm); | 	err = scm_send(sock, msg, siocb->scm); | ||||||
| 	if (err < 0) | 	if (err < 0) | ||||||
| 		return err; | 		return err; | ||||||
|  |  | ||||||
|  | @ -1381,7 +1381,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) | ||||||
| static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) | static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) | ||||||
| { | { | ||||||
| 	int err = 0; | 	int err = 0; | ||||||
|  | 
 | ||||||
| 	UNIXCB(skb).pid  = get_pid(scm->pid); | 	UNIXCB(skb).pid  = get_pid(scm->pid); | ||||||
|  | 	if (scm->cred) | ||||||
| 		UNIXCB(skb).cred = get_cred(scm->cred); | 		UNIXCB(skb).cred = get_cred(scm->cred); | ||||||
| 	UNIXCB(skb).fp = NULL; | 	UNIXCB(skb).fp = NULL; | ||||||
| 	if (scm->fp && send_fds) | 	if (scm->fp && send_fds) | ||||||
|  | @ -1391,6 +1393,24 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen | ||||||
| 	return err; | 	return err; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * Some apps rely on write() giving SCM_CREDENTIALS | ||||||
|  |  * We include credentials if source or destination socket | ||||||
|  |  * asserted SOCK_PASSCRED. | ||||||
|  |  */ | ||||||
|  | static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, | ||||||
|  | 			    const struct sock *other) | ||||||
|  | { | ||||||
|  | 	if (UNIXCB(skb).cred) | ||||||
|  | 		return; | ||||||
|  | 	if (test_bit(SOCK_PASSCRED, &sock->flags) || | ||||||
|  | 	    !other->sk_socket || | ||||||
|  | 	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { | ||||||
|  | 		UNIXCB(skb).pid  = get_pid(task_tgid(current)); | ||||||
|  | 		UNIXCB(skb).cred = get_current_cred(); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  *	Send AF_UNIX data. |  *	Send AF_UNIX data. | ||||||
|  */ |  */ | ||||||
|  | @ -1538,6 +1558,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, | ||||||
| 
 | 
 | ||||||
| 	if (sock_flag(other, SOCK_RCVTSTAMP)) | 	if (sock_flag(other, SOCK_RCVTSTAMP)) | ||||||
| 		__net_timestamp(skb); | 		__net_timestamp(skb); | ||||||
|  | 	maybe_add_creds(skb, sock, other); | ||||||
| 	skb_queue_tail(&other->sk_receive_queue, skb); | 	skb_queue_tail(&other->sk_receive_queue, skb); | ||||||
| 	if (max_level > unix_sk(other)->recursion_level) | 	if (max_level > unix_sk(other)->recursion_level) | ||||||
| 		unix_sk(other)->recursion_level = max_level; | 		unix_sk(other)->recursion_level = max_level; | ||||||
|  | @ -1652,6 +1673,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, | ||||||
| 		    (other->sk_shutdown & RCV_SHUTDOWN)) | 		    (other->sk_shutdown & RCV_SHUTDOWN)) | ||||||
| 			goto pipe_err_free; | 			goto pipe_err_free; | ||||||
| 
 | 
 | ||||||
|  | 		maybe_add_creds(skb, sock, other); | ||||||
| 		skb_queue_tail(&other->sk_receive_queue, skb); | 		skb_queue_tail(&other->sk_receive_queue, skb); | ||||||
| 		if (max_level > unix_sk(other)->recursion_level) | 		if (max_level > unix_sk(other)->recursion_level) | ||||||
| 			unix_sk(other)->recursion_level = max_level; | 			unix_sk(other)->recursion_level = max_level; | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Eric Dumazet
						Eric Dumazet