forked from mirrors/linux
		
	af_unix: Fix splice-bind deadlock
On 2015/11/06, Dmitry Vyukov reported a deadlock involving the splice system call and AF_UNIX sockets, http://lists.openwall.net/netdev/2015/11/06/24 The situation was analyzed as (a while ago) A: socketpair() B: splice() from a pipe to /mnt/regular_file does sb_start_write() on /mnt C: try to freeze /mnt wait for B to finish with /mnt A: bind() try to bind our socket to /mnt/new_socket_name lock our socket, see it not bound yet decide that it needs to create something in /mnt try to do sb_start_write() on /mnt, block (it's waiting for C). D: splice() from the same pipe to our socket lock the pipe, see that socket is connected try to lock the socket, block waiting for A B: get around to actually feeding a chunk from pipe to file, try to lock the pipe. Deadlock. on 2015/11/10 by Al Viro, http://lists.openwall.net/netdev/2015/11/10/4 The patch fixes this by removing the kern_path_create related code from unix_mknod and executing it as part of unix_bind prior acquiring the readlock of the socket in question. This means that A (as used above) will sb_start_write on /mnt before it acquires the readlock, hence, it won't indirectly block B which first did a sb_start_write and then waited for a thread trying to acquire the readlock. Consequently, A being blocked by C waiting for B won't cause a deadlock anymore (effectively, both A and B acquire two locks in opposite order in the situation described above). Dmitry Vyukov(<dvyukov@google.com>) tested the original patch. Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									b5bdacf3bb
								
							
						
					
					
						commit
						c845acb324
					
				
					 1 changed files with 40 additions and 26 deletions
				
			
		|  | @ -953,32 +953,20 @@ static struct sock *unix_find_other(struct net *net, | ||||||
| 	return NULL; | 	return NULL; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) | static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode, | ||||||
|  | 		      struct path *res) | ||||||
| { | { | ||||||
| 	struct dentry *dentry; | 	int err; | ||||||
| 	struct path path; |  | ||||||
| 	int err = 0; |  | ||||||
| 	/*
 |  | ||||||
| 	 * Get the parent directory, calculate the hash for last |  | ||||||
| 	 * component. |  | ||||||
| 	 */ |  | ||||||
| 	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); |  | ||||||
| 	err = PTR_ERR(dentry); |  | ||||||
| 	if (IS_ERR(dentry)) |  | ||||||
| 		return err; |  | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	err = security_path_mknod(path, dentry, mode, 0); | ||||||
| 	 * All right, let's create it. |  | ||||||
| 	 */ |  | ||||||
| 	err = security_path_mknod(&path, dentry, mode, 0); |  | ||||||
| 	if (!err) { | 	if (!err) { | ||||||
| 		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); | 		err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0); | ||||||
| 		if (!err) { | 		if (!err) { | ||||||
| 			res->mnt = mntget(path.mnt); | 			res->mnt = mntget(path->mnt); | ||||||
| 			res->dentry = dget(dentry); | 			res->dentry = dget(dentry); | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	done_path_create(&path, dentry); | 
 | ||||||
| 	return err; | 	return err; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -989,10 +977,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | ||||||
| 	struct unix_sock *u = unix_sk(sk); | 	struct unix_sock *u = unix_sk(sk); | ||||||
| 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; | 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; | ||||||
| 	char *sun_path = sunaddr->sun_path; | 	char *sun_path = sunaddr->sun_path; | ||||||
| 	int err; | 	int err, name_err; | ||||||
| 	unsigned int hash; | 	unsigned int hash; | ||||||
| 	struct unix_address *addr; | 	struct unix_address *addr; | ||||||
| 	struct hlist_head *list; | 	struct hlist_head *list; | ||||||
|  | 	struct path path; | ||||||
|  | 	struct dentry *dentry; | ||||||
| 
 | 
 | ||||||
| 	err = -EINVAL; | 	err = -EINVAL; | ||||||
| 	if (sunaddr->sun_family != AF_UNIX) | 	if (sunaddr->sun_family != AF_UNIX) | ||||||
|  | @ -1008,14 +998,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | ||||||
| 		goto out; | 		goto out; | ||||||
| 	addr_len = err; | 	addr_len = err; | ||||||
| 
 | 
 | ||||||
|  | 	name_err = 0; | ||||||
|  | 	dentry = NULL; | ||||||
|  | 	if (sun_path[0]) { | ||||||
|  | 		/* Get the parent directory, calculate the hash for last
 | ||||||
|  | 		 * component. | ||||||
|  | 		 */ | ||||||
|  | 		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); | ||||||
|  | 
 | ||||||
|  | 		if (IS_ERR(dentry)) { | ||||||
|  | 			/* delay report until after 'already bound' check */ | ||||||
|  | 			name_err = PTR_ERR(dentry); | ||||||
|  | 			dentry = NULL; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	err = mutex_lock_interruptible(&u->readlock); | 	err = mutex_lock_interruptible(&u->readlock); | ||||||
| 	if (err) | 	if (err) | ||||||
| 		goto out; | 		goto out_path; | ||||||
| 
 | 
 | ||||||
| 	err = -EINVAL; | 	err = -EINVAL; | ||||||
| 	if (u->addr) | 	if (u->addr) | ||||||
| 		goto out_up; | 		goto out_up; | ||||||
| 
 | 
 | ||||||
|  | 	if (name_err) { | ||||||
|  | 		err = name_err == -EEXIST ? -EADDRINUSE : name_err; | ||||||
|  | 		goto out_up; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	err = -ENOMEM; | 	err = -ENOMEM; | ||||||
| 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); | 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); | ||||||
| 	if (!addr) | 	if (!addr) | ||||||
|  | @ -1026,11 +1036,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | ||||||
| 	addr->hash = hash ^ sk->sk_type; | 	addr->hash = hash ^ sk->sk_type; | ||||||
| 	atomic_set(&addr->refcnt, 1); | 	atomic_set(&addr->refcnt, 1); | ||||||
| 
 | 
 | ||||||
| 	if (sun_path[0]) { | 	if (dentry) { | ||||||
| 		struct path path; | 		struct path u_path; | ||||||
| 		umode_t mode = S_IFSOCK | | 		umode_t mode = S_IFSOCK | | ||||||
| 		       (SOCK_INODE(sock)->i_mode & ~current_umask()); | 		       (SOCK_INODE(sock)->i_mode & ~current_umask()); | ||||||
| 		err = unix_mknod(sun_path, mode, &path); | 		err = unix_mknod(dentry, &path, mode, &u_path); | ||||||
| 		if (err) { | 		if (err) { | ||||||
| 			if (err == -EEXIST) | 			if (err == -EEXIST) | ||||||
| 				err = -EADDRINUSE; | 				err = -EADDRINUSE; | ||||||
|  | @ -1038,9 +1048,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | ||||||
| 			goto out_up; | 			goto out_up; | ||||||
| 		} | 		} | ||||||
| 		addr->hash = UNIX_HASH_SIZE; | 		addr->hash = UNIX_HASH_SIZE; | ||||||
| 		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1); | 		hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); | ||||||
| 		spin_lock(&unix_table_lock); | 		spin_lock(&unix_table_lock); | ||||||
| 		u->path = path; | 		u->path = u_path; | ||||||
| 		list = &unix_socket_table[hash]; | 		list = &unix_socket_table[hash]; | ||||||
| 	} else { | 	} else { | ||||||
| 		spin_lock(&unix_table_lock); | 		spin_lock(&unix_table_lock); | ||||||
|  | @ -1063,6 +1073,10 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | ||||||
| 	spin_unlock(&unix_table_lock); | 	spin_unlock(&unix_table_lock); | ||||||
| out_up: | out_up: | ||||||
| 	mutex_unlock(&u->readlock); | 	mutex_unlock(&u->readlock); | ||||||
|  | out_path: | ||||||
|  | 	if (dentry) | ||||||
|  | 		done_path_create(&path, dentry); | ||||||
|  | 
 | ||||||
| out: | out: | ||||||
| 	return err; | 	return err; | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Rainer Weikusat
						Rainer Weikusat