	net: unix: fix inflight counting bug in garbage collector
Previously I assumed that the receive queues of candidates don't change during the GC.

This is only half true: nothing can be received from the queues (see comment in unix_gc()), but buffers could be added through the other half of the socket pair, which may still have file descriptors referring to it.

This can result in inc_inflight_move_tail() erroneously increasing the "inflight" counter for a unix socket for which dec_inflight() wasn't previously called. This in turn can trigger the "BUG_ON(total_refs < inflight_refs)" in a later garbage collection run.

Fix this by only manipulating the "inflight" counter for sockets which are candidates themselves. Duplicating the file references in unix_attach_fds() is also needed to prevent a socket becoming a candidate for GC while the skb that contains it is not yet queued.

Reported-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 058e3739f6
commit 6209344f5a
3 changed files with 62 additions and 19 deletions
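For context, here is a minimal userspace sketch (illustration only, not part of the patch) of how a socket ends up with nothing but "inflight" references: it passes one end of a socketpair back across the pair with SCM_RIGHTS and then closes both descriptors, leaving a cycle that only the unix garbage collector can reclaim. A send of this shape is also how buffers can land on a candidate's queue through "the other half of the socket pair" while the GC runs.

/* gc-cycle.c: illustration only, not from the patch. */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char byte = 0;
	struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
	union {			/* aligned buffer for one fd's cmsg */
		struct cmsghdr hdr;
		char buf[CMSG_SPACE(sizeof(int))];
	} cbuf;
	struct msghdr msg;
	struct cmsghdr *cmsg;

	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) == -1)
		return 1;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf.buf;
	msg.msg_controllen = sizeof(cbuf.buf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &sv[0], sizeof(int));

	/* The skb is queued on sv[0] (the peer of sv[1]) and carries
	 * a file reference to sv[0] itself. */
	if (sendmsg(sv[1], &msg, 0) == -1)
		return 1;

	/* Drop both fd-table references: sv[0] is now kept alive only
	 * by the skb sitting in its own receive queue, i.e. all of its
	 * references are "inflight", making it a GC candidate. */
	close(sv[0]);
	close(sv[1]);
	return 0;
}

The window between accounting the passed fds and actually queuing the skb in the send path below is precisely why unix_attach_fds() now duplicates the file references first.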
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -54,6 +54,7 @@ struct unix_sock {
 	atomic_long_t		inflight;
 	spinlock_t		lock;
 	unsigned int		gc_candidate : 1;
+	unsigned int		gc_maybe_cycle : 1;
 	wait_queue_head_t	peer_wait;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1302,14 +1302,23 @@ static void unix_destruct_fds(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
-static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
 	int i;
+
+	/*
+	 * Need to duplicate file references for the sake of garbage
+	 * collection.  Otherwise a socket in the fps might become a
+	 * candidate for GC while the skb is not yet queued.
+	 */
+	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+	if (!UNIXCB(skb).fp)
+		return -ENOMEM;
+
 	for (i=scm->fp->count-1; i>=0; i--)
 		unix_inflight(scm->fp->fp[i]);
-	UNIXCB(skb).fp = scm->fp;
 	skb->destructor = unix_destruct_fds;
-	scm->fp = NULL;
+	return 0;
 }
 
 /*
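The crux of this hunk is scm_fp_dup(). Roughly, and this is a paraphrase of net/core/scm.c rather than part of this diff (the exact allocation code varies between kernel versions), it clones the fd list and takes an extra reference on every file, so the skb owns its own pinned copy while in flight:

/* Paraphrased sketch of scm_fp_dup(); details differ by version. */
struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
{
	struct scm_fp_list *new_fpl;
	int i;

	if (!fpl)
		return NULL;

	new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
	if (new_fpl) {
		for (i = fpl->count - 1; i >= 0; i--)
			get_file(fpl->fp[i]);	/* pin each passed file */
		memcpy(new_fpl, fpl, sizeof(*fpl));
	}
	return new_fpl;
}

With the copy in place, the caller releases the sender's scm->fp normally (hence the old scm->fp = NULL handoff is gone), and unix_destruct_fds() drops the duplicated references when the skb is freed. A passed descriptor can therefore no longer make its socket look like a GC candidate in the window between attaching the fds and queuing the skb.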
@@ -1368,8 +1377,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto out;
 
 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-	if (siocb->scm->fp)
-		unix_attach_fds(siocb->scm, skb);
+	if (siocb->scm->fp) {
+		err = unix_attach_fds(siocb->scm, skb);
+		if (err)
+			goto out_free;
+	}
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1538,8 +1550,13 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		size = min_t(int, size, skb_tailroom(skb));
 
 		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-		if (siocb->scm->fp)
-			unix_attach_fds(siocb->scm, skb);
+		if (siocb->scm->fp) {
+			err = unix_attach_fds(siocb->scm, skb);
+			if (err) {
+				kfree_skb(skb);
+				goto out_err;
+			}
+		}
 
 		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
 			kfree_skb(skb);
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 				 */
 				struct sock *sk = unix_get_socket(*fp++);
 				if (sk) {
-					hit = true;
-					func(unix_sk(sk));
+					struct unix_sock *u = unix_sk(sk);
+
+					/*
+					 * Ignore non-candidates, they could
+					 * have been added to the queues after
+					 * starting the garbage collection
+					 */
+					if (u->gc_candidate) {
+						hit = true;
+						func(u);
+					}
 				}
 			}
 			if (hit && hitlist != NULL) {
@@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 {
 	atomic_long_inc(&u->inflight);
 	/*
-	 * If this is still a candidate, move it to the end of the
-	 * list, so that it's checked even if it was already passed
-	 * over
+	 * If this still might be part of a cycle, move it to the end
+	 * of the list, so that it's checked even if it was already
+	 * passed over
 	 */
-	if (u->gc_candidate)
+	if (u->gc_maybe_cycle)
 		list_move_tail(&u->link, &gc_candidates);
 }
 
@@ -267,6 +276,7 @@ void unix_gc(void)
 	struct unix_sock *next;
 	struct sk_buff_head hitlist;
 	struct list_head cursor;
+	LIST_HEAD(not_cycle_list);
 
 	spin_lock(&unix_gc_lock);
 
@@ -282,10 +292,14 @@ void unix_gc(void)
 	 *
 	 * Holding unix_gc_lock will protect these candidates from
 	 * being detached, and hence from gaining an external
-	 * reference.  This also means, that since there are no
-	 * possible receivers, the receive queues of these sockets are
-	 * static during the GC, even though the dequeue is done
-	 * before the detach without atomicity guarantees.
+	 * reference.  Since there are no possible receivers, all
+	 * buffers currently on the candidates' queues stay there
+	 * during the garbage collection.
+	 *
+	 * We also know that no new candidate can be added onto the
+	 * receive queues.  Other, non candidate sockets _can_ be
+	 * added to queue, so we must make sure only to touch
+	 * candidates.
 	 */
 	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
 		long total_refs;
@@ -299,6 +313,7 @@ void unix_gc(void)
 		if (total_refs == inflight_refs) {
 			list_move_tail(&u->link, &gc_candidates);
 			u->gc_candidate = 1;
+			u->gc_maybe_cycle = 1;
 		}
 	}
 
@@ -325,13 +340,23 @@ void unix_gc(void)
 		list_move(&cursor, &u->link);
 
 		if (atomic_long_read(&u->inflight) > 0) {
-			list_move_tail(&u->link, &gc_inflight_list);
-			u->gc_candidate = 0;
+			list_move_tail(&u->link, &not_cycle_list);
+			u->gc_maybe_cycle = 0;
 			scan_children(&u->sk, inc_inflight_move_tail, NULL);
 		}
 	}
 	list_del(&cursor);
 
+	/*
+	 * not_cycle_list contains those sockets which do not make up a
+	 * cycle.  Restore these to the inflight list.
+	 */
+	while (!list_empty(&not_cycle_list)) {
+		u = list_entry(not_cycle_list.next, struct unix_sock, link);
+		u->gc_candidate = 0;
+		list_move_tail(&u->link, &gc_inflight_list);
+	}
+
 	/*
 	 * Now gc_candidates contains only garbage.  Restore original
 	 * inflight counters for these as well, and remove the skbuffs
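Taken together, the garbage.c changes implement a small fixpoint computation. The toy userspace model below (illustration only: the graph, node numbers, and array names are invented for this example; the real code walks sk_buff queues and struct unix_sock lists) shows the three phases and why only candidates may have their counts touched:

/* Toy model of the unix GC pass; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define N 4

/* edge[a][b]: a buffer queued on socket a carries an fd for socket b */
static bool edge[N][N];
static int external_refs[N];	/* references from process fd tables */
static int inflight[N];		/* references from queued buffers */
static bool candidate[N];	/* all of its refs are in-flight refs */
static bool maybe_cycle[N];	/* still suspected to be pure garbage */

int main(void)
{
	int a, b;
	bool changed;

	/* 0 <-> 1: a dead cycle, no process holds either socket.
	 * 2 is still open in a process; its queue references 3,
	 * and 3's queue references 2 back. */
	edge[0][1] = edge[1][0] = true;
	edge[2][3] = edge[3][2] = true;
	external_refs[2] = 1;

	for (a = 0; a < N; a++)
		for (b = 0; b < N; b++)
			if (edge[a][b])
				inflight[b]++;

	/* Phase 1: candidates are sockets whose only references are
	 * in-flight ones, i.e. total_refs == inflight_refs. */
	for (a = 0; a < N; a++)
		candidate[a] = maybe_cycle[a] =
			(inflight[a] > 0 && external_refs[a] == 0);

	/* Phase 2: forget the counts contributed by candidate queues;
	 * whatever remains comes from outside the candidate set.
	 * Only candidate children are touched, mirroring the new
	 * gc_candidate test in scan_inflight(). */
	for (a = 0; a < N; a++)
		if (candidate[a])
			for (b = 0; b < N; b++)
				if (edge[a][b] && candidate[b])
					inflight[b]--;

	/* Phase 3: a candidate with inflight > 0 is reachable from
	 * outside; restore it and the counts its own queue
	 * contributes, until nothing changes. */
	do {
		changed = false;
		for (a = 0; a < N; a++) {
			if (!candidate[a] || !maybe_cycle[a] ||
			    inflight[a] <= 0)
				continue;
			maybe_cycle[a] = false;
			for (b = 0; b < N; b++)
				if (edge[a][b] && candidate[b])
					inflight[b]++;
			changed = true;
		}
	} while (changed);

	/* Whatever is still marked maybe_cycle is unreachable. */
	for (a = 0; a < N; a++)
		printf("socket %d: %s\n", a,
		       candidate[a] && maybe_cycle[a] ? "garbage" : "alive");
	return 0;
}

Running this prints sockets 0 and 1 as garbage while 3 is rescued in phase 3. The gc_candidate test matters because the kernel's graph is not static: a peer can queue new buffers onto a candidate while the GC runs, and restoring such a buffer's references in phase 3 would increment an inflight count that phase 2 never decremented, eventually tripping the "BUG_ON(total_refs < inflight_refs)" described in the commit message.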