mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	net: unix: fix inflight counting bug in garbage collector
Previously I assumed that the receive queues of candidates don't change during the GC. This is only half true, nothing can be received from the queues (see comment in unix_gc()), but buffers could be added through the other half of the socket pair, which may still have file descriptors referring to it. This can result in inc_inflight_move_tail() erronously increasing the "inflight" counter for a unix socket for which dec_inflight() wasn't previously called. This in turn can trigger the "BUG_ON(total_refs < inflight_refs)" in a later garbage collection run. Fix this by only manipulating the "inflight" counter for sockets which are candidates themselves. Duplicating the file references in unix_attach_fds() is also needed to prevent a socket becoming a candidate for GC while the skb that contains it is not yet queued. Reported-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz> CC: stable@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									058e3739f6
								
							
						
					
					
						commit
						6209344f5a
					
				
					 3 changed files with 62 additions and 19 deletions
				
			
		| 
						 | 
					@ -54,6 +54,7 @@ struct unix_sock {
 | 
				
			||||||
        atomic_long_t           inflight;
 | 
					        atomic_long_t           inflight;
 | 
				
			||||||
        spinlock_t		lock;
 | 
					        spinlock_t		lock;
 | 
				
			||||||
	unsigned int		gc_candidate : 1;
 | 
						unsigned int		gc_candidate : 1;
 | 
				
			||||||
 | 
						unsigned int		gc_maybe_cycle : 1;
 | 
				
			||||||
        wait_queue_head_t       peer_wait;
 | 
					        wait_queue_head_t       peer_wait;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
#define unix_sk(__sk) ((struct unix_sock *)__sk)
 | 
					#define unix_sk(__sk) ((struct unix_sock *)__sk)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1302,14 +1302,23 @@ static void unix_destruct_fds(struct sk_buff *skb)
 | 
				
			||||||
	sock_wfree(skb);
 | 
						sock_wfree(skb);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 | 
					static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int i;
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Need to duplicate file references for the sake of garbage
 | 
				
			||||||
 | 
						 * collection.  Otherwise a socket in the fps might become a
 | 
				
			||||||
 | 
						 * candidate for GC while the skb is not yet queued.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						UNIXCB(skb).fp = scm_fp_dup(scm->fp);
 | 
				
			||||||
 | 
						if (!UNIXCB(skb).fp)
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (i=scm->fp->count-1; i>=0; i--)
 | 
						for (i=scm->fp->count-1; i>=0; i--)
 | 
				
			||||||
		unix_inflight(scm->fp->fp[i]);
 | 
							unix_inflight(scm->fp->fp[i]);
 | 
				
			||||||
	UNIXCB(skb).fp = scm->fp;
 | 
					 | 
				
			||||||
	skb->destructor = unix_destruct_fds;
 | 
						skb->destructor = unix_destruct_fds;
 | 
				
			||||||
	scm->fp = NULL;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					@ -1368,8 +1377,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
 | 
						memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
 | 
				
			||||||
	if (siocb->scm->fp)
 | 
						if (siocb->scm->fp) {
 | 
				
			||||||
		unix_attach_fds(siocb->scm, skb);
 | 
							err = unix_attach_fds(siocb->scm, skb);
 | 
				
			||||||
 | 
							if (err)
 | 
				
			||||||
 | 
								goto out_free;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	unix_get_secdata(siocb->scm, skb);
 | 
						unix_get_secdata(siocb->scm, skb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	skb_reset_transport_header(skb);
 | 
						skb_reset_transport_header(skb);
 | 
				
			||||||
| 
						 | 
					@ -1538,8 +1550,13 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 | 
				
			||||||
		size = min_t(int, size, skb_tailroom(skb));
 | 
							size = min_t(int, size, skb_tailroom(skb));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
 | 
							memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
 | 
				
			||||||
		if (siocb->scm->fp)
 | 
							if (siocb->scm->fp) {
 | 
				
			||||||
			unix_attach_fds(siocb->scm, skb);
 | 
								err = unix_attach_fds(siocb->scm, skb);
 | 
				
			||||||
 | 
								if (err) {
 | 
				
			||||||
 | 
									kfree_skb(skb);
 | 
				
			||||||
 | 
									goto out_err;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
 | 
							if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
 | 
				
			||||||
			kfree_skb(skb);
 | 
								kfree_skb(skb);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 | 
				
			||||||
				 */
 | 
									 */
 | 
				
			||||||
				struct sock *sk = unix_get_socket(*fp++);
 | 
									struct sock *sk = unix_get_socket(*fp++);
 | 
				
			||||||
				if (sk) {
 | 
									if (sk) {
 | 
				
			||||||
					hit = true;
 | 
										struct unix_sock *u = unix_sk(sk);
 | 
				
			||||||
					func(unix_sk(sk));
 | 
					
 | 
				
			||||||
 | 
										/*
 | 
				
			||||||
 | 
										 * Ignore non-candidates, they could
 | 
				
			||||||
 | 
										 * have been added to the queues after
 | 
				
			||||||
 | 
										 * starting the garbage collection
 | 
				
			||||||
 | 
										 */
 | 
				
			||||||
 | 
										if (u->gc_candidate) {
 | 
				
			||||||
 | 
											hit = true;
 | 
				
			||||||
 | 
											func(u);
 | 
				
			||||||
 | 
										}
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			if (hit && hitlist != NULL) {
 | 
								if (hit && hitlist != NULL) {
 | 
				
			||||||
| 
						 | 
					@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	atomic_long_inc(&u->inflight);
 | 
						atomic_long_inc(&u->inflight);
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * If this is still a candidate, move it to the end of the
 | 
						 * If this still might be part of a cycle, move it to the end
 | 
				
			||||||
	 * list, so that it's checked even if it was already passed
 | 
						 * of the list, so that it's checked even if it was already
 | 
				
			||||||
	 * over
 | 
						 * passed over
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (u->gc_candidate)
 | 
						if (u->gc_maybe_cycle)
 | 
				
			||||||
		list_move_tail(&u->link, &gc_candidates);
 | 
							list_move_tail(&u->link, &gc_candidates);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -267,6 +276,7 @@ void unix_gc(void)
 | 
				
			||||||
	struct unix_sock *next;
 | 
						struct unix_sock *next;
 | 
				
			||||||
	struct sk_buff_head hitlist;
 | 
						struct sk_buff_head hitlist;
 | 
				
			||||||
	struct list_head cursor;
 | 
						struct list_head cursor;
 | 
				
			||||||
 | 
						LIST_HEAD(not_cycle_list);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock(&unix_gc_lock);
 | 
						spin_lock(&unix_gc_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -282,10 +292,14 @@ void unix_gc(void)
 | 
				
			||||||
	 *
 | 
						 *
 | 
				
			||||||
	 * Holding unix_gc_lock will protect these candidates from
 | 
						 * Holding unix_gc_lock will protect these candidates from
 | 
				
			||||||
	 * being detached, and hence from gaining an external
 | 
						 * being detached, and hence from gaining an external
 | 
				
			||||||
	 * reference.  This also means, that since there are no
 | 
						 * reference.  Since there are no possible receivers, all
 | 
				
			||||||
	 * possible receivers, the receive queues of these sockets are
 | 
						 * buffers currently on the candidates' queues stay there
 | 
				
			||||||
	 * static during the GC, even though the dequeue is done
 | 
						 * during the garbage collection.
 | 
				
			||||||
	 * before the detach without atomicity guarantees.
 | 
						 *
 | 
				
			||||||
 | 
						 * We also know that no new candidate can be added onto the
 | 
				
			||||||
 | 
						 * receive queues.  Other, non candidate sockets _can_ be
 | 
				
			||||||
 | 
						 * added to queue, so we must make sure only to touch
 | 
				
			||||||
 | 
						 * candidates.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
 | 
						list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
 | 
				
			||||||
		long total_refs;
 | 
							long total_refs;
 | 
				
			||||||
| 
						 | 
					@ -299,6 +313,7 @@ void unix_gc(void)
 | 
				
			||||||
		if (total_refs == inflight_refs) {
 | 
							if (total_refs == inflight_refs) {
 | 
				
			||||||
			list_move_tail(&u->link, &gc_candidates);
 | 
								list_move_tail(&u->link, &gc_candidates);
 | 
				
			||||||
			u->gc_candidate = 1;
 | 
								u->gc_candidate = 1;
 | 
				
			||||||
 | 
								u->gc_maybe_cycle = 1;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -325,13 +340,23 @@ void unix_gc(void)
 | 
				
			||||||
		list_move(&cursor, &u->link);
 | 
							list_move(&cursor, &u->link);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (atomic_long_read(&u->inflight) > 0) {
 | 
							if (atomic_long_read(&u->inflight) > 0) {
 | 
				
			||||||
			list_move_tail(&u->link, &gc_inflight_list);
 | 
								list_move_tail(&u->link, ¬_cycle_list);
 | 
				
			||||||
			u->gc_candidate = 0;
 | 
								u->gc_maybe_cycle = 0;
 | 
				
			||||||
			scan_children(&u->sk, inc_inflight_move_tail, NULL);
 | 
								scan_children(&u->sk, inc_inflight_move_tail, NULL);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	list_del(&cursor);
 | 
						list_del(&cursor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * not_cycle_list contains those sockets which do not make up a
 | 
				
			||||||
 | 
						 * cycle.  Restore these to the inflight list.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						while (!list_empty(¬_cycle_list)) {
 | 
				
			||||||
 | 
							u = list_entry(not_cycle_list.next, struct unix_sock, link);
 | 
				
			||||||
 | 
							u->gc_candidate = 0;
 | 
				
			||||||
 | 
							list_move_tail(&u->link, &gc_inflight_list);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Now gc_candidates contains only garbage.  Restore original
 | 
						 * Now gc_candidates contains only garbage.  Restore original
 | 
				
			||||||
	 * inflight counters for these as well, and remove the skbuffs
 | 
						 * inflight counters for these as well, and remove the skbuffs
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue