mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	af_unix: Fix garbage collector racing against connect()
Garbage collector does not take into account the risk of embryo getting
enqueued during the garbage collection. If such embryo has a peer that
carries SCM_RIGHTS, two consecutive passes of scan_children() may see a
different set of children, leading to an incorrectly elevated inflight
count and then a dangling pointer within the gc_inflight_list.
sockets are AF_UNIX/SOCK_STREAM
S is an unconnected socket
L is a listening in-flight socket bound to addr, not in fdtable
V's fd will be passed via sendmsg(), which bumps V's inflight count
connect(S, addr)	sendmsg(S, [V]); close(V)	__unix_gc()
----------------	-------------------------	-----------
NS = unix_create1()
skb1 = sock_wmalloc(NS)
L = unix_find_other(addr)
unix_state_lock(L)
unix_peer(S) = NS
			// V count=1 inflight=0
 			NS = unix_peer(S)
 			skb2 = sock_alloc()
			skb_queue_tail(NS, skb2[V])
			// V became in-flight
			// V count=2 inflight=1
			close(V)
			// V count=1 inflight=1
			// GC candidate condition met
						for u in gc_inflight_list:
						  if (total_refs == inflight_refs)
						    add u to gc_candidates
						// gc_candidates={L, V}
						for u in gc_candidates:
						  scan_children(u, dec_inflight)
						// embryo (skb1) was not
						// reachable from L yet, so V's
						// inflight remains unchanged
__skb_queue_tail(L, skb1)
unix_state_unlock(L)
						for u in gc_candidates:
						  if (u.inflight)
						    scan_children(u, inc_inflight_move_tail)
						// V count=1 inflight=2 (!)
If there is a GC-candidate listening socket, lock/unlock its state. This
makes GC wait until the end of any ongoing connect() to that socket. After
flipping the lock, a possibly SCM-laden embryo is already enqueued, and if
there is another embryo coming, it cannot possibly carry SCM_RIGHTS. At
this point, unix_inflight() cannot happen because unix_gc_lock is already
taken. The inflight graph remains unaffected.
Fixes: 1fd05ba5a2 ("[AF_UNIX]: Rewrite garbage collector, fixes race.")
Signed-off-by: Michal Luczaj <mhal@rbox.co>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20240409201047.1032217-1-mhal@rbox.co
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
			
			
This commit is contained in:
		
							parent
							
								
									17c5601132
								
							
						
					
					
						commit
						47d8ac011f
					
				
					 1 changed files with 17 additions and 1 deletions
				
			
		| 
						 | 
					@ -274,11 +274,22 @@ static void __unix_gc(struct work_struct *work)
 | 
				
			||||||
	 * receive queues.  Other, non candidate sockets _can_ be
 | 
						 * receive queues.  Other, non candidate sockets _can_ be
 | 
				
			||||||
	 * added to queue, so we must make sure only to touch
 | 
						 * added to queue, so we must make sure only to touch
 | 
				
			||||||
	 * candidates.
 | 
						 * candidates.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * Embryos, though never candidates themselves, affect which
 | 
				
			||||||
 | 
						 * candidates are reachable by the garbage collector.  Before
 | 
				
			||||||
 | 
						 * being added to a listener's queue, an embryo may already
 | 
				
			||||||
 | 
						 * receive data carrying SCM_RIGHTS, potentially making the
 | 
				
			||||||
 | 
						 * passed socket a candidate that is not yet reachable by the
 | 
				
			||||||
 | 
						 * collector.  It becomes reachable once the embryo is
 | 
				
			||||||
 | 
						 * enqueued.  Therefore, we must ensure that no SCM-laden
 | 
				
			||||||
 | 
						 * embryo appears in a (candidate) listener's queue between
 | 
				
			||||||
 | 
						 * consecutive scan_children() calls.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
 | 
						list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
 | 
				
			||||||
 | 
							struct sock *sk = &u->sk;
 | 
				
			||||||
		long total_refs;
 | 
							long total_refs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		total_refs = file_count(u->sk.sk_socket->file);
 | 
							total_refs = file_count(sk->sk_socket->file);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		WARN_ON_ONCE(!u->inflight);
 | 
							WARN_ON_ONCE(!u->inflight);
 | 
				
			||||||
		WARN_ON_ONCE(total_refs < u->inflight);
 | 
							WARN_ON_ONCE(total_refs < u->inflight);
 | 
				
			||||||
| 
						 | 
					@ -286,6 +297,11 @@ static void __unix_gc(struct work_struct *work)
 | 
				
			||||||
			list_move_tail(&u->link, &gc_candidates);
 | 
								list_move_tail(&u->link, &gc_candidates);
 | 
				
			||||||
			__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
 | 
								__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
 | 
				
			||||||
			__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
 | 
								__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (sk->sk_state == TCP_LISTEN) {
 | 
				
			||||||
 | 
									unix_state_lock(sk);
 | 
				
			||||||
 | 
									unix_state_unlock(sk);
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue