mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	io_uring/af_unix: defer registered files gc to io_uring release
Instead of putting io_uring's registered files in unix_gc(), we want that
to be done by io_uring itself. The trick here is to consider io_uring
registered files for cycle detection but not actually put them down.
Because io_uring can't register other ring instances, killing all other
files in the cycle will remove all refs to the ring file, triggering the
->release path and cleaning up with io_ring_ctx_free().
Cc: stable@vger.kernel.org
Fixes: 6b06314c47 ("io_uring: add file set registration")
Reported-and-tested-by: David Bouman <dbouman03@gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
[axboe: add kerneldoc comment to skb, fold in skb leak fix]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
			
			
This commit is contained in:
		
							parent
							
								
									d7cce96c44
								
							
						
					
					
						commit
						0091bfc817
					
				
					 3 changed files with 23 additions and 0 deletions
				
			
		| 
						 | 
					@ -803,6 +803,7 @@ typedef unsigned char *sk_buff_data_t;
 | 
				
			||||||
 *	@csum_level: indicates the number of consecutive checksums found in
 | 
					 *	@csum_level: indicates the number of consecutive checksums found in
 | 
				
			||||||
 *		the packet minus one that have been verified as
 | 
					 *		the packet minus one that have been verified as
 | 
				
			||||||
 *		CHECKSUM_UNNECESSARY (max 3)
 | 
					 *		CHECKSUM_UNNECESSARY (max 3)
 | 
				
			||||||
 | 
					 *	@scm_io_uring: SKB holds io_uring registered files
 | 
				
			||||||
 *	@dst_pending_confirm: need to confirm neighbour
 | 
					 *	@dst_pending_confirm: need to confirm neighbour
 | 
				
			||||||
 *	@decrypted: Decrypted SKB
 | 
					 *	@decrypted: Decrypted SKB
 | 
				
			||||||
 *	@slow_gro: state present at GRO time, slower prepare step required
 | 
					 *	@slow_gro: state present at GRO time, slower prepare step required
 | 
				
			||||||
| 
						 | 
					@ -982,6 +983,7 @@ struct sk_buff {
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	__u8			slow_gro:1;
 | 
						__u8			slow_gro:1;
 | 
				
			||||||
	__u8			csum_not_inet:1;
 | 
						__u8			csum_not_inet:1;
 | 
				
			||||||
 | 
						__u8			scm_io_uring:1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef CONFIG_NET_SCHED
 | 
					#ifdef CONFIG_NET_SCHED
 | 
				
			||||||
	__u16			tc_index;	/* traffic control index */
 | 
						__u16			tc_index;	/* traffic control index */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -855,6 +855,7 @@ int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		UNIXCB(skb).fp = fpl;
 | 
							UNIXCB(skb).fp = fpl;
 | 
				
			||||||
		skb->sk = sk;
 | 
							skb->sk = sk;
 | 
				
			||||||
 | 
							skb->scm_io_uring = 1;
 | 
				
			||||||
		skb->destructor = unix_destruct_scm;
 | 
							skb->destructor = unix_destruct_scm;
 | 
				
			||||||
		refcount_add(skb->truesize, &sk->sk_wmem_alloc);
 | 
							refcount_add(skb->truesize, &sk->sk_wmem_alloc);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -204,6 +204,7 @@ void wait_for_unix_gc(void)
 | 
				
			||||||
/* The external entry point: unix_gc() */
 | 
					/* The external entry point: unix_gc() */
 | 
				
			||||||
void unix_gc(void)
 | 
					void unix_gc(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						struct sk_buff *next_skb, *skb;
 | 
				
			||||||
	struct unix_sock *u;
 | 
						struct unix_sock *u;
 | 
				
			||||||
	struct unix_sock *next;
 | 
						struct unix_sock *next;
 | 
				
			||||||
	struct sk_buff_head hitlist;
 | 
						struct sk_buff_head hitlist;
 | 
				
			||||||
| 
						 | 
					@ -297,11 +298,30 @@ void unix_gc(void)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_unlock(&unix_gc_lock);
 | 
						spin_unlock(&unix_gc_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* We need io_uring to clean its registered files, ignore all io_uring
 | 
				
			||||||
 | 
						 * originated skbs. It's fine as io_uring doesn't keep references to
 | 
				
			||||||
 | 
						 * other io_uring instances and so killing all other files in the cycle
 | 
				
			||||||
 | 
						 * will put all io_uring references forcing it to go through normal
 | 
				
			||||||
 | 
						 * release path, eventually putting registered files.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						skb_queue_walk_safe(&hitlist, skb, next_skb) {
 | 
				
			||||||
 | 
							if (skb->scm_io_uring) {
 | 
				
			||||||
 | 
								__skb_unlink(skb, &hitlist);
 | 
				
			||||||
 | 
								skb_queue_tail(&skb->sk->sk_receive_queue, skb);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Here we are. Hitlist is filled. Die. */
 | 
						/* Here we are. Hitlist is filled. Die. */
 | 
				
			||||||
	__skb_queue_purge(&hitlist);
 | 
						__skb_queue_purge(&hitlist);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock(&unix_gc_lock);
 | 
						spin_lock(&unix_gc_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* There could be io_uring registered files, just push them back to
 | 
				
			||||||
 | 
						 * the inflight list
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						list_for_each_entry_safe(u, next, &gc_candidates, link)
 | 
				
			||||||
 | 
							list_move_tail(&u->link, &gc_inflight_list);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* All candidates should have been detached by now. */
 | 
						/* All candidates should have been detached by now. */
 | 
				
			||||||
	BUG_ON(!list_empty(&gc_candidates));
 | 
						BUG_ON(!list_empty(&gc_candidates));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue