mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	vfs: fix dentry RCU to refcounting possibly sleeping dput()
This is the fix that the last two commits indirectly led up to - making sure that we don't call dput() in a bad context on the dentries we've looked up in RCU mode after the sequence count validation fails. This basically expands d_rcu_to_refcount() into the callers, and then fixes the callers to delay the dput() in the failure case until _after_ we've dropped all locks and are no longer in an RCU-locked region. The case of 'complete_walk()' was trivial, since its failure case did the unlock_rcu_walk() directly after the call to d_rcu_to_refcount(), and as such that is just a pure expansion of the function with a trivial movement of the resulting dput() to after 'unlock_rcu_walk()'. In contrast, the unlazy_walk() case was much more complicated, because not only does convert two different dentries from RCU to be reference counted, but it used to not call unlock_rcu_walk() at all, and instead just returned an error and let the caller clean everything up in "terminate_walk()". Happily, one of the dentries in question (called "parent" inside unlazy_walk()) is the dentry of "nd->path", which terminate_walk() wants a refcount to anyway for the non-RCU case. So what the new and improved unlazy_walk() does is to first turn that dentry into a refcounted one, and once that is set up, the error cases can continue to use the terminate_walk() helper for cleanup, but for the non-RCU case. Which makes it possible to drop out of RCU mode if we actually hit the sequence number failure case. Acked-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									0d98439ea3
								
							
						
					
					
						commit
						e5c832d555
					
				
					 1 changed files with 49 additions and 53 deletions
				
			
		
							
								
								
									
										102
									
								
								fs/namei.c
									
									
									
									
									
								
							
							
						
						
									
										102
									
								
								fs/namei.c
									
									
									
									
									
								
							| 
						 | 
					@ -494,37 +494,6 @@ static inline void unlock_rcu_walk(void)
 | 
				
			||||||
	br_read_unlock(&vfsmount_lock);
 | 
						br_read_unlock(&vfsmount_lock);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * When we move over from the RCU domain to properly refcounted
 | 
					 | 
				
			||||||
 * long-lived dentries, we need to check the sequence numbers
 | 
					 | 
				
			||||||
 * we got before lookup very carefully.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * We cannot blindly increment a dentry refcount - even if it
 | 
					 | 
				
			||||||
 * is not locked - if it is zero, because it may have gone
 | 
					 | 
				
			||||||
 * through the final d_kill() logic already.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * So for a zero refcount, we need to get the spinlock (which is
 | 
					 | 
				
			||||||
 * safe even for a dead dentry because the de-allocation is
 | 
					 | 
				
			||||||
 * RCU-delayed), and check the sequence count under the lock.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * Once we have checked the sequence count, we know it is live,
 | 
					 | 
				
			||||||
 * and since we hold the spinlock it cannot die from under us.
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * In contrast, if the reference count wasn't zero, we can just
 | 
					 | 
				
			||||||
 * increment the lockref without having to take the spinlock.
 | 
					 | 
				
			||||||
 * Even if the sequence number ends up being stale, we haven't
 | 
					 | 
				
			||||||
 * gone through the final dput() and killed the dentry yet.
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	if (likely(lockref_get_not_dead(&dentry->d_lockref))) {
 | 
					 | 
				
			||||||
		if (!read_seqcount_retry(validate, seq))
 | 
					 | 
				
			||||||
				return 0;
 | 
					 | 
				
			||||||
		dput(dentry);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return -ECHILD;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * unlazy_walk - try to switch to ref-walk mode.
 | 
					 * unlazy_walk - try to switch to ref-walk mode.
 | 
				
			||||||
 * @nd: nameidata pathwalk data
 | 
					 * @nd: nameidata pathwalk data
 | 
				
			||||||
| 
						 | 
					@ -539,16 +508,29 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct fs_struct *fs = current->fs;
 | 
						struct fs_struct *fs = current->fs;
 | 
				
			||||||
	struct dentry *parent = nd->path.dentry;
 | 
						struct dentry *parent = nd->path.dentry;
 | 
				
			||||||
	int want_root = 0;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	BUG_ON(!(nd->flags & LOOKUP_RCU));
 | 
						BUG_ON(!(nd->flags & LOOKUP_RCU));
 | 
				
			||||||
	if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
 | 
					
 | 
				
			||||||
		want_root = 1;
 | 
						/*
 | 
				
			||||||
		spin_lock(&fs->lock);
 | 
						 * Get a reference to the parent first: we're
 | 
				
			||||||
		if (nd->root.mnt != fs->root.mnt ||
 | 
						 * going to make "path_put(nd->path)" valid in
 | 
				
			||||||
				nd->root.dentry != fs->root.dentry)
 | 
						 * non-RCU context for "terminate_walk()".
 | 
				
			||||||
			goto err_root;
 | 
						 *
 | 
				
			||||||
	}
 | 
						 * If this doesn't work, return immediately with
 | 
				
			||||||
 | 
						 * RCU walking still active (and then we will do
 | 
				
			||||||
 | 
						 * the RCU walk cleanup in terminate_walk()).
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (!lockref_get_not_dead(&parent->d_lockref))
 | 
				
			||||||
 | 
							return -ECHILD;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * After the mntget(), we terminate_walk() will do
 | 
				
			||||||
 | 
						 * the right thing for non-RCU mode, and all our
 | 
				
			||||||
 | 
						 * subsequent exit cases should unlock_rcu_walk()
 | 
				
			||||||
 | 
						 * before returning.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						mntget(nd->path.mnt);
 | 
				
			||||||
 | 
						nd->flags &= ~LOOKUP_RCU;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * For a negative lookup, the lookup sequence point is the parents
 | 
						 * For a negative lookup, the lookup sequence point is the parents
 | 
				
			||||||
| 
						 | 
					@ -562,30 +544,39 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 | 
				
			||||||
	 * be valid if the child sequence number is still valid.
 | 
						 * be valid if the child sequence number is still valid.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (!dentry) {
 | 
						if (!dentry) {
 | 
				
			||||||
		if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
 | 
							if (read_seqcount_retry(&parent->d_seq, nd->seq))
 | 
				
			||||||
			goto err_root;
 | 
								goto out;
 | 
				
			||||||
		BUG_ON(nd->inode != parent->d_inode);
 | 
							BUG_ON(nd->inode != parent->d_inode);
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
 | 
							if (!lockref_get_not_dead(&dentry->d_lockref))
 | 
				
			||||||
			goto err_root;
 | 
								goto out;
 | 
				
			||||||
		if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
 | 
							if (read_seqcount_retry(&dentry->d_seq, nd->seq))
 | 
				
			||||||
			goto err_parent;
 | 
								goto drop_dentry;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (want_root) {
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Sequence counts matched. Now make sure that the root is
 | 
				
			||||||
 | 
						 * still valid and get it if required.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
 | 
				
			||||||
 | 
							spin_lock(&fs->lock);
 | 
				
			||||||
 | 
							if (nd->root.mnt != fs->root.mnt || nd->root.dentry != fs->root.dentry)
 | 
				
			||||||
 | 
								goto unlock_and_drop_dentry;
 | 
				
			||||||
		path_get(&nd->root);
 | 
							path_get(&nd->root);
 | 
				
			||||||
		spin_unlock(&fs->lock);
 | 
							spin_unlock(&fs->lock);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	mntget(nd->path.mnt);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	unlock_rcu_walk();
 | 
						unlock_rcu_walk();
 | 
				
			||||||
	nd->flags &= ~LOOKUP_RCU;
 | 
					 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
err_parent:
 | 
					unlock_and_drop_dentry:
 | 
				
			||||||
	dput(dentry);
 | 
					 | 
				
			||||||
err_root:
 | 
					 | 
				
			||||||
	if (want_root)
 | 
					 | 
				
			||||||
	spin_unlock(&fs->lock);
 | 
						spin_unlock(&fs->lock);
 | 
				
			||||||
 | 
					drop_dentry:
 | 
				
			||||||
 | 
						unlock_rcu_walk();
 | 
				
			||||||
 | 
						dput(dentry);
 | 
				
			||||||
 | 
						return -ECHILD;
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						unlock_rcu_walk();
 | 
				
			||||||
	return -ECHILD;
 | 
						return -ECHILD;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -614,10 +605,15 @@ static int complete_walk(struct nameidata *nd)
 | 
				
			||||||
		if (!(nd->flags & LOOKUP_ROOT))
 | 
							if (!(nd->flags & LOOKUP_ROOT))
 | 
				
			||||||
			nd->root.mnt = NULL;
 | 
								nd->root.mnt = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
 | 
							if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
 | 
				
			||||||
			unlock_rcu_walk();
 | 
								unlock_rcu_walk();
 | 
				
			||||||
			return -ECHILD;
 | 
								return -ECHILD;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
							if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
 | 
				
			||||||
 | 
								unlock_rcu_walk();
 | 
				
			||||||
 | 
								dput(dentry);
 | 
				
			||||||
 | 
								return -ECHILD;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
		mntget(nd->path.mnt);
 | 
							mntget(nd->path.mnt);
 | 
				
			||||||
		unlock_rcu_walk();
 | 
							unlock_rcu_walk();
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue