mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	NFS: don't unhash dentry during unlink/rename
NFS unlink() (and rename over existing target) must determine if the file is open, and must perform a "silly rename" instead of an unlink (or before rename) if it is. Otherwise the client might hold a file open which has been removed on the server. Consequently if it determines that the file isn't open, it must block any subsequent opens until the unlink/rename has been completed on the server. This is currently achieved by unhashing the dentry. This forces any open attempt to the slow-path for lookup which will block on i_rwsem on the directory until the unlink/rename completes. A future patch will change the VFS to only get a shared lock on i_rwsem for unlink, so this will no longer work. Instead we introduce an explicit interlock. A special value is stored in dentry->d_fsdata while the unlink/rename is running and ->d_revalidate blocks while that value is present. When ->d_revalidate unblocks, the dentry will be invalid. This closes the race without requiring exclusion on i_rwsem. d_fsdata is already used in two different ways. 1/ an IS_ROOT directory dentry might have a "devname" stored in d_fsdata. Such a dentry doesn't have a name and so cannot be the target of unlink or rename. For safety we check if an old devname is still stored, and remove it if it is. 2/ a dentry with DCACHE_NFSFS_RENAMED set will have a 'struct nfs_unlinkdata' stored in d_fsdata. While this is set maydelete() will fail, so an unlink or rename will never proceed on such a dentry. Neither of these can be in effect when a dentry is the target of unlink or rename. So we can expect d_fsdata to be NULL, and store a special value ((void*)1) which is given the name NFS_FSDATA_BLOCKED to indicate that any lookup will be blocked. The d_count() is incremented under d_lock() when a lookup finds the dentry, so we check d_count() is low, and set NFS_FSDATA_BLOCKED under the same lock to avoid any races. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
This commit is contained in:
		
							parent
							
								
									2135e5d562
								
							
						
					
					
						commit
						3c59366c20
					
				
					 2 changed files with 63 additions and 18 deletions
				
			
		
							
								
								
									
										72
									
								
								fs/nfs/dir.c
									
									
									
									
									
								
							
							
						
						
									
										72
									
								
								fs/nfs/dir.c
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -1782,6 +1782,8 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
 | 
			
		|||
	int ret;
 | 
			
		||||
 | 
			
		||||
	if (flags & LOOKUP_RCU) {
 | 
			
		||||
		if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
 | 
			
		||||
			return -ECHILD;
 | 
			
		||||
		parent = READ_ONCE(dentry->d_parent);
 | 
			
		||||
		dir = d_inode_rcu(parent);
 | 
			
		||||
		if (!dir)
 | 
			
		||||
| 
						 | 
				
			
			@ -1790,6 +1792,9 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
 | 
			
		|||
		if (parent != READ_ONCE(dentry->d_parent))
 | 
			
		||||
			return -ECHILD;
 | 
			
		||||
	} else {
 | 
			
		||||
		/* Wait for unlink to complete */
 | 
			
		||||
		wait_var_event(&dentry->d_fsdata,
 | 
			
		||||
			       dentry->d_fsdata != NFS_FSDATA_BLOCKED);
 | 
			
		||||
		parent = dget_parent(dentry);
 | 
			
		||||
		ret = reval(d_inode(parent), dentry, flags);
 | 
			
		||||
		dput(parent);
 | 
			
		||||
| 
						 | 
				
			
			@ -2458,7 +2463,6 @@ static int nfs_safe_remove(struct dentry *dentry)
 | 
			
		|||
int nfs_unlink(struct inode *dir, struct dentry *dentry)
 | 
			
		||||
{
 | 
			
		||||
	int error;
 | 
			
		||||
	int need_rehash = 0;
 | 
			
		||||
 | 
			
		||||
	dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
 | 
			
		||||
		dir->i_ino, dentry);
 | 
			
		||||
| 
						 | 
				
			
			@ -2473,15 +2477,25 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
 | 
			
		|||
		error = nfs_sillyrename(dir, dentry);
 | 
			
		||||
		goto out;
 | 
			
		||||
	}
 | 
			
		||||
	if (!d_unhashed(dentry)) {
 | 
			
		||||
		__d_drop(dentry);
 | 
			
		||||
		need_rehash = 1;
 | 
			
		||||
	}
 | 
			
		||||
	/* We must prevent any concurrent open until the unlink
 | 
			
		||||
	 * completes.  ->d_revalidate will wait for ->d_fsdata
 | 
			
		||||
	 * to clear.  We set it here to ensure no lookup succeeds until
 | 
			
		||||
	 * the unlink is complete on the server.
 | 
			
		||||
	 */
 | 
			
		||||
	error = -ETXTBSY;
 | 
			
		||||
	if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
 | 
			
		||||
	    WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED))
 | 
			
		||||
		goto out;
 | 
			
		||||
	if (dentry->d_fsdata)
 | 
			
		||||
		/* old devname */
 | 
			
		||||
		kfree(dentry->d_fsdata);
 | 
			
		||||
	dentry->d_fsdata = NFS_FSDATA_BLOCKED;
 | 
			
		||||
 | 
			
		||||
	spin_unlock(&dentry->d_lock);
 | 
			
		||||
	error = nfs_safe_remove(dentry);
 | 
			
		||||
	nfs_dentry_remove_handle_error(dir, dentry, error);
 | 
			
		||||
	if (need_rehash)
 | 
			
		||||
		d_rehash(dentry);
 | 
			
		||||
	dentry->d_fsdata = NULL;
 | 
			
		||||
	wake_up_var(&dentry->d_fsdata);
 | 
			
		||||
out:
 | 
			
		||||
	trace_nfs_unlink_exit(dir, dentry, error);
 | 
			
		||||
	return error;
 | 
			
		||||
| 
						 | 
				
			
			@ -2588,6 +2602,15 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 | 
			
		|||
}
 | 
			
		||||
EXPORT_SYMBOL_GPL(nfs_link);
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
 | 
			
		||||
{
 | 
			
		||||
	struct dentry *new_dentry = data->new_dentry;
 | 
			
		||||
 | 
			
		||||
	new_dentry->d_fsdata = NULL;
 | 
			
		||||
	wake_up_var(&new_dentry->d_fsdata);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * RENAME
 | 
			
		||||
 * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
 | 
			
		||||
| 
						 | 
				
			
			@ -2618,8 +2641,9 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
 | 
			
		|||
{
 | 
			
		||||
	struct inode *old_inode = d_inode(old_dentry);
 | 
			
		||||
	struct inode *new_inode = d_inode(new_dentry);
 | 
			
		||||
	struct dentry *dentry = NULL, *rehash = NULL;
 | 
			
		||||
	struct dentry *dentry = NULL;
 | 
			
		||||
	struct rpc_task *task;
 | 
			
		||||
	bool must_unblock = false;
 | 
			
		||||
	int error = -EBUSY;
 | 
			
		||||
 | 
			
		||||
	if (flags)
 | 
			
		||||
| 
						 | 
				
			
			@ -2637,18 +2661,27 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
 | 
			
		|||
	 * the new target.
 | 
			
		||||
	 */
 | 
			
		||||
	if (new_inode && !S_ISDIR(new_inode->i_mode)) {
 | 
			
		||||
		/*
 | 
			
		||||
		 * To prevent any new references to the target during the
 | 
			
		||||
		 * rename, we unhash the dentry in advance.
 | 
			
		||||
		/* We must prevent any concurrent open until the unlink
 | 
			
		||||
		 * completes.  ->d_revalidate will wait for ->d_fsdata
 | 
			
		||||
		 * to clear.  We set it here to ensure no lookup succeeds until
 | 
			
		||||
		 * the unlink is complete on the server.
 | 
			
		||||
		 */
 | 
			
		||||
		if (!d_unhashed(new_dentry)) {
 | 
			
		||||
			d_drop(new_dentry);
 | 
			
		||||
			rehash = new_dentry;
 | 
			
		||||
		error = -ETXTBSY;
 | 
			
		||||
		if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
 | 
			
		||||
		    WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED))
 | 
			
		||||
			goto out;
 | 
			
		||||
		if (new_dentry->d_fsdata) {
 | 
			
		||||
			/* old devname */
 | 
			
		||||
			kfree(new_dentry->d_fsdata);
 | 
			
		||||
			new_dentry->d_fsdata = NULL;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		spin_lock(&new_dentry->d_lock);
 | 
			
		||||
		if (d_count(new_dentry) > 2) {
 | 
			
		||||
			int err;
 | 
			
		||||
 | 
			
		||||
			spin_unlock(&new_dentry->d_lock);
 | 
			
		||||
 | 
			
		||||
			/* copy the target dentry's name */
 | 
			
		||||
			dentry = d_alloc(new_dentry->d_parent,
 | 
			
		||||
					 &new_dentry->d_name);
 | 
			
		||||
| 
						 | 
				
			
			@ -2661,14 +2694,19 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
 | 
			
		|||
				goto out;
 | 
			
		||||
 | 
			
		||||
			new_dentry = dentry;
 | 
			
		||||
			rehash = NULL;
 | 
			
		||||
			new_inode = NULL;
 | 
			
		||||
		} else {
 | 
			
		||||
			new_dentry->d_fsdata = NFS_FSDATA_BLOCKED;
 | 
			
		||||
			must_unblock = true;
 | 
			
		||||
			spin_unlock(&new_dentry->d_lock);
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (S_ISREG(old_inode->i_mode))
 | 
			
		||||
		nfs_sync_inode(old_inode);
 | 
			
		||||
	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
 | 
			
		||||
	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
 | 
			
		||||
				must_unblock ? nfs_unblock_rename : NULL);
 | 
			
		||||
	if (IS_ERR(task)) {
 | 
			
		||||
		error = PTR_ERR(task);
 | 
			
		||||
		goto out;
 | 
			
		||||
| 
						 | 
				
			
			@ -2692,8 +2730,6 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
 | 
			
		|||
		spin_unlock(&old_inode->i_lock);
 | 
			
		||||
	}
 | 
			
		||||
out:
 | 
			
		||||
	if (rehash)
 | 
			
		||||
		d_rehash(rehash);
 | 
			
		||||
	trace_nfs_rename_exit(old_dir, old_dentry,
 | 
			
		||||
			new_dir, new_dentry, error);
 | 
			
		||||
	if (!error) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -617,6 +617,15 @@ nfs_fileid_to_ino_t(u64 fileid)
 | 
			
		|||
 | 
			
		||||
#define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
 | 
			
		||||
 | 
			
		||||
/* We need to block new opens while a file is being unlinked.
 | 
			
		||||
 * If it is opened *before* we decide to unlink, we will silly-rename
 | 
			
		||||
 * instead. If it is opened *after*, then we need to create or will fail.
 | 
			
		||||
 * If we allow the two to race, we could end up with a file that is open
 | 
			
		||||
 * but deleted on the server resulting in ESTALE.
 | 
			
		||||
 * So use ->d_fsdata to record when the unlink is happening
 | 
			
		||||
 * and block dentry revalidation while it is set.
 | 
			
		||||
 */
 | 
			
		||||
#define NFS_FSDATA_BLOCKED ((void*)1)
 | 
			
		||||
 | 
			
		||||
# undef ifdebug
 | 
			
		||||
# ifdef NFS_DEBUG
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue