mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	btrfs: fix readdir deadlock with pagefault
Readdir does dir_emit while under the btree lock.  dir_emit can trigger
a page fault, which means we can deadlock.  Fix this by allocating a
buffer when opening a directory, copying the readdir entries into this
buffer, and doing dir_emit from outside of the tree lock.
The deadlock scenario is as follows:

Thread A
readdir  <holding tree lock>
  dir_emit
    <page fault>
      down_read(mmap_sem)

Thread B
mmap write
  down_write(mmap_sem)
    page_mkwrite
      wait_ordered_extents

Process C
finish_ordered_extent
  insert_reserved_file_extent
   try to lock leaf <hang>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ copy the deadlock scenario to changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
			
			
This commit is contained in:
		
							parent
							
								
									8d8aafeea2
								
							
						
					
					
						commit
						23b5ec7494
					
				
					 4 changed files with 110 additions and 34 deletions
				
			
		| 
						 | 
					@ -1264,6 +1264,11 @@ struct btrfs_root {
 | 
				
			||||||
	atomic64_t qgroup_meta_rsv;
 | 
						atomic64_t qgroup_meta_rsv;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct btrfs_file_private {
 | 
				
			||||||
 | 
						struct btrfs_trans_handle *trans;
 | 
				
			||||||
 | 
						void *filldir_buf;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline u32 btrfs_inode_sectorsize(const struct inode *inode)
 | 
					static inline u32 btrfs_inode_sectorsize(const struct inode *inode)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return btrfs_sb(inode->i_sb)->sectorsize;
 | 
						return btrfs_sb(inode->i_sb)->sectorsize;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1990,8 +1990,15 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int btrfs_release_file(struct inode *inode, struct file *filp)
 | 
					int btrfs_release_file(struct inode *inode, struct file *filp)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (filp->private_data)
 | 
						struct btrfs_file_private *private = filp->private_data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (private && private->trans)
 | 
				
			||||||
		btrfs_ioctl_trans_end(filp);
 | 
							btrfs_ioctl_trans_end(filp);
 | 
				
			||||||
 | 
						if (private && private->filldir_buf)
 | 
				
			||||||
 | 
							kfree(private->filldir_buf);
 | 
				
			||||||
 | 
						kfree(private);
 | 
				
			||||||
 | 
						filp->private_data = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * ordered_data_close is set by settattr when we are about to truncate
 | 
						 * ordered_data_close is set by settattr when we are about to truncate
 | 
				
			||||||
	 * a file from a non-zero size to a zero size.  This tries to
 | 
						 * a file from a non-zero size to a zero size.  This tries to
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										108
									
								
								fs/btrfs/inode.c
									
									
									
									
									
								
							
							
						
						
									
										108
									
								
								fs/btrfs/inode.c
									
									
									
									
									
								
							| 
						 | 
					@ -5876,25 +5876,74 @@ unsigned char btrfs_filetype_table[] = {
 | 
				
			||||||
	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
 | 
						DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * All this infrastructure exists because dir_emit can fault, and we are holding
 | 
				
			||||||
 | 
					 * the tree lock when doing readdir.  For now just allocate a buffer and copy
 | 
				
			||||||
 | 
					 * our information into that, and then dir_emit from the buffer.  This is
 | 
				
			||||||
 | 
					 * similar to what NFS does, only we don't keep the buffer around in pagecache
 | 
				
			||||||
 | 
					 * because I'm afraid I'll mess that up.  Long term we need to make filldir do
 | 
				
			||||||
 | 
					 * copy_to_user_inatomic so we don't have to worry about page faulting under the
 | 
				
			||||||
 | 
					 * tree lock.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static int btrfs_opendir(struct inode *inode, struct file *file)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct btrfs_file_private *private;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
 | 
				
			||||||
 | 
						if (!private)
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
						private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
 | 
				
			||||||
 | 
						if (!private->filldir_buf) {
 | 
				
			||||||
 | 
							kfree(private);
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						file->private_data = private;
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct dir_entry {
 | 
				
			||||||
 | 
						u64 ino;
 | 
				
			||||||
 | 
						u64 offset;
 | 
				
			||||||
 | 
						unsigned type;
 | 
				
			||||||
 | 
						int name_len;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						while (entries--) {
 | 
				
			||||||
 | 
							struct dir_entry *entry = addr;
 | 
				
			||||||
 | 
							char *name = (char *)(entry + 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							ctx->pos = entry->offset;
 | 
				
			||||||
 | 
							if (!dir_emit(ctx, name, entry->name_len, entry->ino,
 | 
				
			||||||
 | 
								      entry->type))
 | 
				
			||||||
 | 
								return 1;
 | 
				
			||||||
 | 
							addr += sizeof(struct dir_entry) + entry->name_len;
 | 
				
			||||||
 | 
							ctx->pos++;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 | 
					static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct inode *inode = file_inode(file);
 | 
						struct inode *inode = file_inode(file);
 | 
				
			||||||
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 | 
						struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 | 
				
			||||||
	struct btrfs_root *root = BTRFS_I(inode)->root;
 | 
						struct btrfs_root *root = BTRFS_I(inode)->root;
 | 
				
			||||||
 | 
						struct btrfs_file_private *private = file->private_data;
 | 
				
			||||||
	struct btrfs_dir_item *di;
 | 
						struct btrfs_dir_item *di;
 | 
				
			||||||
	struct btrfs_key key;
 | 
						struct btrfs_key key;
 | 
				
			||||||
	struct btrfs_key found_key;
 | 
						struct btrfs_key found_key;
 | 
				
			||||||
	struct btrfs_path *path;
 | 
						struct btrfs_path *path;
 | 
				
			||||||
 | 
						void *addr;
 | 
				
			||||||
	struct list_head ins_list;
 | 
						struct list_head ins_list;
 | 
				
			||||||
	struct list_head del_list;
 | 
						struct list_head del_list;
 | 
				
			||||||
	int ret;
 | 
						int ret;
 | 
				
			||||||
	struct extent_buffer *leaf;
 | 
						struct extent_buffer *leaf;
 | 
				
			||||||
	int slot;
 | 
						int slot;
 | 
				
			||||||
	unsigned char d_type;
 | 
					 | 
				
			||||||
	int over = 0;
 | 
					 | 
				
			||||||
	char tmp_name[32];
 | 
					 | 
				
			||||||
	char *name_ptr;
 | 
						char *name_ptr;
 | 
				
			||||||
	int name_len;
 | 
						int name_len;
 | 
				
			||||||
 | 
						int entries = 0;
 | 
				
			||||||
 | 
						int total_len = 0;
 | 
				
			||||||
	bool put = false;
 | 
						bool put = false;
 | 
				
			||||||
	struct btrfs_key location;
 | 
						struct btrfs_key location;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5905,12 +5954,14 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 | 
				
			||||||
	if (!path)
 | 
						if (!path)
 | 
				
			||||||
		return -ENOMEM;
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						addr = private->filldir_buf;
 | 
				
			||||||
	path->reada = READA_FORWARD;
 | 
						path->reada = READA_FORWARD;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	INIT_LIST_HEAD(&ins_list);
 | 
						INIT_LIST_HEAD(&ins_list);
 | 
				
			||||||
	INIT_LIST_HEAD(&del_list);
 | 
						INIT_LIST_HEAD(&del_list);
 | 
				
			||||||
	put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
 | 
						put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					again:
 | 
				
			||||||
	key.type = BTRFS_DIR_INDEX_KEY;
 | 
						key.type = BTRFS_DIR_INDEX_KEY;
 | 
				
			||||||
	key.offset = ctx->pos;
 | 
						key.offset = ctx->pos;
 | 
				
			||||||
	key.objectid = btrfs_ino(BTRFS_I(inode));
 | 
						key.objectid = btrfs_ino(BTRFS_I(inode));
 | 
				
			||||||
| 
						 | 
					@ -5920,6 +5971,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 | 
				
			||||||
		goto err;
 | 
							goto err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	while (1) {
 | 
						while (1) {
 | 
				
			||||||
 | 
							struct dir_entry *entry;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		leaf = path->nodes[0];
 | 
							leaf = path->nodes[0];
 | 
				
			||||||
		slot = path->slots[0];
 | 
							slot = path->slots[0];
 | 
				
			||||||
		if (slot >= btrfs_header_nritems(leaf)) {
 | 
							if (slot >= btrfs_header_nritems(leaf)) {
 | 
				
			||||||
| 
						 | 
					@ -5941,41 +5994,43 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 | 
				
			||||||
			goto next;
 | 
								goto next;
 | 
				
			||||||
		if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
 | 
							if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
 | 
				
			||||||
			goto next;
 | 
								goto next;
 | 
				
			||||||
 | 
					 | 
				
			||||||
		ctx->pos = found_key.offset;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
 | 
							di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
 | 
				
			||||||
		if (verify_dir_item(fs_info, leaf, slot, di))
 | 
							if (verify_dir_item(fs_info, leaf, slot, di))
 | 
				
			||||||
			goto next;
 | 
								goto next;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		name_len = btrfs_dir_name_len(leaf, di);
 | 
							name_len = btrfs_dir_name_len(leaf, di);
 | 
				
			||||||
		if (name_len <= sizeof(tmp_name)) {
 | 
							if ((total_len + sizeof(struct dir_entry) + name_len) >=
 | 
				
			||||||
			name_ptr = tmp_name;
 | 
							    PAGE_SIZE) {
 | 
				
			||||||
		} else {
 | 
								btrfs_release_path(path);
 | 
				
			||||||
			name_ptr = kmalloc(name_len, GFP_KERNEL);
 | 
								ret = btrfs_filldir(private->filldir_buf, entries, ctx);
 | 
				
			||||||
			if (!name_ptr) {
 | 
								if (ret)
 | 
				
			||||||
				ret = -ENOMEM;
 | 
									goto nopos;
 | 
				
			||||||
				goto err;
 | 
								addr = private->filldir_buf;
 | 
				
			||||||
			}
 | 
								entries = 0;
 | 
				
			||||||
 | 
								total_len = 0;
 | 
				
			||||||
 | 
								goto again;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							entry = addr;
 | 
				
			||||||
 | 
							entry->name_len = name_len;
 | 
				
			||||||
 | 
							name_ptr = (char *)(entry + 1);
 | 
				
			||||||
		read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
 | 
							read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
 | 
				
			||||||
				   name_len);
 | 
									   name_len);
 | 
				
			||||||
 | 
							entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
 | 
				
			||||||
		d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
 | 
					 | 
				
			||||||
		btrfs_dir_item_key_to_cpu(leaf, di, &location);
 | 
							btrfs_dir_item_key_to_cpu(leaf, di, &location);
 | 
				
			||||||
 | 
							entry->ino = location.objectid;
 | 
				
			||||||
		over = !dir_emit(ctx, name_ptr, name_len, location.objectid,
 | 
							entry->offset = found_key.offset;
 | 
				
			||||||
				 d_type);
 | 
							entries++;
 | 
				
			||||||
 | 
							addr += sizeof(struct dir_entry) + name_len;
 | 
				
			||||||
		if (name_ptr != tmp_name)
 | 
							total_len += sizeof(struct dir_entry) + name_len;
 | 
				
			||||||
			kfree(name_ptr);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (over)
 | 
					 | 
				
			||||||
			goto nopos;
 | 
					 | 
				
			||||||
		ctx->pos++;
 | 
					 | 
				
			||||||
next:
 | 
					next:
 | 
				
			||||||
		path->slots[0]++;
 | 
							path->slots[0]++;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						btrfs_release_path(path);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ret = btrfs_filldir(private->filldir_buf, entries, ctx);
 | 
				
			||||||
 | 
						if (ret)
 | 
				
			||||||
 | 
							goto nopos;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
 | 
						ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
 | 
				
			||||||
	if (ret)
 | 
						if (ret)
 | 
				
			||||||
| 
						 | 
					@ -10779,6 +10834,7 @@ static const struct file_operations btrfs_dir_file_operations = {
 | 
				
			||||||
	.llseek		= generic_file_llseek,
 | 
						.llseek		= generic_file_llseek,
 | 
				
			||||||
	.read		= generic_read_dir,
 | 
						.read		= generic_read_dir,
 | 
				
			||||||
	.iterate_shared	= btrfs_real_readdir,
 | 
						.iterate_shared	= btrfs_real_readdir,
 | 
				
			||||||
 | 
						.open		= btrfs_opendir,
 | 
				
			||||||
	.unlocked_ioctl	= btrfs_ioctl,
 | 
						.unlocked_ioctl	= btrfs_ioctl,
 | 
				
			||||||
#ifdef CONFIG_COMPAT
 | 
					#ifdef CONFIG_COMPAT
 | 
				
			||||||
	.compat_ioctl	= btrfs_compat_ioctl,
 | 
						.compat_ioctl	= btrfs_compat_ioctl,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3966,6 +3966,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
 | 
				
			||||||
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 | 
						struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 | 
				
			||||||
	struct btrfs_root *root = BTRFS_I(inode)->root;
 | 
						struct btrfs_root *root = BTRFS_I(inode)->root;
 | 
				
			||||||
	struct btrfs_trans_handle *trans;
 | 
						struct btrfs_trans_handle *trans;
 | 
				
			||||||
 | 
						struct btrfs_file_private *private;
 | 
				
			||||||
	int ret;
 | 
						int ret;
 | 
				
			||||||
	static bool warned = false;
 | 
						static bool warned = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3984,8 +3985,16 @@ static long btrfs_ioctl_trans_start(struct file *file)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ret = -EINPROGRESS;
 | 
						ret = -EINPROGRESS;
 | 
				
			||||||
	if (file->private_data)
 | 
						private = file->private_data;
 | 
				
			||||||
 | 
						if (private && private->trans)
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
 | 
						if (!private) {
 | 
				
			||||||
 | 
							private = kzalloc(sizeof(struct btrfs_file_private),
 | 
				
			||||||
 | 
									  GFP_KERNEL);
 | 
				
			||||||
 | 
							if (!private)
 | 
				
			||||||
 | 
								return -ENOMEM;
 | 
				
			||||||
 | 
							file->private_data = private;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ret = -EROFS;
 | 
						ret = -EROFS;
 | 
				
			||||||
	if (btrfs_root_readonly(root))
 | 
						if (btrfs_root_readonly(root))
 | 
				
			||||||
| 
						 | 
					@ -4002,7 +4011,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
 | 
				
			||||||
	if (IS_ERR(trans))
 | 
						if (IS_ERR(trans))
 | 
				
			||||||
		goto out_drop;
 | 
							goto out_drop;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	file->private_data = trans;
 | 
						private->trans = trans;
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out_drop:
 | 
					out_drop:
 | 
				
			||||||
| 
						 | 
					@ -4257,14 +4266,13 @@ long btrfs_ioctl_trans_end(struct file *file)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct inode *inode = file_inode(file);
 | 
						struct inode *inode = file_inode(file);
 | 
				
			||||||
	struct btrfs_root *root = BTRFS_I(inode)->root;
 | 
						struct btrfs_root *root = BTRFS_I(inode)->root;
 | 
				
			||||||
	struct btrfs_trans_handle *trans;
 | 
						struct btrfs_file_private *private = file->private_data;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	trans = file->private_data;
 | 
						if (!private || !private->trans)
 | 
				
			||||||
	if (!trans)
 | 
					 | 
				
			||||||
		return -EINVAL;
 | 
							return -EINVAL;
 | 
				
			||||||
	file->private_data = NULL;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	btrfs_end_transaction(trans);
 | 
						btrfs_end_transaction(private->trans);
 | 
				
			||||||
 | 
						private->trans = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	atomic_dec(&root->fs_info->open_ioctl_trans);
 | 
						atomic_dec(&root->fs_info->open_ioctl_trans);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue