mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	Btrfs: remove deleted xattrs on fsync log replay
If we deleted xattrs from a file and fsynced the file, after a log replay
the xattrs would remain associated to the file. This was an unexpected
behaviour and differs from what other filesystems do, such as for example
xfs and ext3/4.
Fix this by, on fsync log replay, check if every xattr in the fs/subvol
tree (that belongs to a logged inode) has a matching xattr in the log,
and if it does not, delete it from the fs/subvol tree. This is a similar
approach to what we do for dentries when we replay a directory from the
fsync log.
This issue is trivial to reproduce, and the following excerpt from my
test for xfstests triggers the issue:
  _crash_and_mount()
  {
       # Simulate a crash/power loss.
       _load_flakey_table $FLAKEY_DROP_WRITES
       _unmount_flakey
       _load_flakey_table $FLAKEY_ALLOW_WRITES
       _mount_flakey
  }
  rm -f $seqres.full
  _scratch_mkfs >> $seqres.full 2>&1
  _init_flakey
  _mount_flakey
  # Create out test file and add 3 xattrs to it.
  touch $SCRATCH_MNT/foobar
  $SETFATTR_PROG -n user.attr1 -v val1 $SCRATCH_MNT/foobar
  $SETFATTR_PROG -n user.attr2 -v val2 $SCRATCH_MNT/foobar
  $SETFATTR_PROG -n user.attr3 -v val3 $SCRATCH_MNT/foobar
  # Make sure everything is durably persisted.
  sync
  # Now delete the second xattr and fsync the inode.
  $SETFATTR_PROG -x user.attr2 $SCRATCH_MNT/foobar
  $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar
  _crash_and_mount
  # After the fsync log is replayed, the file should have only 2 xattrs, the ones
  # named user.attr1 and user.attr3. The btrfs fsync log replay bug left the file
  # with the 3 xattrs that we had before deleting the second one and fsyncing the
  # file.
  echo "xattr names and values after first fsync log replay:"
  $GETFATTR_PROG --absolute-names --dump $SCRATCH_MNT/foobar | _filter_scratch
  # Now write some data to our file, fsync it, remove the first xattr, add a new
  # hard link to our file and commit the fsync log by fsyncing some other new
  # file. This is to verify that after log replay our first xattr does not exist
  # anymore.
  echo "hello world!" >> $SCRATCH_MNT/foobar
  $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar
  $SETFATTR_PROG -x user.attr1 $SCRATCH_MNT/foobar
  ln $SCRATCH_MNT/foobar $SCRATCH_MNT/foobar_link
  touch $SCRATCH_MNT/qwerty
  $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/qwerty
  _crash_and_mount
  # Now only the xattr with name user.attr3 should be set in our file.
  echo "xattr names and values after second fsync log replay:"
  $GETFATTR_PROG --absolute-names --dump $SCRATCH_MNT/foobar | _filter_scratch
  status=0
  exit
The expected golden output, which is produced with this patch applied or
when testing against xfs or ext3/4, is:
  xattr names and values after first fsync log replay:
  # file: SCRATCH_MNT/foobar
  user.attr1="val1"
  user.attr3="val3"
  xattr names and values after second fsync log replay:
  # file: SCRATCH_MNT/foobar
  user.attr3="val3"
Without this patch applied, the output is:
  xattr names and values after first fsync log replay:
  # file: SCRATCH_MNT/foobar
  user.attr1="val1"
  user.attr2="val2"
  user.attr3="val3"
  xattr names and values after second fsync log replay:
  # file: SCRATCH_MNT/foobar
  user.attr1="val1"
  user.attr2="val2"
  user.attr3="val3"
A patch with a test case for xfstests follows soon.
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
			
			
This commit is contained in:
		
							parent
							
								
									9b305632cb
								
							
						
					
					
						commit
						4f764e5153
					
				
					 1 changed files with 109 additions and 14 deletions
				
			
		| 
						 | 
					@ -1951,6 +1951,104 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
 | 
				
			||||||
 | 
								      struct btrfs_root *root,
 | 
				
			||||||
 | 
								      struct btrfs_root *log,
 | 
				
			||||||
 | 
								      struct btrfs_path *path,
 | 
				
			||||||
 | 
								      const u64 ino)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct btrfs_key search_key;
 | 
				
			||||||
 | 
						struct btrfs_path *log_path;
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
						int nritems;
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						log_path = btrfs_alloc_path();
 | 
				
			||||||
 | 
						if (!log_path)
 | 
				
			||||||
 | 
							return -ENOMEM;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						search_key.objectid = ino;
 | 
				
			||||||
 | 
						search_key.type = BTRFS_XATTR_ITEM_KEY;
 | 
				
			||||||
 | 
						search_key.offset = 0;
 | 
				
			||||||
 | 
					again:
 | 
				
			||||||
 | 
						ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
 | 
				
			||||||
 | 
						if (ret < 0)
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
					process_leaf:
 | 
				
			||||||
 | 
						nritems = btrfs_header_nritems(path->nodes[0]);
 | 
				
			||||||
 | 
						for (i = path->slots[0]; i < nritems; i++) {
 | 
				
			||||||
 | 
							struct btrfs_key key;
 | 
				
			||||||
 | 
							struct btrfs_dir_item *di;
 | 
				
			||||||
 | 
							struct btrfs_dir_item *log_di;
 | 
				
			||||||
 | 
							u32 total_size;
 | 
				
			||||||
 | 
							u32 cur;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							btrfs_item_key_to_cpu(path->nodes[0], &key, i);
 | 
				
			||||||
 | 
							if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) {
 | 
				
			||||||
 | 
								ret = 0;
 | 
				
			||||||
 | 
								goto out;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item);
 | 
				
			||||||
 | 
							total_size = btrfs_item_size_nr(path->nodes[0], i);
 | 
				
			||||||
 | 
							cur = 0;
 | 
				
			||||||
 | 
							while (cur < total_size) {
 | 
				
			||||||
 | 
								u16 name_len = btrfs_dir_name_len(path->nodes[0], di);
 | 
				
			||||||
 | 
								u16 data_len = btrfs_dir_data_len(path->nodes[0], di);
 | 
				
			||||||
 | 
								u32 this_len = sizeof(*di) + name_len + data_len;
 | 
				
			||||||
 | 
								char *name;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								name = kmalloc(name_len, GFP_NOFS);
 | 
				
			||||||
 | 
								if (!name) {
 | 
				
			||||||
 | 
									ret = -ENOMEM;
 | 
				
			||||||
 | 
									goto out;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								read_extent_buffer(path->nodes[0], name,
 | 
				
			||||||
 | 
										   (unsigned long)(di + 1), name_len);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								log_di = btrfs_lookup_xattr(NULL, log, log_path, ino,
 | 
				
			||||||
 | 
											    name, name_len, 0);
 | 
				
			||||||
 | 
								btrfs_release_path(log_path);
 | 
				
			||||||
 | 
								if (!log_di) {
 | 
				
			||||||
 | 
									/* Doesn't exist in log tree, so delete it. */
 | 
				
			||||||
 | 
									btrfs_release_path(path);
 | 
				
			||||||
 | 
									di = btrfs_lookup_xattr(trans, root, path, ino,
 | 
				
			||||||
 | 
												name, name_len, -1);
 | 
				
			||||||
 | 
									kfree(name);
 | 
				
			||||||
 | 
									if (IS_ERR(di)) {
 | 
				
			||||||
 | 
										ret = PTR_ERR(di);
 | 
				
			||||||
 | 
										goto out;
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
									ASSERT(di);
 | 
				
			||||||
 | 
									ret = btrfs_delete_one_dir_name(trans, root,
 | 
				
			||||||
 | 
													path, di);
 | 
				
			||||||
 | 
									if (ret)
 | 
				
			||||||
 | 
										goto out;
 | 
				
			||||||
 | 
									btrfs_release_path(path);
 | 
				
			||||||
 | 
									search_key = key;
 | 
				
			||||||
 | 
									goto again;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								kfree(name);
 | 
				
			||||||
 | 
								if (IS_ERR(log_di)) {
 | 
				
			||||||
 | 
									ret = PTR_ERR(log_di);
 | 
				
			||||||
 | 
									goto out;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								cur += this_len;
 | 
				
			||||||
 | 
								di = (struct btrfs_dir_item *)((char *)di + this_len);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						ret = btrfs_next_leaf(root, path);
 | 
				
			||||||
 | 
						if (ret > 0)
 | 
				
			||||||
 | 
							ret = 0;
 | 
				
			||||||
 | 
						else if (ret == 0)
 | 
				
			||||||
 | 
							goto process_leaf;
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						btrfs_free_path(log_path);
 | 
				
			||||||
 | 
						btrfs_release_path(path);
 | 
				
			||||||
 | 
						return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * deletion replay happens before we copy any new directory items
 | 
					 * deletion replay happens before we copy any new directory items
 | 
				
			||||||
 * out of the log or out of backreferences from inodes.  It
 | 
					 * out of the log or out of backreferences from inodes.  It
 | 
				
			||||||
| 
						 | 
					@ -2104,6 +2202,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			inode_item = btrfs_item_ptr(eb, i,
 | 
								inode_item = btrfs_item_ptr(eb, i,
 | 
				
			||||||
					    struct btrfs_inode_item);
 | 
										    struct btrfs_inode_item);
 | 
				
			||||||
 | 
								ret = replay_xattr_deletes(wc->trans, root, log,
 | 
				
			||||||
 | 
											   path, key.objectid);
 | 
				
			||||||
 | 
								if (ret)
 | 
				
			||||||
 | 
									break;
 | 
				
			||||||
			mode = btrfs_inode_mode(eb, inode_item);
 | 
								mode = btrfs_inode_mode(eb, inode_item);
 | 
				
			||||||
			if (S_ISDIR(mode)) {
 | 
								if (S_ISDIR(mode)) {
 | 
				
			||||||
				ret = replay_dir_deletes(wc->trans,
 | 
									ret = replay_dir_deletes(wc->trans,
 | 
				
			||||||
| 
						 | 
					@ -4072,10 +4174,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 | 
				
			||||||
	if (S_ISDIR(inode->i_mode)) {
 | 
						if (S_ISDIR(inode->i_mode)) {
 | 
				
			||||||
		int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
 | 
							int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (inode_only == LOG_INODE_EXISTS) {
 | 
							if (inode_only == LOG_INODE_EXISTS)
 | 
				
			||||||
			max_key_type = BTRFS_INODE_EXTREF_KEY;
 | 
								max_key_type = BTRFS_XATTR_ITEM_KEY;
 | 
				
			||||||
			max_key.type = max_key_type;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
		ret = drop_objectid_items(trans, log, path, ino, max_key_type);
 | 
							ret = drop_objectid_items(trans, log, path, ino, max_key_type);
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		if (inode_only == LOG_INODE_EXISTS) {
 | 
							if (inode_only == LOG_INODE_EXISTS) {
 | 
				
			||||||
| 
						 | 
					@ -4100,7 +4200,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 | 
				
			||||||
		if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 | 
							if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 | 
				
			||||||
			     &BTRFS_I(inode)->runtime_flags)) {
 | 
								     &BTRFS_I(inode)->runtime_flags)) {
 | 
				
			||||||
			if (inode_only == LOG_INODE_EXISTS) {
 | 
								if (inode_only == LOG_INODE_EXISTS) {
 | 
				
			||||||
				max_key.type = BTRFS_INODE_EXTREF_KEY;
 | 
									max_key.type = BTRFS_XATTR_ITEM_KEY;
 | 
				
			||||||
				ret = drop_objectid_items(trans, log, path, ino,
 | 
									ret = drop_objectid_items(trans, log, path, ino,
 | 
				
			||||||
							  max_key.type);
 | 
												  max_key.type);
 | 
				
			||||||
			} else {
 | 
								} else {
 | 
				
			||||||
| 
						 | 
					@ -4111,17 +4211,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 | 
				
			||||||
				ret = btrfs_truncate_inode_items(trans, log,
 | 
									ret = btrfs_truncate_inode_items(trans, log,
 | 
				
			||||||
								 inode, 0, 0);
 | 
													 inode, 0, 0);
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		} else if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
 | 
							} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
 | 
				
			||||||
					      &BTRFS_I(inode)->runtime_flags) ||
 | 
										      &BTRFS_I(inode)->runtime_flags) ||
 | 
				
			||||||
			   inode_only == LOG_INODE_EXISTS) {
 | 
								   inode_only == LOG_INODE_EXISTS) {
 | 
				
			||||||
			if (inode_only == LOG_INODE_ALL) {
 | 
								if (inode_only == LOG_INODE_ALL)
 | 
				
			||||||
				clear_bit(BTRFS_INODE_COPY_EVERYTHING,
 | 
					 | 
				
			||||||
					  &BTRFS_I(inode)->runtime_flags);
 | 
					 | 
				
			||||||
				fast_search = true;
 | 
									fast_search = true;
 | 
				
			||||||
			max_key.type = BTRFS_XATTR_ITEM_KEY;
 | 
								max_key.type = BTRFS_XATTR_ITEM_KEY;
 | 
				
			||||||
			} else {
 | 
					 | 
				
			||||||
				max_key.type = BTRFS_INODE_EXTREF_KEY;
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			ret = drop_objectid_items(trans, log, path, ino,
 | 
								ret = drop_objectid_items(trans, log, path, ino,
 | 
				
			||||||
						  max_key.type);
 | 
											  max_key.type);
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue