mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	btrfs: qgroup: Fix qgroup accounting when creating snapshot
Current btrfs qgroup design implies a requirement that after calling btrfs_qgroup_account_extents() there must be a commit root switch. Normally this is OK, as btrfs_qgroup_account_extents() is only called inside btrfs_commit_transaction(), just before commit_cowonly_roots(). However, there is an exception at create_pending_snapshot(), which will call btrfs_qgroup_account_extents() but without any commit root switch. In case of creating a snapshot whose parent root is itself (create a snapshot of fs tree), it will corrupt qgroup by the following trace: (skipped unrelated data) ====== btrfs_qgroup_account_extent: bytenr = 29786112, num_bytes = 16384, nr_old_roots = 0, nr_new_roots = 1 qgroup_update_counters: qgid = 5, cur_old_count = 0, cur_new_count = 1, rfer = 0, excl = 0 qgroup_update_counters: qgid = 5, cur_old_count = 0, cur_new_count = 1, rfer = 16384, excl = 16384 btrfs_qgroup_account_extent: bytenr = 29786112, num_bytes = 16384, nr_old_roots = 0, nr_new_roots = 0 ====== The problem here is that in the first qgroup_account_extent(), the nr_new_roots of the extent is 1, which means its reference got increased, and qgroup increased its rfer and excl. But at the second qgroup_account_extent(), its reference got decreased, and between these two qgroup_account_extent() calls there is no commit root switch. This leads to the same nr_old_roots, and this extent just got ignored by qgroup, which means this extent is wrongly accounted. Fix it by calling commit_cowonly_roots() after qgroup_account_extent() in create_pending_snapshot(), with the needed preparation. Mark: I added a check at the top of qgroup_account_snapshot() to skip this code if qgroups are turned off. xfstest btrfs/122 exposes this problem. Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Reviewed-by: Josef Bacik <jbacik@fb.com> Signed-off-by: Mark Fasheh <mfasheh@suse.de> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
		
							parent
							
								
									72928f2476
								
							
						
					
					
						commit
						6426c7ad69
					
				
					 1 changed files with 105 additions and 24 deletions
				
			
		|  | @ -311,10 +311,11 @@ static noinline int join_transaction(struct btrfs_root *root, unsigned int type) | |||
|  * when the transaction commits | ||||
|  */ | ||||
| static int record_root_in_trans(struct btrfs_trans_handle *trans, | ||||
| 			       struct btrfs_root *root) | ||||
| 			       struct btrfs_root *root, | ||||
| 			       int force) | ||||
| { | ||||
| 	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && | ||||
| 	    root->last_trans < trans->transid) { | ||||
| 	if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) && | ||||
| 	    root->last_trans < trans->transid) || force) { | ||||
| 		WARN_ON(root == root->fs_info->extent_root); | ||||
| 		WARN_ON(root->commit_root != root->node); | ||||
| 
 | ||||
|  | @ -331,7 +332,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, | |||
| 		smp_wmb(); | ||||
| 
 | ||||
| 		spin_lock(&root->fs_info->fs_roots_radix_lock); | ||||
| 		if (root->last_trans == trans->transid) { | ||||
| 		if (root->last_trans == trans->transid && !force) { | ||||
| 			spin_unlock(&root->fs_info->fs_roots_radix_lock); | ||||
| 			return 0; | ||||
| 		} | ||||
|  | @ -402,7 +403,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | |||
| 		return 0; | ||||
| 
 | ||||
| 	mutex_lock(&root->fs_info->reloc_mutex); | ||||
| 	record_root_in_trans(trans, root); | ||||
| 	record_root_in_trans(trans, root, 0); | ||||
| 	mutex_unlock(&root->fs_info->reloc_mutex); | ||||
| 
 | ||||
| 	return 0; | ||||
|  | @ -1310,6 +1311,92 @@ int btrfs_defrag_root(struct btrfs_root *root) | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Do all the special qgroup handling needed while creating a snapshot. | ||||
|  * | ||||
|  * This accounts the pending extents, inherits the qgroup numbers into the | ||||
|  * new qgroup, and then performs a reduced commit inside the running | ||||
|  * transaction (commit roots are switched and all btree blocks are written | ||||
|  * to disk) so that later qgroup accounting works. | ||||
|  * | ||||
|  * @trans:        handle of the running transaction | ||||
|  * @src:          root being snapshotted; its objectid is the source | ||||
|  *                passed to btrfs_qgroup_inherit() | ||||
|  * @parent:       root that is force-recorded in the transaction again | ||||
|  *                on success | ||||
|  * @inherit:      inherit description handed to btrfs_qgroup_inherit() | ||||
|  * @dst_objectid: objectid passed to btrfs_qgroup_inherit() as the | ||||
|  *                destination | ||||
|  * | ||||
|  * Returns 0 when quotas are disabled or when every step succeeded, | ||||
|  * otherwise the error value of the step that failed. | ||||
|  */ | ||||
| static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, | ||||
| 				   struct btrfs_root *src, | ||||
| 				   struct btrfs_root *parent, | ||||
| 				   struct btrfs_qgroup_inherit *inherit, | ||||
| 				   u64 dst_objectid) | ||||
| { | ||||
| 	struct btrfs_fs_info *fs_info = src->fs_info; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Save some performance in the case that qgroups are not | ||||
| 	 * enabled: return early without touching anything. The flag is | ||||
| 	 * read under qgroup_ioctl_lock. If this check races with the | ||||
| 	 * ioctl, rescan will kick in anyway. | ||||
| 	 */ | ||||
| 	mutex_lock(&fs_info->qgroup_ioctl_lock); | ||||
| 	if (!fs_info->quota_enabled) { | ||||
| 		mutex_unlock(&fs_info->qgroup_ioctl_lock); | ||||
| 		return 0; | ||||
| 	} | ||||
| 	mutex_unlock(&fs_info->qgroup_ioctl_lock); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We are going to commit the transaction; see the comment in | ||||
| 	 * btrfs_commit_transaction() for the reason tree_log_mutex is | ||||
| 	 * locked. It stays held until the "out" label below. | ||||
| 	 */ | ||||
| 	mutex_lock(&fs_info->tree_log_mutex); | ||||
| 
 | ||||
| 	ret = commit_fs_roots(trans, src); | ||||
| 	if (ret) | ||||
| 		goto out; | ||||
| 	ret = btrfs_qgroup_prepare_account_extents(trans, fs_info); | ||||
| 	if (ret < 0) | ||||
| 		goto out; | ||||
| 	ret = btrfs_qgroup_account_extents(trans, fs_info); | ||||
| 	if (ret < 0) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	/* Now the qgroups are all updated, we can inherit them into the new qgroup */ | ||||
| 	ret = btrfs_qgroup_inherit(trans, fs_info, | ||||
| 				   src->root_key.objectid, dst_objectid, | ||||
| 				   inherit); | ||||
| 	if (ret < 0) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Now we do a simplified commit transaction, which will: | ||||
| 	 * 1) commit all subvolume and extent trees | ||||
| 	 *    To ensure all subvolume and extent trees have a valid | ||||
| 	 *    commit_root for accounting the later insert_dir_item() | ||||
| 	 * 2) write all btree blocks onto disk | ||||
| 	 *    This is to make sure later btree modifications will be cowed, | ||||
| 	 *    or commit_root can be populated and cause wrong qgroup numbers | ||||
| 	 * In this simplified commit, we don't really care about other trees | ||||
| 	 * like the chunk and root trees, as they won't affect qgroup. | ||||
| 	 * And we don't write the super block, to avoid a half committed | ||||
| 	 * state. | ||||
| 	 * | ||||
| 	 * A write-out failure is reported through btrfs_std_error() and | ||||
| 	 * its error value is returned to the caller. | ||||
| 	 */ | ||||
| 	ret = commit_cowonly_roots(trans, src); | ||||
| 	if (ret) | ||||
| 		goto out; | ||||
| 	switch_commit_roots(trans->transaction, fs_info); | ||||
| 	ret = btrfs_write_and_wait_transaction(trans, src); | ||||
| 	if (ret) | ||||
| 		btrfs_std_error(fs_info, ret, | ||||
| 			"Error while writing out transaction for qgroup"); | ||||
| 
 | ||||
| out: | ||||
| 	mutex_unlock(&fs_info->tree_log_mutex); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Force the parent root to be recorded again (force == 1), as we | ||||
| 	 * recorded it before so its last_trans == cur_transid. | ||||
| 	 * Otherwise it won't be committed again onto disk after the later | ||||
| 	 * insert_dir_item(). This is only done when every step above | ||||
| 	 * succeeded. | ||||
| 	 */ | ||||
| 	if (!ret) | ||||
| 		record_root_in_trans(trans, parent, 1); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * new snapshots need to be created at a very specific time in the | ||||
|  * transaction commit.  This does the actual creation. | ||||
|  | @ -1383,7 +1470,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 	dentry = pending->dentry; | ||||
| 	parent_inode = pending->dir; | ||||
| 	parent_root = BTRFS_I(parent_inode)->root; | ||||
| 	record_root_in_trans(trans, parent_root); | ||||
| 	record_root_in_trans(trans, parent_root, 0); | ||||
| 
 | ||||
| 	cur_time = current_fs_time(parent_inode->i_sb); | ||||
| 
 | ||||
|  | @ -1420,7 +1507,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 		goto fail; | ||||
| 	} | ||||
| 
 | ||||
| 	record_root_in_trans(trans, root); | ||||
| 	record_root_in_trans(trans, root, 0); | ||||
| 	btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | ||||
| 	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | ||||
| 	btrfs_check_and_init_root_item(new_root_item); | ||||
|  | @ -1516,6 +1603,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 		goto fail; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Do special qgroup accounting for snapshot, as we do some qgroup | ||||
| 	 * snapshot hack to do fast snapshot. | ||||
| 	 * To co-operate with that hack, we do hack again. | ||||
| 	 * Or snapshot will be greatly slowed down by a subtree qgroup rescan | ||||
| 	 */ | ||||
| 	ret = qgroup_account_snapshot(trans, root, parent_root, | ||||
| 				      pending->inherit, objectid); | ||||
| 	if (ret < 0) | ||||
| 		goto fail; | ||||
| 
 | ||||
| 	ret = btrfs_insert_dir_item(trans, parent_root, | ||||
| 				    dentry->d_name.name, dentry->d_name.len, | ||||
| 				    parent_inode, &key, | ||||
|  | @ -1559,23 +1657,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 		goto fail; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * account qgroup counters before qgroup_inherit() | ||||
| 	 */ | ||||
| 	ret = btrfs_qgroup_prepare_account_extents(trans, fs_info); | ||||
| 	if (ret) | ||||
| 		goto fail; | ||||
| 	ret = btrfs_qgroup_account_extents(trans, fs_info); | ||||
| 	if (ret) | ||||
| 		goto fail; | ||||
| 	ret = btrfs_qgroup_inherit(trans, fs_info, | ||||
| 				   root->root_key.objectid, | ||||
| 				   objectid, pending->inherit); | ||||
| 	if (ret) { | ||||
| 		btrfs_abort_transaction(trans, root, ret); | ||||
| 		goto fail; | ||||
| 	} | ||||
| 
 | ||||
| fail: | ||||
| 	pending->error = ret; | ||||
| dir_item_existed: | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Qu Wenruo
						Qu Wenruo