Mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 10:40:15 +02:00

	btrfs: qgroup: Move half of the qgroup accounting time out of commit trans
As Filipe pointed out, the most time-consuming parts of qgroup are btrfs_qgroup_account_extents() and btrfs_qgroup_prepare_account_extents(), both of which call btrfs_find_all_roots() to get the @old_roots and @new_roots ulists.

What makes things worse is that we call the expensive btrfs_find_all_roots() at transaction commit time, under TRANS_STATE_COMMIT_DOING, which blocks all incoming transactions.

Such behavior is necessary for the @new_roots search, as the current btrfs_find_all_roots() can't produce a correct result otherwise, so we call it just before switching the commit roots. For the @old_roots search, however, it is not necessary: that search is based on the commit root, so it is always correct, and we can move it out of transaction commit.

This patch moves the @old_roots search out of commit_transaction(), so in theory qgroup's time consumption inside commit_transaction() can be halved. Note that this does not speed up qgroup overall; the total time consumption stays the same, it just reduces the performance stall.

Cc: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
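The locking pattern the patch relies on -- insert the tracking record while the delayed-refs spinlock is held, report whether the insert happened through an out-parameter, and run the expensive (sleeping) backref walk only after the lock is dropped -- can be sketched in a few lines of userspace C. All names below (struct record, insert_record_nolock(), record_post(), expensive_backref_walk()) are illustrative stand-ins, not btrfs APIs:

/*
 * Minimal userspace sketch of the pattern (not kernel code): do the cheap
 * insert under a spinlock, remember whether it happened, and defer the
 * expensive lookup -- btrfs_find_all_roots() in the real patch -- until
 * after the lock is released.  Build with: cc sketch.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct record {
	unsigned long long bytenr;
	void *old_roots;	/* filled in later, outside the lock */
};

static pthread_spinlock_t lock;

/* Stand-in for btrfs_qgroup_trace_extent_nolock(): must not sleep. */
static int insert_record_nolock(struct record *rec, int *inserted_ret)
{
	/* A real implementation would insert into an rbtree here. */
	if (inserted_ret)
		*inserted_ret = 1;
	return 0;
}

/* Stand-in for the expensive, sleeping btrfs_find_all_roots(). */
static void *expensive_backref_walk(unsigned long long bytenr)
{
	(void)bytenr;
	return malloc(64);	/* placeholder for the old_roots ulist */
}

/* Stand-in for btrfs_qgroup_trace_extent_post(). */
static int record_post(struct record *rec)
{
	rec->old_roots = expensive_backref_walk(rec->bytenr);
	return rec->old_roots ? 0 : -1;
}

int main(void)
{
	struct record rec = { .bytenr = 13631488ULL, .old_roots = NULL };
	int inserted = 0;

	pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);

	pthread_spin_lock(&lock);
	insert_record_nolock(&rec, &inserted);	/* cheap, lock held */
	pthread_spin_unlock(&lock);

	if (inserted)
		record_post(&rec);		/* expensive, lock dropped */

	printf("inserted=%d old_roots=%p\n", inserted, rec.old_roots);
	free(rec.old_roots);
	pthread_spin_destroy(&lock);
	return 0;
}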
This commit is contained in:

parent 15b34517a6
commit fb235dc06f

3 changed files with 75 additions and 11 deletions
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
@@ -550,13 +550,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 		     struct btrfs_delayed_ref_node *ref,
 		     struct btrfs_qgroup_extent_record *qrecord,
 		     u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
-		     int action, int is_data)
+		     int action, int is_data, int *qrecord_inserted_ret)
 {
 	struct btrfs_delayed_ref_head *existing;
 	struct btrfs_delayed_ref_head *head_ref = NULL;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	int count_mod = 1;
 	int must_insert_reserved = 0;
+	int qrecord_inserted = 0;
 
 	/* If reserved is provided, it must be a data extent. */
 	BUG_ON(!is_data && reserved);
@@ -623,6 +624,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 		if(btrfs_qgroup_trace_extent_nolock(fs_info,
 				delayed_refs, qrecord))
 			kfree(qrecord);
+		else
+			qrecord_inserted = 1;
 	}
 
 	spin_lock_init(&head_ref->lock);
@@ -650,6 +653,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 		atomic_inc(&delayed_refs->num_entries);
 		trans->delayed_ref_updates++;
 	}
+	if (qrecord_inserted_ret)
+		*qrecord_inserted_ret = qrecord_inserted;
 	return head_ref;
 }
 
@@ -779,6 +784,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_qgroup_extent_record *record = NULL;
+	int qrecord_inserted;
 
 	BUG_ON(extent_op && extent_op->is_data);
 	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
@@ -806,12 +812,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	 * the spin lock
 	 */
 	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
-					bytenr, num_bytes, 0, 0, action, 0);
+					bytenr, num_bytes, 0, 0, action, 0,
+					&qrecord_inserted);
 
 	add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
 			     num_bytes, parent, ref_root, level, action);
 	spin_unlock(&delayed_refs->lock);
 
+	if (qrecord_inserted)
+		return btrfs_qgroup_trace_extent_post(fs_info, record);
 	return 0;
 
 free_head_ref:
@@ -835,6 +844,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_qgroup_extent_record *record = NULL;
+	int qrecord_inserted;
 
 	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
 	if (!ref)
@@ -868,13 +878,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	 */
 	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
 					bytenr, num_bytes, ref_root, reserved,
-					action, 1);
+					action, 1, &qrecord_inserted);
 
 	add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
 			     num_bytes, parent, ref_root, owner, offset,
 			     action);
 	spin_unlock(&delayed_refs->lock);
 
+	if (qrecord_inserted)
+		return btrfs_qgroup_trace_extent_post(fs_info, record);
 	return 0;
 }
 
@@ -897,7 +909,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 
 	add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
 			     num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
-			     extent_op->is_data);
+			     extent_op->is_data, NULL);
 
 	spin_unlock(&delayed_refs->lock);
 	return 0;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
@@ -1464,8 +1464,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
 	while (node) {
 		record = rb_entry(node, struct btrfs_qgroup_extent_record,
 				  node);
-		ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
-					   &record->old_roots);
+		if (WARN_ON(!record->old_roots))
+			ret = btrfs_find_all_roots(NULL, fs_info,
+					record->bytenr, 0, &record->old_roots);
 		if (ret < 0)
 			break;
 		if (qgroup_to_skip)
@@ -1504,6 +1505,28 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
+int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+				   struct btrfs_qgroup_extent_record *qrecord)
+{
+	struct ulist *old_root;
+	u64 bytenr = qrecord->bytenr;
+	int ret;
+
+	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Here we don't need to get the lock of
+	 * trans->transaction->delayed_refs, since inserted qrecord won't
+	 * be deleted, only qrecord->node may be modified (new qrecord insert)
+	 *
+	 * So modifying qrecord->old_roots is safe here
+	 */
+	qrecord->old_roots = old_root;
+	return 0;
+}
+
 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
 		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
 		gfp_t gfp_flag)
@@ -1529,9 +1552,11 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
 	spin_lock(&delayed_refs->lock);
 	ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
 	spin_unlock(&delayed_refs->lock);
-	if (ret > 0)
+	if (ret > 0) {
 		kfree(record);
-	return 0;
+		return 0;
+	}
+	return btrfs_qgroup_trace_extent_post(fs_info, record);
 }
 
 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
@@ -94,9 +94,10 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
 					 struct btrfs_fs_info *fs_info);
 /*
  * Inform qgroup to trace one dirty extent, its info is recorded in @record.
- * So qgroup can account it at commit trans time.
+ * So qgroup can account it at transaction committing time.
  *
- * No lock version, caller must acquire delayed ref lock and allocate memory.
+ * No lock version, caller must acquire delayed ref lock and allocated memory,
+ * then call btrfs_qgroup_trace_extent_post() after exiting lock context.
  *
  * Return 0 for success insert
  * Return >0 for existing record, caller can free @record safely.
@@ -107,12 +108,38 @@ int btrfs_qgroup_trace_extent_nolock(
 		struct btrfs_delayed_ref_root *delayed_refs,
 		struct btrfs_qgroup_extent_record *record);
 
+/*
+ * Post handler after qgroup_trace_extent_nolock().
+ *
+ * NOTE: Current qgroup does the expensive backref walk at transaction
+ * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming
+ * new transaction.
+ * This is designed to allow btrfs_find_all_roots() to get correct new_roots
+ * result.
+ *
+ * However for old_roots there is no need to do backref walk at that time,
+ * since we search commit roots to walk backref and result will always be
+ * correct.
+ *
+ * Due to the nature of no lock version, we can't do backref there.
+ * So we must call btrfs_qgroup_trace_extent_post() after exiting
+ * spinlock context.
+ *
+ * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
+ * using current root, then we can move all expensive backref walk out of
+ * transaction committing, but not now as qgroup accounting will be wrong again.
+ */
+int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+				   struct btrfs_qgroup_extent_record *qrecord);
+
 /*
  * Inform qgroup to trace one dirty extent, specified by @bytenr and
  * @num_bytes.
  * So qgroup can account it at commit trans time.
  *
- * Better encapsulated version.
+ * Better encapsulated version, with memory allocation and backref walk for
+ * commit roots.
+ * So this can sleep.
  *
  * Return 0 if the operation is done.
  * Return <0 for error, like memory allocation failure or invalid parameter
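Taken together, the two comment blocks above describe the calling convention: trace the extent under the delayed-refs spinlock, then run the post handler once the lock is dropped. A sketch of that sequence, mirroring btrfs_qgroup_trace_extent() from the qgroup.c hunk above (kernel context assumed, error handling trimmed; not a drop-in snippet):

/* Sketch only: assumes fs_info, delayed_refs and gfp_flag are in scope. */
struct btrfs_qgroup_extent_record *record;
int ret;

record = kzalloc(sizeof(*record), gfp_flag);	/* allocate before locking */
/* ... fill in record->bytenr, record->num_bytes ... */

spin_lock(&delayed_refs->lock);
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
spin_unlock(&delayed_refs->lock);

if (ret > 0) {
	kfree(record);	/* >0: an existing record already covers this extent */
	return 0;
}
/* 0: record was inserted; do the sleeping commit-root backref walk now */
return btrfs_qgroup_trace_extent_post(fs_info, record);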