forked from mirrors/linux
		
	Btrfs: avoid tree log commit when there are no changes
rpm has a habit of running fdatasync when the file hasn't changed. We already detect if a file hasn't been changed in the current transaction but it might have been sent to the tree-log in this transaction and not changed since the last call to fsync. In this case, we want to avoid a tree log sync, which includes a number of synchronous writes and barriers. This commit extends the existing tracking of the last transaction to change a file to also track the last sub-transaction. The end result is that rpm -ivh and -Uvh are roughly twice as fast, and on par with ext3. Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
		
							parent
							
								
									4722607db6
								
							
						
					
					
						commit
						257c62e1bc
					
				
					 8 changed files with 71 additions and 16 deletions
				
			
		| 
						 | 
				
			
			@ -86,6 +86,12 @@ struct btrfs_inode {
 | 
			
		|||
	 * transid of the trans_handle that last modified this inode
 | 
			
		||||
	 */
 | 
			
		||||
	u64 last_trans;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * log transid when this inode was last modified
 | 
			
		||||
	 */
 | 
			
		||||
	u64 last_sub_trans;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * transid that last logged this inode
 | 
			
		||||
	 */
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1009,6 +1009,7 @@ struct btrfs_root {
 | 
			
		|||
	atomic_t log_writers;
 | 
			
		||||
	atomic_t log_commit[2];
 | 
			
		||||
	unsigned long log_transid;
 | 
			
		||||
	unsigned long last_log_commit;
 | 
			
		||||
	unsigned long log_batch;
 | 
			
		||||
	pid_t log_start_pid;
 | 
			
		||||
	bool log_multiple_pids;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -919,6 +919,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 | 
			
		|||
	atomic_set(&root->log_writers, 0);
 | 
			
		||||
	root->log_batch = 0;
 | 
			
		||||
	root->log_transid = 0;
 | 
			
		||||
	root->last_log_commit = 0;
 | 
			
		||||
	extent_io_tree_init(&root->dirty_log_pages,
 | 
			
		||||
			     fs_info->btree_inode->i_mapping, GFP_NOFS);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1089,6 +1090,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
 | 
			
		|||
	WARN_ON(root->log_root);
 | 
			
		||||
	root->log_root = log_root;
 | 
			
		||||
	root->log_transid = 0;
 | 
			
		||||
	root->last_log_commit = 0;
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1087,8 +1087,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 | 
			
		|||
					btrfs_end_transaction(trans, root);
 | 
			
		||||
				else
 | 
			
		||||
					btrfs_commit_transaction(trans, root);
 | 
			
		||||
			} else {
 | 
			
		||||
			} else if (ret != BTRFS_NO_LOG_SYNC) {
 | 
			
		||||
				btrfs_commit_transaction(trans, root);
 | 
			
		||||
			} else {
 | 
			
		||||
				btrfs_end_transaction(trans, root);
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if (file->f_flags & O_DIRECT) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1138,6 +1140,13 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 | 
			
		|||
	int ret = 0;
 | 
			
		||||
	struct btrfs_trans_handle *trans;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	/* we wait first, since the writeback may change the inode */
 | 
			
		||||
	root->log_batch++;
 | 
			
		||||
	/* the VFS called filemap_fdatawrite for us */
 | 
			
		||||
	btrfs_wait_ordered_range(inode, 0, (u64)-1);
 | 
			
		||||
	root->log_batch++;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * check the transaction that last modified this inode
 | 
			
		||||
	 * and see if it's already been committed
 | 
			
		||||
| 
						 | 
				
			
			@ -1145,6 +1154,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 | 
			
		|||
	if (!BTRFS_I(inode)->last_trans)
 | 
			
		||||
		goto out;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * if the last transaction that changed this file was before
 | 
			
		||||
	 * the current transaction, we can bail out now without any
 | 
			
		||||
	 * syncing
 | 
			
		||||
	 */
 | 
			
		||||
	mutex_lock(&root->fs_info->trans_mutex);
 | 
			
		||||
	if (BTRFS_I(inode)->last_trans <=
 | 
			
		||||
	    root->fs_info->last_trans_committed) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1154,13 +1168,6 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 | 
			
		|||
	}
 | 
			
		||||
	mutex_unlock(&root->fs_info->trans_mutex);
 | 
			
		||||
 | 
			
		||||
	root->log_batch++;
 | 
			
		||||
	filemap_fdatawrite(inode->i_mapping);
 | 
			
		||||
	btrfs_wait_ordered_range(inode, 0, (u64)-1);
 | 
			
		||||
	root->log_batch++;
 | 
			
		||||
 | 
			
		||||
	if (datasync && !(inode->i_state & I_DIRTY_PAGES))
 | 
			
		||||
		goto out;
 | 
			
		||||
	/*
 | 
			
		||||
	 * ok we haven't committed the transaction yet, let's do a commit
 | 
			
		||||
	 */
 | 
			
		||||
| 
						 | 
				
			
			@ -1189,14 +1196,18 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 | 
			
		|||
	 */
 | 
			
		||||
	mutex_unlock(&dentry->d_inode->i_mutex);
 | 
			
		||||
 | 
			
		||||
	if (ret > 0) {
 | 
			
		||||
		ret = btrfs_commit_transaction(trans, root);
 | 
			
		||||
	} else {
 | 
			
		||||
		ret = btrfs_sync_log(trans, root);
 | 
			
		||||
		if (ret == 0)
 | 
			
		||||
			ret = btrfs_end_transaction(trans, root);
 | 
			
		||||
		else
 | 
			
		||||
	if (ret != BTRFS_NO_LOG_SYNC) {
 | 
			
		||||
		if (ret > 0) {
 | 
			
		||||
			ret = btrfs_commit_transaction(trans, root);
 | 
			
		||||
		} else {
 | 
			
		||||
			ret = btrfs_sync_log(trans, root);
 | 
			
		||||
			if (ret == 0)
 | 
			
		||||
				ret = btrfs_end_transaction(trans, root);
 | 
			
		||||
			else
 | 
			
		||||
				ret = btrfs_commit_transaction(trans, root);
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		ret = btrfs_end_transaction(trans, root);
 | 
			
		||||
	}
 | 
			
		||||
	mutex_lock(&dentry->d_inode->i_mutex);
 | 
			
		||||
out:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3480,6 +3480,7 @@ static noinline void init_btrfs_i(struct inode *inode)
 | 
			
		|||
	bi->generation = 0;
 | 
			
		||||
	bi->sequence = 0;
 | 
			
		||||
	bi->last_trans = 0;
 | 
			
		||||
	bi->last_sub_trans = 0;
 | 
			
		||||
	bi->logged_trans = 0;
 | 
			
		||||
	bi->delalloc_bytes = 0;
 | 
			
		||||
	bi->reserved_bytes = 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -4980,7 +4981,9 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 | 
			
		|||
	set_page_dirty(page);
 | 
			
		||||
	SetPageUptodate(page);
 | 
			
		||||
 | 
			
		||||
	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
 | 
			
		||||
	BTRFS_I(inode)->last_trans = root->fs_info->generation;
 | 
			
		||||
	BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
 | 
			
		||||
 | 
			
		||||
	unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
 | 
			
		||||
 | 
			
		||||
out_unlock:
 | 
			
		||||
| 
						 | 
				
			
			@ -5100,6 +5103,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 | 
			
		|||
	if (!ei)
 | 
			
		||||
		return NULL;
 | 
			
		||||
	ei->last_trans = 0;
 | 
			
		||||
	ei->last_sub_trans = 0;
 | 
			
		||||
	ei->logged_trans = 0;
 | 
			
		||||
	ei->outstanding_extents = 0;
 | 
			
		||||
	ei->reserved_extents = 0;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -79,6 +79,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
 | 
			
		|||
					      struct inode *inode)
 | 
			
		||||
{
 | 
			
		||||
	BTRFS_I(inode)->last_trans = trans->transaction->transid;
 | 
			
		||||
	BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1980,6 +1980,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 | 
			
		|||
	int ret;
 | 
			
		||||
	struct btrfs_root *log = root->log_root;
 | 
			
		||||
	struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
 | 
			
		||||
	u64 log_transid = 0;
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&root->log_mutex);
 | 
			
		||||
	index1 = root->log_transid % 2;
 | 
			
		||||
| 
						 | 
				
			
			@ -2018,6 +2019,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 | 
			
		|||
	btrfs_set_root_node(&log->root_item, log->node);
 | 
			
		||||
 | 
			
		||||
	root->log_batch = 0;
 | 
			
		||||
	log_transid = root->log_transid;
 | 
			
		||||
	root->log_transid++;
 | 
			
		||||
	log->log_transid = root->log_transid;
 | 
			
		||||
	root->log_start_pid = 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -2095,6 +2097,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 | 
			
		|||
	write_ctree_super(trans, root->fs_info->tree_root, 1);
 | 
			
		||||
	ret = 0;
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&root->log_mutex);
 | 
			
		||||
	if (root->last_log_commit < log_transid)
 | 
			
		||||
		root->last_log_commit = log_transid;
 | 
			
		||||
	mutex_unlock(&root->log_mutex);
 | 
			
		||||
 | 
			
		||||
out_wake_log_root:
 | 
			
		||||
	atomic_set(&log_root_tree->log_commit[index2], 0);
 | 
			
		||||
	smp_mb();
 | 
			
		||||
| 
						 | 
				
			
			@ -2862,6 +2869,21 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
 | 
			
		|||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int inode_in_log(struct btrfs_trans_handle *trans,
 | 
			
		||||
		 struct inode *inode)
 | 
			
		||||
{
 | 
			
		||||
	struct btrfs_root *root = BTRFS_I(inode)->root;
 | 
			
		||||
	int ret = 0;
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&root->log_mutex);
 | 
			
		||||
	if (BTRFS_I(inode)->logged_trans == trans->transid &&
 | 
			
		||||
	    BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
 | 
			
		||||
		ret = 1;
 | 
			
		||||
	mutex_unlock(&root->log_mutex);
 | 
			
		||||
	return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * helper function around btrfs_log_inode to make sure newly created
 | 
			
		||||
 * parent directories also end up in the log.  A minimal inode and backref
 | 
			
		||||
| 
						 | 
				
			
			@ -2901,6 +2923,11 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 | 
			
		|||
	if (ret)
 | 
			
		||||
		goto end_no_trans;
 | 
			
		||||
 | 
			
		||||
	if (inode_in_log(trans, inode)) {
 | 
			
		||||
		ret = BTRFS_NO_LOG_SYNC;
 | 
			
		||||
		goto end_no_trans;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	start_log_trans(trans, root);
 | 
			
		||||
 | 
			
		||||
	ret = btrfs_log_inode(trans, root, inode, inode_only);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -19,6 +19,9 @@
 | 
			
		|||
#ifndef __TREE_LOG_
 | 
			
		||||
#define __TREE_LOG_
 | 
			
		||||
 | 
			
		||||
/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
 | 
			
		||||
#define BTRFS_NO_LOG_SYNC 256
 | 
			
		||||
 | 
			
		||||
int btrfs_sync_log(struct btrfs_trans_handle *trans,
 | 
			
		||||
		   struct btrfs_root *root);
 | 
			
		||||
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue