forked from mirrors/linux
		
	Btrfs: Use async helpers to deal with pages that have been improperly dirtied
Higher layers sometimes call set_page_dirty without asking the filesystem to help. This causes many problems for the data=ordered and COW code. This commit detects pages that haven't been properly set up for IO and kicks off an async helper to deal with them. Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
		
							parent
							
								
									e6dcd2dc9c
								
							
						
					
					
						commit
						247e743cbe
					
				
					 6 changed files with 106 additions and 9 deletions
				
			
		| 
						 | 
				
			
			@ -546,6 +546,12 @@ struct btrfs_fs_info {
 | 
			
		|||
	struct btrfs_workers endio_workers;
 | 
			
		||||
	struct btrfs_workers endio_write_workers;
 | 
			
		||||
	struct btrfs_workers submit_workers;
 | 
			
		||||
	/*
 | 
			
		||||
	 * fixup workers take dirty pages that didn't properly go through
 | 
			
		||||
	 * the cow mechanism and make them safe to write.  It happens
 | 
			
		||||
	 * for the sys_munmap function call path
 | 
			
		||||
	 */
 | 
			
		||||
	struct btrfs_workers fixup_workers;
 | 
			
		||||
	struct task_struct *transaction_kthread;
 | 
			
		||||
	struct task_struct *cleaner_kthread;
 | 
			
		||||
	int thread_pool_size;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1329,11 +1329,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 | 
			
		|||
	 */
 | 
			
		||||
	btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
 | 
			
		||||
	btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size);
 | 
			
		||||
	btrfs_init_workers(&fs_info->fixup_workers, 1);
 | 
			
		||||
	btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
 | 
			
		||||
	btrfs_init_workers(&fs_info->endio_write_workers,
 | 
			
		||||
			   fs_info->thread_pool_size);
 | 
			
		||||
	btrfs_start_workers(&fs_info->workers, 1);
 | 
			
		||||
	btrfs_start_workers(&fs_info->submit_workers, 1);
 | 
			
		||||
	btrfs_start_workers(&fs_info->fixup_workers, 1);
 | 
			
		||||
	btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
 | 
			
		||||
	btrfs_start_workers(&fs_info->endio_write_workers,
 | 
			
		||||
			    fs_info->thread_pool_size);
 | 
			
		||||
| 
						 | 
				
			
			@ -1454,6 +1456,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 | 
			
		|||
fail_sys_array:
 | 
			
		||||
fail_sb_buffer:
 | 
			
		||||
	extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
 | 
			
		||||
	btrfs_stop_workers(&fs_info->fixup_workers);
 | 
			
		||||
	btrfs_stop_workers(&fs_info->workers);
 | 
			
		||||
	btrfs_stop_workers(&fs_info->endio_workers);
 | 
			
		||||
	btrfs_stop_workers(&fs_info->endio_write_workers);
 | 
			
		||||
| 
						 | 
				
			
			@ -1710,6 +1713,7 @@ int close_ctree(struct btrfs_root *root)
 | 
			
		|||
 | 
			
		||||
	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
 | 
			
		||||
 | 
			
		||||
	btrfs_stop_workers(&fs_info->fixup_workers);
 | 
			
		||||
	btrfs_stop_workers(&fs_info->workers);
 | 
			
		||||
	btrfs_stop_workers(&fs_info->endio_workers);
 | 
			
		||||
	btrfs_stop_workers(&fs_info->endio_write_workers);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2050,6 +2050,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 | 
			
		|||
	lock_extent(tree, start, page_end, GFP_NOFS);
 | 
			
		||||
	unlock_start = start;
 | 
			
		||||
 | 
			
		||||
	if (tree->ops && tree->ops->writepage_start_hook) {
 | 
			
		||||
		ret = tree->ops->writepage_start_hook(page, start, page_end);
 | 
			
		||||
		if (ret == -EAGAIN) {
 | 
			
		||||
			unlock_extent(tree, start, page_end, GFP_NOFS);
 | 
			
		||||
			redirty_page_for_writepage(wbc, page);
 | 
			
		||||
			unlock_page(page);
 | 
			
		||||
			return 0;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	end = page_end;
 | 
			
		||||
	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
 | 
			
		||||
		printk("found delalloc bits after lock_extent\n");
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -30,6 +30,7 @@ typedef	int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
 | 
			
		|||
				       struct bio *bio, int mirror_num);
 | 
			
		||||
struct extent_io_ops {
 | 
			
		||||
	int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
 | 
			
		||||
	int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
 | 
			
		||||
	int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
 | 
			
		||||
	extent_submit_bio_hook_t *submit_bio_hook;
 | 
			
		||||
	int (*merge_bio_hook)(struct page *page, unsigned long offset,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -313,6 +313,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
 | 
			
		|||
		for (i = 0; i < num_pages; i++) {
 | 
			
		||||
			struct page *p = pages[i];
 | 
			
		||||
			SetPageUptodate(p);
 | 
			
		||||
			ClearPageChecked(p);
 | 
			
		||||
			set_page_dirty(p);
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -403,6 +403,87 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
 | 
			
		|||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct btrfs_writepage_fixup {
 | 
			
		||||
	struct page *page;
 | 
			
		||||
	struct btrfs_work work;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* see btrfs_writepage_start_hook for details on why this is required */
 | 
			
		||||
void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 | 
			
		||||
{
 | 
			
		||||
	struct btrfs_writepage_fixup *fixup;
 | 
			
		||||
	struct btrfs_ordered_extent *ordered;
 | 
			
		||||
	struct page *page;
 | 
			
		||||
	struct inode *inode;
 | 
			
		||||
	u64 page_start;
 | 
			
		||||
	u64 page_end;
 | 
			
		||||
 | 
			
		||||
	fixup = container_of(work, struct btrfs_writepage_fixup, work);
 | 
			
		||||
	page = fixup->page;
 | 
			
		||||
 | 
			
		||||
	lock_page(page);
 | 
			
		||||
	if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
 | 
			
		||||
		ClearPageChecked(page);
 | 
			
		||||
		goto out_page;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	inode = page->mapping->host;
 | 
			
		||||
	page_start = page_offset(page);
 | 
			
		||||
	page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
 | 
			
		||||
 | 
			
		||||
	lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
 | 
			
		||||
	ordered = btrfs_lookup_ordered_extent(inode, page_start);
 | 
			
		||||
	if (ordered)
 | 
			
		||||
		goto out;
 | 
			
		||||
 | 
			
		||||
	set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end,
 | 
			
		||||
			    GFP_NOFS);
 | 
			
		||||
	ClearPageChecked(page);
 | 
			
		||||
out:
 | 
			
		||||
	unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
 | 
			
		||||
out_page:
 | 
			
		||||
	unlock_page(page);
 | 
			
		||||
	page_cache_release(page);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * There are a few paths in the higher layers of the kernel that directly
 | 
			
		||||
 * set the page dirty bit without asking the filesystem if it is a
 | 
			
		||||
 * good idea.  This causes problems because we want to make sure COW
 | 
			
		||||
 * properly happens and the data=ordered rules are followed.
 | 
			
		||||
 *
 | 
			
		||||
 * In our case any range that doesn't have the EXTENT_ORDERED bit set
 | 
			
		||||
 * hasn't been properly setup for IO.  We kick off an async process
 | 
			
		||||
 * to fix it up.  The async helper will wait for ordered extents, set
 | 
			
		||||
 * the delalloc bit and make it safe to write the page.
 | 
			
		||||
 */
 | 
			
		||||
int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
 | 
			
		||||
{
 | 
			
		||||
	struct inode *inode = page->mapping->host;
 | 
			
		||||
	struct btrfs_writepage_fixup *fixup;
 | 
			
		||||
	struct btrfs_root *root = BTRFS_I(inode)->root;
 | 
			
		||||
	int ret;
 | 
			
		||||
 | 
			
		||||
	ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
 | 
			
		||||
			     EXTENT_ORDERED, 0);
 | 
			
		||||
	if (ret)
 | 
			
		||||
		return 0;
 | 
			
		||||
 | 
			
		||||
	if (PageChecked(page))
 | 
			
		||||
		return -EAGAIN;
 | 
			
		||||
 | 
			
		||||
	fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
 | 
			
		||||
	if (!fixup)
 | 
			
		||||
		return -EAGAIN;
 | 
			
		||||
printk("queueing worker to fixup page %lu %Lu\n", inode->i_ino, page_offset(page));
 | 
			
		||||
	SetPageChecked(page);
 | 
			
		||||
	page_cache_get(page);
 | 
			
		||||
	fixup->work.func = btrfs_writepage_fixup_worker;
 | 
			
		||||
	fixup->page = page;
 | 
			
		||||
	btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
 | 
			
		||||
	return -EAGAIN;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
 | 
			
		||||
				struct extent_state *state, int uptodate)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			@ -1263,6 +1344,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 | 
			
		|||
		flush_dcache_page(page);
 | 
			
		||||
		kunmap(page);
 | 
			
		||||
	}
 | 
			
		||||
	ClearPageChecked(page);
 | 
			
		||||
	set_page_dirty(page);
 | 
			
		||||
	unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2658,6 +2740,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 | 
			
		|||
		flush_dcache_page(page);
 | 
			
		||||
		kunmap(page);
 | 
			
		||||
	}
 | 
			
		||||
	ClearPageChecked(page);
 | 
			
		||||
	set_page_dirty(page);
 | 
			
		||||
	unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -3039,15 +3122,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 | 
			
		|||
 | 
			
		||||
static int btrfs_set_page_dirty(struct page *page)
 | 
			
		||||
{
 | 
			
		||||
	struct inode *inode = page->mapping->host;
 | 
			
		||||
	u64 page_start = page_offset(page);
 | 
			
		||||
	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
 | 
			
		||||
 | 
			
		||||
	if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
 | 
			
		||||
			    EXTENT_DELALLOC, 0)) {
 | 
			
		||||
printk("inode %lu page %Lu not delalloc\n", inode->i_ino, page_offset(page));
 | 
			
		||||
WARN_ON(1);
 | 
			
		||||
	}
 | 
			
		||||
	return __set_page_dirty_nobuffers(page);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -3098,6 +3172,7 @@ static struct extent_io_ops btrfs_extent_io_ops = {
 | 
			
		|||
	.readpage_io_hook = btrfs_readpage_io_hook,
 | 
			
		||||
	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
 | 
			
		||||
	.writepage_end_io_hook = btrfs_writepage_end_io_hook,
 | 
			
		||||
	.writepage_start_hook = btrfs_writepage_start_hook,
 | 
			
		||||
	.readpage_io_failed_hook = btrfs_io_failed_hook,
 | 
			
		||||
	.set_bit_hook = btrfs_set_bit_hook,
 | 
			
		||||
	.clear_bit_hook = btrfs_clear_bit_hook,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue