mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	writeback: fix WB_SYNC_NONE writeback from umount
When umount calls sync_filesystem(), we first do a WB_SYNC_NONE writeback to kick off writeback of pending dirty inodes, then follow that up with a WB_SYNC_ALL to wait for it. Since umount already holds the sb s_umount mutex, WB_SYNC_NONE ends up doing nothing and all writeback happens as WB_SYNC_ALL. This can greatly slow down umount, since WB_SYNC_ALL writeback is a data integrity operation and thus a bigger hammer than simple WB_SYNC_NONE. For barrier aware file systems it's a lot slower. Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
This commit is contained in:
		
							parent
							
								
									69b62d01ec
								
							
						
					
					
						commit
						e913fc825d
					
				
					 5 changed files with 51 additions and 15 deletions
				
			
		| 
						 | 
				
			
			@ -45,6 +45,7 @@ struct wb_writeback_args {
 | 
			
		|||
	int for_kupdate:1;
 | 
			
		||||
	int range_cyclic:1;
 | 
			
		||||
	int for_background:1;
 | 
			
		||||
	int sb_pinned:1;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			@ -230,6 +231,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
 | 
			
		|||
		.sync_mode	= WB_SYNC_ALL,
 | 
			
		||||
		.nr_pages	= LONG_MAX,
 | 
			
		||||
		.range_cyclic	= 0,
 | 
			
		||||
		/*
 | 
			
		||||
		 * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
 | 
			
		||||
		 * lets make it explicitly clear.
 | 
			
		||||
		 */
 | 
			
		||||
		.sb_pinned	= 1,
 | 
			
		||||
	};
 | 
			
		||||
	struct bdi_work work;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -245,21 +251,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
 | 
			
		|||
 * @bdi: the backing device to write from
 | 
			
		||||
 * @sb: write inodes from this super_block
 | 
			
		||||
 * @nr_pages: the number of pages to write
 | 
			
		||||
 * @sb_locked: caller already holds sb umount sem.
 | 
			
		||||
 *
 | 
			
		||||
 * Description:
 | 
			
		||||
 *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
 | 
			
		||||
 *   started when this function returns, we make no guarantees on
 | 
			
		||||
 *   completion. Caller need not hold sb s_umount semaphore.
 | 
			
		||||
 *   completion. Caller specifies whether sb umount sem is held already or not.
 | 
			
		||||
 *
 | 
			
		||||
 */
 | 
			
		||||
void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
 | 
			
		||||
			 long nr_pages)
 | 
			
		||||
			 long nr_pages, int sb_locked)
 | 
			
		||||
{
 | 
			
		||||
	struct wb_writeback_args args = {
 | 
			
		||||
		.sb		= sb,
 | 
			
		||||
		.sync_mode	= WB_SYNC_NONE,
 | 
			
		||||
		.nr_pages	= nr_pages,
 | 
			
		||||
		.range_cyclic	= 1,
 | 
			
		||||
		.sb_pinned	= sb_locked,
 | 
			
		||||
	};
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			@ -577,7 +585,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
 | 
			
		|||
	/*
 | 
			
		||||
	 * Caller must already hold the ref for this
 | 
			
		||||
	 */
 | 
			
		||||
	if (wbc->sync_mode == WB_SYNC_ALL) {
 | 
			
		||||
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
 | 
			
		||||
		WARN_ON(!rwsem_is_locked(&sb->s_umount));
 | 
			
		||||
		return SB_NOT_PINNED;
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			@ -751,6 +759,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 | 
			
		|||
		.for_kupdate		= args->for_kupdate,
 | 
			
		||||
		.for_background		= args->for_background,
 | 
			
		||||
		.range_cyclic		= args->range_cyclic,
 | 
			
		||||
		.sb_pinned		= args->sb_pinned,
 | 
			
		||||
	};
 | 
			
		||||
	unsigned long oldest_jif;
 | 
			
		||||
	long wrote = 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -1193,6 +1202,18 @@ static void wait_sb_inodes(struct super_block *sb)
 | 
			
		|||
	iput(old_inode);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 | 
			
		||||
	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
 | 
			
		||||
	long nr_to_write;
 | 
			
		||||
 | 
			
		||||
	nr_to_write = nr_dirty + nr_unstable +
 | 
			
		||||
			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 | 
			
		||||
 | 
			
		||||
	bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * writeback_inodes_sb	-	writeback dirty inodes from given super_block
 | 
			
		||||
 * @sb: the superblock
 | 
			
		||||
| 
						 | 
				
			
			@ -1204,17 +1225,22 @@ static void wait_sb_inodes(struct super_block *sb)
 | 
			
		|||
 */
 | 
			
		||||
void writeback_inodes_sb(struct super_block *sb)
 | 
			
		||||
{
 | 
			
		||||
	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 | 
			
		||||
	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
 | 
			
		||||
	long nr_to_write;
 | 
			
		||||
 | 
			
		||||
	nr_to_write = nr_dirty + nr_unstable +
 | 
			
		||||
			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 | 
			
		||||
 | 
			
		||||
	bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
 | 
			
		||||
	__writeback_inodes_sb(sb, 0);
 | 
			
		||||
}
 | 
			
		||||
EXPORT_SYMBOL(writeback_inodes_sb);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * writeback_inodes_sb_locked	- writeback dirty inodes from given super_block
 | 
			
		||||
 * @sb: the superblock
 | 
			
		||||
 *
 | 
			
		||||
 * Like writeback_inodes_sb(), except the caller already holds the
 | 
			
		||||
 * sb umount sem.
 | 
			
		||||
 */
 | 
			
		||||
void writeback_inodes_sb_locked(struct super_block *sb)
 | 
			
		||||
{
 | 
			
		||||
	__writeback_inodes_sb(sb, 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * writeback_inodes_sb_if_idle	-	start writeback if none underway
 | 
			
		||||
 * @sb: the superblock
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 | 
			
		|||
	if (wait)
 | 
			
		||||
		sync_inodes_sb(sb);
 | 
			
		||||
	else
 | 
			
		||||
		writeback_inodes_sb(sb);
 | 
			
		||||
		writeback_inodes_sb_locked(sb);
 | 
			
		||||
 | 
			
		||||
	if (sb->s_op->sync_fs)
 | 
			
		||||
		sb->s_op->sync_fs(sb, wait);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -106,7 +106,7 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 | 
			
		|||
void bdi_unregister(struct backing_dev_info *bdi);
 | 
			
		||||
int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
 | 
			
		||||
void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
 | 
			
		||||
				long nr_pages);
 | 
			
		||||
				long nr_pages, int sb_locked);
 | 
			
		||||
int bdi_writeback_task(struct bdi_writeback *wb);
 | 
			
		||||
int bdi_has_dirty_io(struct backing_dev_info *bdi);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -65,6 +65,15 @@ struct writeback_control {
 | 
			
		|||
	 * so we use a single control to update them
 | 
			
		||||
	 */
 | 
			
		||||
	unsigned no_nrwrite_index_update:1;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * For WB_SYNC_ALL, the sb must always be pinned. For WB_SYNC_NONE,
 | 
			
		||||
	 * the writeback code will pin the sb for the caller. However,
 | 
			
		||||
	 * for eg umount, the caller does WB_SYNC_NONE but already has
 | 
			
		||||
	 * the sb pinned. If the below is set, caller already has the
 | 
			
		||||
	 * sb pinned.
 | 
			
		||||
	 */
 | 
			
		||||
	unsigned sb_pinned:1;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			@ -73,6 +82,7 @@ struct writeback_control {
 | 
			
		|||
struct bdi_writeback;
 | 
			
		||||
int inode_wait(void *);
 | 
			
		||||
void writeback_inodes_sb(struct super_block *);
 | 
			
		||||
void writeback_inodes_sb_locked(struct super_block *);
 | 
			
		||||
int writeback_inodes_sb_if_idle(struct super_block *);
 | 
			
		||||
void sync_inodes_sb(struct super_block *);
 | 
			
		||||
void writeback_inodes_wbc(struct writeback_control *wbc);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 | 
			
		|||
	    (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
 | 
			
		||||
			       + global_page_state(NR_UNSTABLE_NFS))
 | 
			
		||||
					  > background_thresh)))
 | 
			
		||||
		bdi_start_writeback(bdi, NULL, 0);
 | 
			
		||||
		bdi_start_writeback(bdi, NULL, 0, 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void set_page_dirty_balance(struct page *page, int page_mkwrite)
 | 
			
		||||
| 
						 | 
				
			
			@ -705,7 +705,7 @@ void laptop_mode_timer_fn(unsigned long data)
 | 
			
		|||
	 */
 | 
			
		||||
 | 
			
		||||
	if (bdi_has_dirty_io(&q->backing_dev_info))
 | 
			
		||||
		bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages);
 | 
			
		||||
		/*
		 * Argument order is (bdi, sb, nr_pages, sb_locked). No sb is
		 * passed here, so sb_locked must be 0 and nr_pages goes third.
		 */
		bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages, 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue