forked from mirrors/linux
		
	ext4: fix races between changing inode journal mode and ext4_writepages
In ext4, there is a race condition between changing inode journal mode and ext4_writepages(). While ext4_writepages() is executed on a non-journalled mode inode, the inode's journal mode could be enabled by ioctl() and then, some pages dirtied after switching the journal mode will be still exposed to ext4_writepages() in non-journaled mode. To resolve this problem, we use fs-wide per-cpu rw semaphore by Jan Kara's suggestion because we don't want to waste ext4_inode_info's space for this extra rare case. Signed-off-by: Daeho Jeong <daeho.jeong@samsung.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Reviewed-by: Jan Kara <jack@suse.cz>
This commit is contained in:
		
							parent
							
								
									4c54659269
								
							
						
					
					
						commit
						c8585c6fca
					
				
					 4 changed files with 21 additions and 3 deletions
				
			
		|  | @ -33,6 +33,7 @@ | |||
| #include <linux/ratelimit.h> | ||||
| #include <crypto/hash.h> | ||||
| #include <linux/falloc.h> | ||||
| #include <linux/percpu-rwsem.h> | ||||
| #ifdef __KERNEL__ | ||||
| #include <linux/compat.h> | ||||
| #endif | ||||
|  | @ -1508,6 +1509,9 @@ struct ext4_sb_info { | |||
| 	struct ratelimit_state s_err_ratelimit_state; | ||||
| 	struct ratelimit_state s_warning_ratelimit_state; | ||||
| 	struct ratelimit_state s_msg_ratelimit_state; | ||||
| 
 | ||||
| 	/* Barrier between changing inodes' journal flags and writepages ops. */ | ||||
| 	struct percpu_rw_semaphore s_journal_flag_rwsem; | ||||
| }; | ||||
| 
 | ||||
| static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | ||||
|  |  | |||
|  | @ -2612,11 +2612,14 @@ static int ext4_writepages(struct address_space *mapping, | |||
| 	struct blk_plug plug; | ||||
| 	bool give_up_on_write = false; | ||||
| 
 | ||||
| 	percpu_down_read(&sbi->s_journal_flag_rwsem); | ||||
| 	trace_ext4_writepages(inode, wbc); | ||||
| 
 | ||||
| 	if (dax_mapping(mapping)) | ||||
| 		return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, | ||||
| 						   wbc); | ||||
| 	if (dax_mapping(mapping)) { | ||||
| 		ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, | ||||
| 						  wbc); | ||||
| 		goto out_writepages; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * No pages to write? This is mainly a kludge to avoid starting | ||||
|  | @ -2786,6 +2789,7 @@ static int ext4_writepages(struct address_space *mapping, | |||
| out_writepages: | ||||
| 	trace_ext4_writepages_result(inode, wbc, ret, | ||||
| 				     nr_to_write - wbc->nr_to_write); | ||||
| 	percpu_up_read(&sbi->s_journal_flag_rwsem); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
|  | @ -5436,6 +5440,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
| 	journal_t *journal; | ||||
| 	handle_t *handle; | ||||
| 	int err; | ||||
| 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We have to be very careful here: changing a data block's | ||||
|  | @ -5475,6 +5480,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	percpu_down_write(&sbi->s_journal_flag_rwsem); | ||||
| 	jbd2_journal_lock_updates(journal); | ||||
| 
 | ||||
| 	/*
 | ||||
|  | @ -5491,6 +5497,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
| 		err = jbd2_journal_flush(journal); | ||||
| 		if (err < 0) { | ||||
| 			jbd2_journal_unlock_updates(journal); | ||||
| 			percpu_up_write(&sbi->s_journal_flag_rwsem); | ||||
| 			ext4_inode_resume_unlocked_dio(inode); | ||||
| 			return err; | ||||
| 		} | ||||
|  | @ -5499,6 +5506,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
| 	ext4_set_aops(inode); | ||||
| 
 | ||||
| 	jbd2_journal_unlock_updates(journal); | ||||
| 	percpu_up_write(&sbi->s_journal_flag_rwsem); | ||||
| 
 | ||||
| 	if (val) | ||||
| 		up_write(&EXT4_I(inode)->i_mmap_sem); | ||||
| 	ext4_inode_resume_unlocked_dio(inode); | ||||
|  |  | |||
|  | @ -859,6 +859,7 @@ static void ext4_put_super(struct super_block *sb) | |||
| 	percpu_counter_destroy(&sbi->s_freeinodes_counter); | ||||
| 	percpu_counter_destroy(&sbi->s_dirs_counter); | ||||
| 	percpu_counter_destroy(&sbi->s_dirtyclusters_counter); | ||||
| 	percpu_free_rwsem(&sbi->s_journal_flag_rwsem); | ||||
| 	brelse(sbi->s_sbh); | ||||
| #ifdef CONFIG_QUOTA | ||||
| 	for (i = 0; i < EXT4_MAXQUOTAS; i++) | ||||
|  | @ -3930,6 +3931,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 	if (!err) | ||||
| 		err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, | ||||
| 					  GFP_KERNEL); | ||||
| 	if (!err) | ||||
| 		err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem); | ||||
| 
 | ||||
| 	if (err) { | ||||
| 		ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||||
| 		goto failed_mount6; | ||||
|  |  | |||
|  | @ -37,6 +37,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) | |||
| 	free_percpu(brw->fast_read_ctr); | ||||
| 	brw->fast_read_ctr = NULL; /* catch use after free bugs */ | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(percpu_free_rwsem); | ||||
| 
 | ||||
| /*
 | ||||
|  * This is the fast-path for down_read/up_read. If it succeeds we rely | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Daeho Jeong
						Daeho Jeong