mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	ext4: use transaction reservation for extent conversion in ext4_end_io
Later we would like to clear the PageWriteback bit only after the conversion from unwritten to written extents has been performed. However, it is not possible to start a transaction after PageWriteback is set, because that violates lock ordering (and can easily deadlock). So we have to reserve a transaction before locking pages and sending them for IO, and later we use that reserved transaction for extent conversion from ext4_end_io(). Reviewed-by: Zheng Liu <wenqing.lz@taobao.com> Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
		
							parent
							
								
									3613d22807
								
							
						
					
					
						commit
						6b523df4fb
					
				
					 5 changed files with 69 additions and 24 deletions
				
			
		| 
						 | 
					@ -184,10 +184,13 @@ struct ext4_map_blocks {
 | 
				
			||||||
#define EXT4_IO_END_DIRECT	0x0004
 | 
					#define EXT4_IO_END_DIRECT	0x0004
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * For converting uninitialized extents on a work queue.
 | 
					 * For converting uninitialized extents on a work queue. 'handle' is used for
 | 
				
			||||||
 | 
					 * buffered writeback.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
typedef struct ext4_io_end {
 | 
					typedef struct ext4_io_end {
 | 
				
			||||||
	struct list_head	list;		/* per-file finished IO list */
 | 
						struct list_head	list;		/* per-file finished IO list */
 | 
				
			||||||
 | 
						handle_t		*handle;	/* handle reserved for extent
 | 
				
			||||||
 | 
											 * conversion */
 | 
				
			||||||
	struct inode		*inode;		/* file being written to */
 | 
						struct inode		*inode;		/* file being written to */
 | 
				
			||||||
	unsigned int		flag;		/* unwritten or not */
 | 
						unsigned int		flag;		/* unwritten or not */
 | 
				
			||||||
	loff_t			offset;		/* offset in the file */
 | 
						loff_t			offset;		/* offset in the file */
 | 
				
			||||||
| 
						 | 
					@ -1322,6 +1325,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
 | 
				
			||||||
					      struct ext4_io_end *io_end)
 | 
										      struct ext4_io_end *io_end)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
 | 
						if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
 | 
				
			||||||
 | 
							/* Writeback has to have conversion transaction reserved */
 | 
				
			||||||
 | 
							WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle &&
 | 
				
			||||||
 | 
								!(io_end->flag & EXT4_IO_END_DIRECT));
 | 
				
			||||||
		io_end->flag |= EXT4_IO_END_UNWRITTEN;
 | 
							io_end->flag |= EXT4_IO_END_UNWRITTEN;
 | 
				
			||||||
		atomic_inc(&EXT4_I(inode)->i_unwritten);
 | 
							atomic_inc(&EXT4_I(inode)->i_unwritten);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -2591,8 +2597,8 @@ extern void ext4_ext_init(struct super_block *);
 | 
				
			||||||
extern void ext4_ext_release(struct super_block *);
 | 
					extern void ext4_ext_release(struct super_block *);
 | 
				
			||||||
extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
 | 
					extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
 | 
				
			||||||
			  loff_t len);
 | 
								  loff_t len);
 | 
				
			||||||
extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 | 
					extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
 | 
				
			||||||
			  ssize_t len);
 | 
										  loff_t offset, ssize_t len);
 | 
				
			||||||
extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
 | 
					extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
 | 
				
			||||||
			   struct ext4_map_blocks *map, int flags);
 | 
								   struct ext4_map_blocks *map, int flags);
 | 
				
			||||||
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
 | 
					extern int ext4_ext_calc_metadata_amount(struct inode *inode,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode)
 | 
				
			||||||
#define EXT4_HT_MIGRATE          8
 | 
					#define EXT4_HT_MIGRATE          8
 | 
				
			||||||
#define EXT4_HT_MOVE_EXTENTS     9
 | 
					#define EXT4_HT_MOVE_EXTENTS     9
 | 
				
			||||||
#define EXT4_HT_XATTR           10
 | 
					#define EXT4_HT_XATTR           10
 | 
				
			||||||
#define EXT4_HT_MAX             11
 | 
					#define EXT4_HT_EXT_CONVERT     11
 | 
				
			||||||
 | 
					#define EXT4_HT_MAX             12
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 *   struct ext4_journal_cb_entry - Base structure for callback information.
 | 
					 *   struct ext4_journal_cb_entry - Base structure for callback information.
 | 
				
			||||||
| 
						 | 
					@ -319,7 +320,7 @@ static inline handle_t *__ext4_journal_start(struct inode *inode,
 | 
				
			||||||
#define ext4_journal_stop(handle) \
 | 
					#define ext4_journal_stop(handle) \
 | 
				
			||||||
	__ext4_journal_stop(__func__, __LINE__, (handle))
 | 
						__ext4_journal_stop(__func__, __LINE__, (handle))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define ext4_journal_start_reserve(handle, type) \
 | 
					#define ext4_journal_start_reserved(handle, type) \
 | 
				
			||||||
	__ext4_journal_start_reserved((handle), __LINE__, (type))
 | 
						__ext4_journal_start_reserved((handle), __LINE__, (type))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
 | 
					handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4566,10 +4566,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 | 
				
			||||||
 * function, to convert the fallocated extents after IO is completed.
 | 
					 * function, to convert the fallocated extents after IO is completed.
 | 
				
			||||||
 * Returns 0 on success.
 | 
					 * Returns 0 on success.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 | 
					int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
 | 
				
			||||||
				    ssize_t len)
 | 
									   loff_t offset, ssize_t len)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	handle_t *handle;
 | 
					 | 
				
			||||||
	unsigned int max_blocks;
 | 
						unsigned int max_blocks;
 | 
				
			||||||
	int ret = 0;
 | 
						int ret = 0;
 | 
				
			||||||
	int ret2 = 0;
 | 
						int ret2 = 0;
 | 
				
			||||||
| 
						 | 
					@ -4584,16 +4583,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 | 
				
			||||||
	max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
 | 
						max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
 | 
				
			||||||
		      map.m_lblk);
 | 
							      map.m_lblk);
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * credits to insert 1 extent into extent tree
 | 
						 * This is somewhat ugly but the idea is clear: When transaction is
 | 
				
			||||||
 | 
						 * reserved, everything goes into it. Otherwise we rather start several
 | 
				
			||||||
 | 
						 * smaller transactions for conversion of each extent separately.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	credits = ext4_chunk_trans_blocks(inode, max_blocks);
 | 
						if (handle) {
 | 
				
			||||||
 | 
							handle = ext4_journal_start_reserved(handle,
 | 
				
			||||||
 | 
											     EXT4_HT_EXT_CONVERT);
 | 
				
			||||||
 | 
							if (IS_ERR(handle))
 | 
				
			||||||
 | 
								return PTR_ERR(handle);
 | 
				
			||||||
 | 
							credits = 0;
 | 
				
			||||||
 | 
						} else {
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * credits to insert 1 extent into extent tree
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							credits = ext4_chunk_trans_blocks(inode, max_blocks);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	while (ret >= 0 && ret < max_blocks) {
 | 
						while (ret >= 0 && ret < max_blocks) {
 | 
				
			||||||
		map.m_lblk += ret;
 | 
							map.m_lblk += ret;
 | 
				
			||||||
		map.m_len = (max_blocks -= ret);
 | 
							map.m_len = (max_blocks -= ret);
 | 
				
			||||||
		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
 | 
							if (credits) {
 | 
				
			||||||
		if (IS_ERR(handle)) {
 | 
								handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
 | 
				
			||||||
			ret = PTR_ERR(handle);
 | 
											    credits);
 | 
				
			||||||
			break;
 | 
								if (IS_ERR(handle)) {
 | 
				
			||||||
 | 
									ret = PTR_ERR(handle);
 | 
				
			||||||
 | 
									break;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		ret = ext4_map_blocks(handle, inode, &map,
 | 
							ret = ext4_map_blocks(handle, inode, &map,
 | 
				
			||||||
				      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
 | 
									      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
 | 
				
			||||||
| 
						 | 
					@ -4604,10 +4619,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 | 
				
			||||||
				     inode->i_ino, map.m_lblk,
 | 
									     inode->i_ino, map.m_lblk,
 | 
				
			||||||
				     map.m_len, ret);
 | 
									     map.m_len, ret);
 | 
				
			||||||
		ext4_mark_inode_dirty(handle, inode);
 | 
							ext4_mark_inode_dirty(handle, inode);
 | 
				
			||||||
		ret2 = ext4_journal_stop(handle);
 | 
							if (credits)
 | 
				
			||||||
		if (ret <= 0 || ret2 )
 | 
								ret2 = ext4_journal_stop(handle);
 | 
				
			||||||
 | 
							if (ret <= 0 || ret2)
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						if (!credits)
 | 
				
			||||||
 | 
							ret2 = ext4_journal_stop(handle);
 | 
				
			||||||
	return ret > 0 ? ret2 : ret;
 | 
						return ret > 0 ? ret2 : ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1410,6 +1410,7 @@ static void ext4_da_page_release_reservation(struct page *page,
 | 
				
			||||||
struct mpage_da_data {
 | 
					struct mpage_da_data {
 | 
				
			||||||
	struct inode *inode;
 | 
						struct inode *inode;
 | 
				
			||||||
	struct writeback_control *wbc;
 | 
						struct writeback_control *wbc;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pgoff_t first_page;	/* The first page to write */
 | 
						pgoff_t first_page;	/* The first page to write */
 | 
				
			||||||
	pgoff_t next_page;	/* Current page to examine */
 | 
						pgoff_t next_page;	/* Current page to examine */
 | 
				
			||||||
	pgoff_t last_page;	/* Last page to examine */
 | 
						pgoff_t last_page;	/* Last page to examine */
 | 
				
			||||||
| 
						 | 
					@ -2108,8 +2109,14 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
 | 
				
			||||||
	err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
 | 
						err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
 | 
				
			||||||
	if (err < 0)
 | 
						if (err < 0)
 | 
				
			||||||
		return err;
 | 
							return err;
 | 
				
			||||||
	if (map->m_flags & EXT4_MAP_UNINIT)
 | 
						if (map->m_flags & EXT4_MAP_UNINIT) {
 | 
				
			||||||
 | 
							if (!mpd->io_submit.io_end->handle &&
 | 
				
			||||||
 | 
							    ext4_handle_valid(handle)) {
 | 
				
			||||||
 | 
								mpd->io_submit.io_end->handle = handle->h_rsv_handle;
 | 
				
			||||||
 | 
								handle->h_rsv_handle = NULL;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
		ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end);
 | 
							ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	BUG_ON(map->m_len == 0);
 | 
						BUG_ON(map->m_len == 0);
 | 
				
			||||||
	if (map->m_flags & EXT4_MAP_NEW) {
 | 
						if (map->m_flags & EXT4_MAP_NEW) {
 | 
				
			||||||
| 
						 | 
					@ -2351,7 +2358,7 @@ static int ext4_da_writepages(struct address_space *mapping,
 | 
				
			||||||
	handle_t *handle = NULL;
 | 
						handle_t *handle = NULL;
 | 
				
			||||||
	struct mpage_da_data mpd;
 | 
						struct mpage_da_data mpd;
 | 
				
			||||||
	struct inode *inode = mapping->host;
 | 
						struct inode *inode = mapping->host;
 | 
				
			||||||
	int needed_blocks, ret = 0;
 | 
						int needed_blocks, rsv_blocks = 0, ret = 0;
 | 
				
			||||||
	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 | 
						struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 | 
				
			||||||
	bool done;
 | 
						bool done;
 | 
				
			||||||
	struct blk_plug plug;
 | 
						struct blk_plug plug;
 | 
				
			||||||
| 
						 | 
					@ -2379,6 +2386,14 @@ static int ext4_da_writepages(struct address_space *mapping,
 | 
				
			||||||
	if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
 | 
						if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
 | 
				
			||||||
		return -EROFS;
 | 
							return -EROFS;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (ext4_should_dioread_nolock(inode)) {
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * We may need to convert up to one extent per block in
 | 
				
			||||||
 | 
							 * the page and we may dirty the inode.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * If we have inline data and arrive here, it means that
 | 
						 * If we have inline data and arrive here, it means that
 | 
				
			||||||
	 * we will soon create the block for the 1st page, so
 | 
						 * we will soon create the block for the 1st page, so
 | 
				
			||||||
| 
						 | 
					@ -2438,8 +2453,8 @@ static int ext4_da_writepages(struct address_space *mapping,
 | 
				
			||||||
		needed_blocks = ext4_da_writepages_trans_blocks(inode);
 | 
							needed_blocks = ext4_da_writepages_trans_blocks(inode);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/* start a new transaction */
 | 
							/* start a new transaction */
 | 
				
			||||||
		handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
 | 
							handle = ext4_journal_start_with_reserve(inode,
 | 
				
			||||||
					    needed_blocks);
 | 
									EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks);
 | 
				
			||||||
		if (IS_ERR(handle)) {
 | 
							if (IS_ERR(handle)) {
 | 
				
			||||||
			ret = PTR_ERR(handle);
 | 
								ret = PTR_ERR(handle);
 | 
				
			||||||
			ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
 | 
								ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
 | 
				
			||||||
| 
						 | 
					@ -3120,7 +3135,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 | 
				
			||||||
		 * for non AIO case, since the IO is already
 | 
							 * for non AIO case, since the IO is already
 | 
				
			||||||
		 * completed, we could do the conversion right here
 | 
							 * completed, we could do the conversion right here
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		err = ext4_convert_unwritten_extents(inode,
 | 
							err = ext4_convert_unwritten_extents(NULL, inode,
 | 
				
			||||||
						     offset, ret);
 | 
											     offset, ret);
 | 
				
			||||||
		if (err < 0)
 | 
							if (err < 0)
 | 
				
			||||||
			ret = err;
 | 
								ret = err;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -66,6 +66,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	BUG_ON(!list_empty(&io_end->list));
 | 
						BUG_ON(!list_empty(&io_end->list));
 | 
				
			||||||
	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
 | 
						BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
 | 
				
			||||||
 | 
						WARN_ON(io_end->handle);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
 | 
						if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
 | 
				
			||||||
		wake_up_all(ext4_ioend_wq(io_end->inode));
 | 
							wake_up_all(ext4_ioend_wq(io_end->inode));
 | 
				
			||||||
| 
						 | 
					@ -92,13 +93,15 @@ static int ext4_end_io(ext4_io_end_t *io)
 | 
				
			||||||
	struct inode *inode = io->inode;
 | 
						struct inode *inode = io->inode;
 | 
				
			||||||
	loff_t offset = io->offset;
 | 
						loff_t offset = io->offset;
 | 
				
			||||||
	ssize_t size = io->size;
 | 
						ssize_t size = io->size;
 | 
				
			||||||
 | 
						handle_t *handle = io->handle;
 | 
				
			||||||
	int ret = 0;
 | 
						int ret = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
 | 
						ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
 | 
				
			||||||
		   "list->prev 0x%p\n",
 | 
							   "list->prev 0x%p\n",
 | 
				
			||||||
		   io, inode->i_ino, io->list.next, io->list.prev);
 | 
							   io, inode->i_ino, io->list.next, io->list.prev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ret = ext4_convert_unwritten_extents(inode, offset, size);
 | 
						io->handle = NULL;	/* Following call will use up the handle */
 | 
				
			||||||
 | 
						ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
 | 
				
			||||||
	if (ret < 0) {
 | 
						if (ret < 0) {
 | 
				
			||||||
		ext4_msg(inode->i_sb, KERN_EMERG,
 | 
							ext4_msg(inode->i_sb, KERN_EMERG,
 | 
				
			||||||
			 "failed to convert unwritten extents to written "
 | 
								 "failed to convert unwritten extents to written "
 | 
				
			||||||
| 
						 | 
					@ -228,8 +231,10 @@ int ext4_put_io_end(ext4_io_end_t *io_end)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (atomic_dec_and_test(&io_end->count)) {
 | 
						if (atomic_dec_and_test(&io_end->count)) {
 | 
				
			||||||
		if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
 | 
							if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
 | 
				
			||||||
			err = ext4_convert_unwritten_extents(io_end->inode,
 | 
								err = ext4_convert_unwritten_extents(io_end->handle,
 | 
				
			||||||
						io_end->offset, io_end->size);
 | 
											io_end->inode, io_end->offset,
 | 
				
			||||||
 | 
											io_end->size);
 | 
				
			||||||
 | 
								io_end->handle = NULL;
 | 
				
			||||||
			ext4_clear_io_unwritten_flag(io_end);
 | 
								ext4_clear_io_unwritten_flag(io_end);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		ext4_release_io_end(io_end);
 | 
							ext4_release_io_end(io_end);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue