Mirror of https://github.com/torvalds/linux.git (synced 2025-10-31 16:48:26 +02:00)
	ext4: DAX iomap write support
Implement DAX writes using the new iomap infrastructure instead of overloading the direct IO path.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
commit 776722e85d (parent 47e6935136)
2 changed files with 160 additions and 6 deletions
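The hunks in fs/ext4/file.c below route DAX writes straight into dax_iomap_rw() with an iomap_ops table, and the hunks in fs/ext4/inode.c supply the ->iomap_begin()/->iomap_end() callbacks behind that table. As orientation only, here is a rough sketch of how such a begin/end pair is driven for a write; the real loop lives in fs/dax.c and handles far more cases, and copy_range_to_pmem() is a hypothetical stand-in for the actual copy into persistent memory:

/* Illustrative sketch, not the fs/dax.c implementation. */
static ssize_t copy_range_to_pmem(struct iov_iter *iter, struct iomap *iomap,
				  loff_t pos)
{
	/*
	 * Hypothetical helper: a real implementation copies into the
	 * device's kernel mapping; here we only account for the bytes.
	 */
	size_t len = min_t(loff_t, iov_iter_count(iter),
			   iomap->offset + iomap->length - pos);

	iov_iter_advance(iter, len);
	return len;
}

static ssize_t dax_write_sketch(struct kiocb *iocb, struct iov_iter *iter,
				struct iomap_ops *ops)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	loff_t pos = iocb->ki_pos;
	ssize_t done = 0;
	int ret = 0;

	while (iov_iter_count(iter)) {
		struct iomap iomap = { 0 };
		ssize_t written;

		/* The filesystem maps (and, for writes, allocates) blocks. */
		ret = ops->iomap_begin(inode, pos + done, iov_iter_count(iter),
				       IOMAP_WRITE, &iomap);
		if (ret)
			break;

		/* Data goes directly to persistent memory, no page cache. */
		written = copy_range_to_pmem(iter, &iomap, pos + done);

		/* The filesystem updates i_size and cleans up EOF/orphan state. */
		ops->iomap_end(inode, pos + done, iomap.length,
			       written > 0 ? written : 0, IOMAP_WRITE, &iomap);
		if (written <= 0)
			break;
		done += written;
	}
	return done ? done : ret;
}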
				
			
fs/ext4/file.c

@@ -169,6 +169,41 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	return iov_iter_count(from);
 }
 
+#ifdef CONFIG_FS_DAX
+static ssize_t
+ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	ssize_t ret;
+	bool overwrite = false;
+
+	inode_lock(inode);
+	ret = ext4_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+	ret = file_remove_privs(iocb->ki_filp);
+	if (ret)
+		goto out;
+	ret = file_update_time(iocb->ki_filp);
+	if (ret)
+		goto out;
+
+	if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+		overwrite = true;
+		downgrade_write(&inode->i_rwsem);
+	}
+	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+out:
+	if (!overwrite)
+		inode_unlock(inode);
+	else
+		inode_unlock_shared(inode);
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	return ret;
+}
+#endif
+
 static ssize_t
 ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
@@ -178,6 +213,11 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	int overwrite = 0;
 	ssize_t ret;
 
+#ifdef CONFIG_FS_DAX
+	if (IS_DAX(inode))
+		return ext4_dax_write_iter(iocb, from);
+#endif
+
 	inode_lock(inode);
 	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0)
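A note on the locking in ext4_dax_write_iter() above: when ext4_overwrite_io() reports a pure overwrite, the exclusive i_rwsem taken by inode_lock() is downgraded so readers are not blocked while the data is copied, and the unlock at the out: label has to pick the matching flavour. A minimal illustration of that pattern (not a verbatim excerpt):

/* Minimal sketch of the i_rwsem downgrade used above. */
static void dax_write_lock_sketch(struct inode *inode, bool overwrite)
{
	inode_lock(inode);			/* down_write(&inode->i_rwsem) */

	if (overwrite) {
		/* Mapping cannot change; keep excluding writers only. */
		downgrade_write(&inode->i_rwsem);
		/* ... copy data under the shared lock ... */
		inode_unlock_shared(inode);	/* up_read() */
	} else {
		/* Allocating writes stay fully exclusive. */
		/* ... copy data under the exclusive lock ... */
		inode_unlock(inode);		/* up_write() */
	}
}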
							
								
								
									
fs/ext4/inode.c (126 changed lines)
@@ -3329,18 +3329,79 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	struct ext4_map_blocks map;
 	int ret;
 
-	if (flags & IOMAP_WRITE)
-		return -EIO;
-
 	if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
 		return -ERANGE;
 
 	map.m_lblk = first_block;
 	map.m_len = last_block - first_block + 1;
 
-	ret = ext4_map_blocks(NULL, inode, &map, 0);
-	if (ret < 0)
-		return ret;
+	if (!(flags & IOMAP_WRITE)) {
+		ret = ext4_map_blocks(NULL, inode, &map, 0);
+	} else {
+		int dio_credits;
+		handle_t *handle;
+		int retries = 0;
+
+		/* Trim mapping request to maximum we can map at once for DIO */
+		if (map.m_len > DIO_MAX_BLOCKS)
+			map.m_len = DIO_MAX_BLOCKS;
+		dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
+retry:
+		/*
+		 * Either we allocate blocks and then we don't get unwritten
+		 * extent so we have reserved enough credits, or the blocks
+		 * are already allocated and unwritten and in that case
+		 * extent conversion fits in the credits as well.
+		 */
+		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+					    dio_credits);
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+
+		ret = ext4_map_blocks(handle, inode, &map,
+				      EXT4_GET_BLOCKS_PRE_IO |
+				      EXT4_GET_BLOCKS_CREATE_ZERO);
+		if (ret < 0) {
+			ext4_journal_stop(handle);
+			if (ret == -ENOSPC &&
+			    ext4_should_retry_alloc(inode->i_sb, &retries))
+				goto retry;
+			return ret;
+		}
+		/* For DAX writes we need to zero out unwritten extents */
+		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+			/*
+			 * We are protected by i_mmap_sem or i_rwsem so we know
+			 * block cannot go away from under us even though we
+			 * dropped i_data_sem. Convert extent to written and
+			 * write zeros there.
+			 */
+			ret = ext4_map_blocks(handle, inode, &map,
+					      EXT4_GET_BLOCKS_CONVERT |
+					      EXT4_GET_BLOCKS_CREATE_ZERO);
+			if (ret < 0) {
+				ext4_journal_stop(handle);
+				return ret;
+			}
+		}
+
+		/*
+		 * If we added blocks beyond i_size we need to make sure they
+		 * will get truncated if we crash before updating i_size in
+		 * ext4_iomap_end().
+		 */
+		if (first_block + map.m_len >
+		    (inode->i_size + (1 << blkbits) - 1) >> blkbits) {
+			int err;
+
+			err = ext4_orphan_add(handle, inode);
+			if (err < 0) {
+				ext4_journal_stop(handle);
+				return err;
+			}
+		}
+		ext4_journal_stop(handle);
+	}
 
 	iomap->flags = 0;
 	iomap->bdev = inode->i_sb->s_bdev;
@@ -3368,8 +3429,61 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	return 0;
 }
 
+static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+			  ssize_t written, unsigned flags, struct iomap *iomap)
+{
+	int ret = 0;
+	handle_t *handle;
+	int blkbits = inode->i_blkbits;
+	bool truncate = false;
+
+	if (!(flags & IOMAP_WRITE))
+		return 0;
+
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto orphan_del;
+	}
+	if (ext4_update_inode_size(inode, offset + written))
+		ext4_mark_inode_dirty(handle, inode);
+	/*
+	 * We may need to truncate allocated but not written blocks beyond EOF.
+	 */
+	if (iomap->offset + iomap->length >
+	    ALIGN(inode->i_size, 1 << blkbits)) {
+		ext4_lblk_t written_blk, end_blk;
+
+		written_blk = (offset + written) >> blkbits;
+		end_blk = (offset + length) >> blkbits;
+		if (written_blk < end_blk && ext4_can_truncate(inode))
+			truncate = true;
+	}
+	/*
+	 * Remove inode from orphan list if we were extending a inode and
+	 * everything went fine.
+	 */
+	if (!truncate && inode->i_nlink &&
+	    !list_empty(&EXT4_I(inode)->i_orphan))
+		ext4_orphan_del(handle, inode);
+	ext4_journal_stop(handle);
+	if (truncate) {
+		ext4_truncate_failed_write(inode);
+orphan_del:
+		/*
+		 * If truncate failed early the inode might still be on the
+		 * orphan list; we need to make sure the inode is removed from
+		 * the orphan list in that case.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+	return ret;
+}
+
 struct iomap_ops ext4_iomap_ops = {
 	.iomap_begin		= ext4_iomap_begin,
+	.iomap_end		= ext4_iomap_end,
 };
 
 #else
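Taken together, the two callbacks give extending DAX writes the same crash safety as the existing write paths: blocks allocated past i_size stay tied to the orphan list until i_size has been updated. A condensed, non-verbatim summary of that protocol:

/*
 * Sketch of the crash-safety protocol implemented above (not verbatim code):
 *
 * ext4_iomap_begin(), IOMAP_WRITE, allocation reaching past i_size:
 *	ext4_orphan_add(handle, inode);
 *	-> a crash before i_size is updated lets journal recovery truncate
 *	   the blocks that were allocated beyond the old size.
 *
 * ext4_iomap_end(), once the data has been copied:
 *	ext4_update_inode_size(inode, offset + written);
 *	if the write fell short and allocated blocks remain past EOF:
 *		ext4_truncate_failed_write(inode);
 *	if the inode is still linked and still on the orphan list:
 *		ext4_orphan_del(handle, inode);
 */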