mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-01 00:58:39 +02:00 
			
		
		
		
	zonefs: use iomap for synchronous direct writes
Remove the function zonefs_file_dio_append() that is used to manually issue REQ_OP_ZONE_APPEND BIOs for processing synchronous direct writes and use iomap instead. To preserve the use of zone append operations for synchronous writes, different struct iomap_dio_ops are defined. For synchronous direct writes using zone append, zonefs_zone_append_dio_ops is introduced. The submit_bio operation of this structure is defined as the function zonefs_file_zone_append_dio_submit_io() which is used to change the BIO operation for synchronous direct IO writes to REQ_OP_ZONE_APPEND. In order to preserve the write location check on completion of zone append BIOs, the end_io operation is also defined using the function zonefs_file_zone_append_dio_bio_end_io(). This check now relies on the zonefs_zone_append_bio structure, allocated together with zone append BIOs with a dedicated BIO set. This structure includes the target inode of a zone append BIO as well as the target append offset location for the zone append operation. This is used to perform a check against bio->bi_iter.bi_sector when the BIO completes, without needing to use the zone information z_wpoffset field, thus removing the need for taking the inode truncate mutex. Signed-off-by: Damien Le Moal <dlemoal@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
This commit is contained in:
		
							parent
							
								
									9561de3a55
								
							
						
					
					
						commit
						16d7fd3cfa
					
				
					 3 changed files with 120 additions and 97 deletions
				
			
		
							
								
								
									
										206
									
								
								fs/zonefs/file.c
									
									
									
									
									
								
							
							
						
						
									
										206
									
								
								fs/zonefs/file.c
									
									
									
									
									
								
							|  | @ -342,6 +342,77 @@ static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence) | ||||||
| 	return generic_file_llseek_size(file, offset, whence, isize, isize); | 	return generic_file_llseek_size(file, offset, whence, isize, isize); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | struct zonefs_zone_append_bio { | ||||||
|  | 	/* The target inode of the BIO */ | ||||||
|  | 	struct inode *inode; | ||||||
|  | 
 | ||||||
|  | 	/* For sync writes, the target append write offset */ | ||||||
|  | 	u64 append_offset; | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * This member must come last, bio_alloc_bioset will allocate enough | ||||||
|  | 	 * bytes for the entire zonefs_zone_append_bio but relies on bio being last. | ||||||
|  | 	 */ | ||||||
|  | 	struct bio bio; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static inline struct zonefs_zone_append_bio * | ||||||
|  | zonefs_zone_append_bio(struct bio *bio) | ||||||
|  | { | ||||||
|  | 	return container_of(bio, struct zonefs_zone_append_bio, bio); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void zonefs_file_zone_append_dio_bio_end_io(struct bio *bio) | ||||||
|  | { | ||||||
|  | 	struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio); | ||||||
|  | 	struct zonefs_zone *z = zonefs_inode_zone(za_bio->inode); | ||||||
|  | 	sector_t za_sector; | ||||||
|  | 
 | ||||||
|  | 	if (bio->bi_status != BLK_STS_OK) | ||||||
|  | 		goto bio_end; | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * If the file zone was written underneath the file system, the zone | ||||||
|  | 	 * append operation can still succeed (if the zone is not full) but | ||||||
|  | 	 * the write append location will not be where we expect it to be. | ||||||
|  | 	 * Check that we wrote where we intended to, that is, at z->z_wpoffset. | ||||||
|  | 	 */ | ||||||
|  | 	za_sector = z->z_sector + (za_bio->append_offset >> SECTOR_SHIFT); | ||||||
|  | 	if (bio->bi_iter.bi_sector != za_sector) { | ||||||
|  | 		zonefs_warn(za_bio->inode->i_sb, | ||||||
|  | 			    "Invalid write sector %llu for zone at %llu\n", | ||||||
|  | 			    bio->bi_iter.bi_sector, z->z_sector); | ||||||
|  | 		bio->bi_status = BLK_STS_IOERR; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | bio_end: | ||||||
|  | 	iomap_dio_bio_end_io(bio); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void zonefs_file_zone_append_dio_submit_io(const struct iomap_iter *iter, | ||||||
|  | 						  struct bio *bio, | ||||||
|  | 						  loff_t file_offset) | ||||||
|  | { | ||||||
|  | 	struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio); | ||||||
|  | 	struct inode *inode = iter->inode; | ||||||
|  | 	struct zonefs_zone *z = zonefs_inode_zone(inode); | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * Issue a zone append BIO to process sync dio writes. The append | ||||||
|  | 	 * file offset is saved to check the zone append write location | ||||||
|  | 	 * on completion of the BIO. | ||||||
|  | 	 */ | ||||||
|  | 	za_bio->inode = inode; | ||||||
|  | 	za_bio->append_offset = file_offset; | ||||||
|  | 
 | ||||||
|  | 	bio->bi_opf &= ~REQ_OP_WRITE; | ||||||
|  | 	bio->bi_opf |= REQ_OP_ZONE_APPEND; | ||||||
|  | 	bio->bi_iter.bi_sector = z->z_sector; | ||||||
|  | 	bio->bi_end_io = zonefs_file_zone_append_dio_bio_end_io; | ||||||
|  | 
 | ||||||
|  | 	submit_bio(bio); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, | static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, | ||||||
| 					int error, unsigned int flags) | 					int error, unsigned int flags) | ||||||
| { | { | ||||||
|  | @ -372,93 +443,17 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static const struct iomap_dio_ops zonefs_write_dio_ops = { | static struct bio_set zonefs_zone_append_bio_set; | ||||||
| 	.end_io			= zonefs_file_write_dio_end_io, | 
 | ||||||
|  | static const struct iomap_dio_ops zonefs_zone_append_dio_ops = { | ||||||
|  | 	.submit_io	= zonefs_file_zone_append_dio_submit_io, | ||||||
|  | 	.end_io		= zonefs_file_write_dio_end_io, | ||||||
|  | 	.bio_set	= &zonefs_zone_append_bio_set, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) | static const struct iomap_dio_ops zonefs_write_dio_ops = { | ||||||
| { | 	.end_io		= zonefs_file_write_dio_end_io, | ||||||
| 	struct inode *inode = file_inode(iocb->ki_filp); | }; | ||||||
| 	struct zonefs_zone *z = zonefs_inode_zone(inode); |  | ||||||
| 	struct block_device *bdev = inode->i_sb->s_bdev; |  | ||||||
| 	unsigned int max = bdev_max_zone_append_sectors(bdev); |  | ||||||
| 	pgoff_t start, end; |  | ||||||
| 	struct bio *bio; |  | ||||||
| 	ssize_t size = 0; |  | ||||||
| 	int nr_pages; |  | ||||||
| 	ssize_t ret; |  | ||||||
| 
 |  | ||||||
| 	max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); |  | ||||||
| 	iov_iter_truncate(from, max); |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * If the inode block size (zone write granularity) is smaller than the |  | ||||||
| 	 * page size, we may be appending data belonging to the last page of the |  | ||||||
| 	 * inode straddling inode->i_size, with that page already cached due to |  | ||||||
| 	 * a buffered read or readahead. So make sure to invalidate that page. |  | ||||||
| 	 * This will always be a no-op for the case where the block size is |  | ||||||
| 	 * equal to the page size. |  | ||||||
| 	 */ |  | ||||||
| 	start = iocb->ki_pos >> PAGE_SHIFT; |  | ||||||
| 	end = (iocb->ki_pos + iov_iter_count(from) - 1) >> PAGE_SHIFT; |  | ||||||
| 	if (invalidate_inode_pages2_range(inode->i_mapping, start, end)) |  | ||||||
| 		return -EBUSY; |  | ||||||
| 
 |  | ||||||
| 	nr_pages = iov_iter_npages(from, BIO_MAX_VECS); |  | ||||||
| 	if (!nr_pages) |  | ||||||
| 		return 0; |  | ||||||
| 
 |  | ||||||
| 	bio = bio_alloc(bdev, nr_pages, |  | ||||||
| 			REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS); |  | ||||||
| 	bio->bi_iter.bi_sector = z->z_sector; |  | ||||||
| 	bio->bi_ioprio = iocb->ki_ioprio; |  | ||||||
| 	if (iocb_is_dsync(iocb)) |  | ||||||
| 		bio->bi_opf |= REQ_FUA; |  | ||||||
| 
 |  | ||||||
| 	ret = bio_iov_iter_get_pages(bio, from); |  | ||||||
| 	if (unlikely(ret)) |  | ||||||
| 		goto out_release; |  | ||||||
| 
 |  | ||||||
| 	size = bio->bi_iter.bi_size; |  | ||||||
| 	task_io_account_write(size); |  | ||||||
| 
 |  | ||||||
| 	if (iocb->ki_flags & IOCB_HIPRI) |  | ||||||
| 		bio_set_polled(bio, iocb); |  | ||||||
| 
 |  | ||||||
| 	ret = submit_bio_wait(bio); |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * If the file zone was written underneath the file system, the zone |  | ||||||
| 	 * write pointer may not be where we expect it to be, but the zone |  | ||||||
| 	 * append write can still succeed. So check manually that we wrote where |  | ||||||
| 	 * we intended to, that is, at zi->i_wpoffset. |  | ||||||
| 	 */ |  | ||||||
| 	if (!ret) { |  | ||||||
| 		sector_t wpsector = |  | ||||||
| 			z->z_sector + (z->z_wpoffset >> SECTOR_SHIFT); |  | ||||||
| 
 |  | ||||||
| 		if (bio->bi_iter.bi_sector != wpsector) { |  | ||||||
| 			zonefs_warn(inode->i_sb, |  | ||||||
| 				"Corrupted write pointer %llu for zone at %llu\n", |  | ||||||
| 				bio->bi_iter.bi_sector, z->z_sector); |  | ||||||
| 			ret = -EIO; |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	zonefs_file_write_dio_end_io(iocb, size, ret, 0); |  | ||||||
| 	trace_zonefs_file_dio_append(inode, size, ret); |  | ||||||
| 
 |  | ||||||
| out_release: |  | ||||||
| 	bio_release_pages(bio, false); |  | ||||||
| 	bio_put(bio); |  | ||||||
| 
 |  | ||||||
| 	if (ret >= 0) { |  | ||||||
| 		iocb->ki_pos += size; |  | ||||||
| 		return size; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	return ret; |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Do not exceed the LFS limits nor the file zone size. If pos is under the |  * Do not exceed the LFS limits nor the file zone size. If pos is under the | ||||||
|  | @ -539,6 +534,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) | ||||||
| 	struct zonefs_inode_info *zi = ZONEFS_I(inode); | 	struct zonefs_inode_info *zi = ZONEFS_I(inode); | ||||||
| 	struct zonefs_zone *z = zonefs_inode_zone(inode); | 	struct zonefs_zone *z = zonefs_inode_zone(inode); | ||||||
| 	struct super_block *sb = inode->i_sb; | 	struct super_block *sb = inode->i_sb; | ||||||
|  | 	const struct iomap_dio_ops *dio_ops; | ||||||
| 	bool sync = is_sync_kiocb(iocb); | 	bool sync = is_sync_kiocb(iocb); | ||||||
| 	bool append = false; | 	bool append = false; | ||||||
| 	ssize_t ret, count; | 	ssize_t ret, count; | ||||||
|  | @ -582,20 +578,26 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (append) { | 	if (append) { | ||||||
| 		ret = zonefs_file_dio_append(iocb, from); | 		unsigned int max = bdev_max_zone_append_sectors(sb->s_bdev); | ||||||
|  | 
 | ||||||
|  | 		max = ALIGN_DOWN(max << SECTOR_SHIFT, sb->s_blocksize); | ||||||
|  | 		iov_iter_truncate(from, max); | ||||||
|  | 
 | ||||||
|  | 		dio_ops = &zonefs_zone_append_dio_ops; | ||||||
| 	} else { | 	} else { | ||||||
| 		/*
 | 		dio_ops = &zonefs_write_dio_ops; | ||||||
| 		 * iomap_dio_rw() may return ENOTBLK if there was an issue with |  | ||||||
| 		 * page invalidation. Overwrite that error code with EBUSY to |  | ||||||
| 		 * be consistent with zonefs_file_dio_append() return value for |  | ||||||
| 		 * similar issues. |  | ||||||
| 		 */ |  | ||||||
| 		ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, |  | ||||||
| 				   &zonefs_write_dio_ops, 0, NULL, 0); |  | ||||||
| 		if (ret == -ENOTBLK) |  | ||||||
| 			ret = -EBUSY; |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * iomap_dio_rw() may return ENOTBLK if there was an issue with | ||||||
|  | 	 * page invalidation. Overwrite that error code with EBUSY so that | ||||||
|  | 	 * the user can make sense of the error. | ||||||
|  | 	 */ | ||||||
|  | 	ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops, | ||||||
|  | 			   dio_ops, 0, NULL, 0); | ||||||
|  | 	if (ret == -ENOTBLK) | ||||||
|  | 		ret = -EBUSY; | ||||||
|  | 
 | ||||||
| 	if (zonefs_zone_is_seq(z) && | 	if (zonefs_zone_is_seq(z) && | ||||||
| 	    (ret > 0 || ret == -EIOCBQUEUED)) { | 	    (ret > 0 || ret == -EIOCBQUEUED)) { | ||||||
| 		if (ret > 0) | 		if (ret > 0) | ||||||
|  | @ -900,3 +902,15 @@ const struct file_operations zonefs_file_operations = { | ||||||
| 	.splice_write	= iter_file_splice_write, | 	.splice_write	= iter_file_splice_write, | ||||||
| 	.iopoll		= iocb_bio_iopoll, | 	.iopoll		= iocb_bio_iopoll, | ||||||
| }; | }; | ||||||
|  | 
 | ||||||
|  | int zonefs_file_bioset_init(void) | ||||||
|  | { | ||||||
|  | 	return bioset_init(&zonefs_zone_append_bio_set, BIO_POOL_SIZE, | ||||||
|  | 			   offsetof(struct zonefs_zone_append_bio, bio), | ||||||
|  | 			   BIOSET_NEED_BVECS); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void zonefs_file_bioset_exit(void) | ||||||
|  | { | ||||||
|  | 	bioset_exit(&zonefs_zone_append_bio_set); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -1412,10 +1412,14 @@ static int __init zonefs_init(void) | ||||||
| 
 | 
 | ||||||
| 	BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE); | 	BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE); | ||||||
| 
 | 
 | ||||||
| 	ret = zonefs_init_inodecache(); | 	ret = zonefs_file_bioset_init(); | ||||||
| 	if (ret) | 	if (ret) | ||||||
| 		return ret; | 		return ret; | ||||||
| 
 | 
 | ||||||
|  | 	ret = zonefs_init_inodecache(); | ||||||
|  | 	if (ret) | ||||||
|  | 		goto destroy_bioset; | ||||||
|  | 
 | ||||||
| 	ret = zonefs_sysfs_init(); | 	ret = zonefs_sysfs_init(); | ||||||
| 	if (ret) | 	if (ret) | ||||||
| 		goto destroy_inodecache; | 		goto destroy_inodecache; | ||||||
|  | @ -1430,6 +1434,8 @@ static int __init zonefs_init(void) | ||||||
| 	zonefs_sysfs_exit(); | 	zonefs_sysfs_exit(); | ||||||
| destroy_inodecache: | destroy_inodecache: | ||||||
| 	zonefs_destroy_inodecache(); | 	zonefs_destroy_inodecache(); | ||||||
|  | destroy_bioset: | ||||||
|  | 	zonefs_file_bioset_exit(); | ||||||
| 
 | 
 | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
|  | @ -1439,6 +1445,7 @@ static void __exit zonefs_exit(void) | ||||||
| 	unregister_filesystem(&zonefs_type); | 	unregister_filesystem(&zonefs_type); | ||||||
| 	zonefs_sysfs_exit(); | 	zonefs_sysfs_exit(); | ||||||
| 	zonefs_destroy_inodecache(); | 	zonefs_destroy_inodecache(); | ||||||
|  | 	zonefs_file_bioset_exit(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| MODULE_AUTHOR("Damien Le Moal"); | MODULE_AUTHOR("Damien Le Moal"); | ||||||
|  |  | ||||||
|  | @ -279,6 +279,8 @@ extern const struct file_operations zonefs_dir_operations; | ||||||
| extern const struct address_space_operations zonefs_file_aops; | extern const struct address_space_operations zonefs_file_aops; | ||||||
| extern const struct file_operations zonefs_file_operations; | extern const struct file_operations zonefs_file_operations; | ||||||
| int zonefs_file_truncate(struct inode *inode, loff_t isize); | int zonefs_file_truncate(struct inode *inode, loff_t isize); | ||||||
|  | int zonefs_file_bioset_init(void); | ||||||
|  | void zonefs_file_bioset_exit(void); | ||||||
| 
 | 
 | ||||||
| /* In sysfs.c */ | /* In sysfs.c */ | ||||||
| int zonefs_sysfs_register(struct super_block *sb); | int zonefs_sysfs_register(struct super_block *sb); | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Damien Le Moal
						Damien Le Moal