mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	btrfs: zoned: write out partially allocated region
cow_file_range() works in an all-or-nothing way: if it fails to allocate an
extent for a part of the given region, it gives up all the region including
the successfully allocated parts. On cow_file_range(), run_delalloc_zoned()
writes data for the region only when it successfully allocates all the
region.
This all-or-nothing allocation and write-out are problematic when available
space in all the block groups gets tight with the active zone
restriction. btrfs_reserve_extent() tries hard to utilize the space left in
the active block groups, but finally gives up and fails with
-ENOSPC. However, if we send IOs for the successfully allocated region, we
can finish a zone and can continue on the rest of the allocation on a newly
allocated block group.
This patch implements the partial write-out for run_delalloc_zoned(). With
this patch applied, cow_file_range() returns -EAGAIN to tell the caller to
do something to make progress on the further allocation, and reports the
successfully allocated region via done_offset. Furthermore, the zoned extent
allocator returns -EAGAIN to tell cow_file_range() to go back to the caller side.
Actually, we still need to wait for an IO to complete to continue the
allocation. The next patch implements that part.
CC: stable@vger.kernel.org # 5.16+
Fixes: afba2bc036 ("btrfs: zoned: implement active zone tracking")
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
			
			
This commit is contained in:
		
							parent
							
								
									b6a98021e4
								
							
						
					
					
						commit
						898793d992
					
				
					 2 changed files with 59 additions and 14 deletions
				
			
		| 
						 | 
					@ -3996,6 +3996,16 @@ static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
 | 
				
			||||||
	if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
 | 
						if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
 | 
				
			||||||
		return -ENOSPC;
 | 
							return -ENOSPC;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Even min_alloc_size is not left in any block groups. Since we cannot
 | 
				
			||||||
 | 
						 * activate a new block group, allocating it may not help. Let's tell a
 | 
				
			||||||
 | 
						 * caller to try again and hope it progress something by writing some
 | 
				
			||||||
 | 
						 * parts of the region. That is only possible for data block groups,
 | 
				
			||||||
 | 
						 * where a part of the region can be written.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
 | 
				
			||||||
 | 
							return -EAGAIN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * We cannot activate a new block group and no enough space left in any
 | 
						 * We cannot activate a new block group and no enough space left in any
 | 
				
			||||||
	 * block groups. So, allocating a new block group may not help. But,
 | 
						 * block groups. So, allocating a new block group may not help. But,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -117,7 +117,8 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback);
 | 
				
			||||||
static noinline int cow_file_range(struct btrfs_inode *inode,
 | 
					static noinline int cow_file_range(struct btrfs_inode *inode,
 | 
				
			||||||
				   struct page *locked_page,
 | 
									   struct page *locked_page,
 | 
				
			||||||
				   u64 start, u64 end, int *page_started,
 | 
									   u64 start, u64 end, int *page_started,
 | 
				
			||||||
				   unsigned long *nr_written, int unlock);
 | 
									   unsigned long *nr_written, int unlock,
 | 
				
			||||||
 | 
									   u64 *done_offset);
 | 
				
			||||||
static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 | 
					static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 | 
				
			||||||
				       u64 len, u64 orig_start, u64 block_start,
 | 
									       u64 len, u64 orig_start, u64 block_start,
 | 
				
			||||||
				       u64 block_len, u64 orig_block_len,
 | 
									       u64 block_len, u64 orig_block_len,
 | 
				
			||||||
| 
						 | 
					@ -921,7 +922,7 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
 | 
				
			||||||
	 * can directly submit them without interruption.
 | 
						 * can directly submit them without interruption.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	ret = cow_file_range(inode, locked_page, start, end, &page_started,
 | 
						ret = cow_file_range(inode, locked_page, start, end, &page_started,
 | 
				
			||||||
			     &nr_written, 0);
 | 
								     &nr_written, 0, NULL);
 | 
				
			||||||
	/* Inline extent inserted, page gets unlocked and everything is done */
 | 
						/* Inline extent inserted, page gets unlocked and everything is done */
 | 
				
			||||||
	if (page_started) {
 | 
						if (page_started) {
 | 
				
			||||||
		ret = 0;
 | 
							ret = 0;
 | 
				
			||||||
| 
						 | 
					@ -1170,7 +1171,8 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
 | 
				
			||||||
static noinline int cow_file_range(struct btrfs_inode *inode,
 | 
					static noinline int cow_file_range(struct btrfs_inode *inode,
 | 
				
			||||||
				   struct page *locked_page,
 | 
									   struct page *locked_page,
 | 
				
			||||||
				   u64 start, u64 end, int *page_started,
 | 
									   u64 start, u64 end, int *page_started,
 | 
				
			||||||
				   unsigned long *nr_written, int unlock)
 | 
									   unsigned long *nr_written, int unlock,
 | 
				
			||||||
 | 
									   u64 *done_offset)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct btrfs_root *root = inode->root;
 | 
						struct btrfs_root *root = inode->root;
 | 
				
			||||||
	struct btrfs_fs_info *fs_info = root->fs_info;
 | 
						struct btrfs_fs_info *fs_info = root->fs_info;
 | 
				
			||||||
| 
						 | 
					@ -1363,6 +1365,21 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 | 
				
			||||||
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 | 
						btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 | 
				
			||||||
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
 | 
						btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
 | 
				
			||||||
out_unlock:
 | 
					out_unlock:
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * If done_offset is non-NULL and ret == -EAGAIN, we expect the
 | 
				
			||||||
 | 
						 * caller to write out the successfully allocated region and retry.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (done_offset && ret == -EAGAIN) {
 | 
				
			||||||
 | 
							if (orig_start < start)
 | 
				
			||||||
 | 
								*done_offset = start - 1;
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
								*done_offset = start;
 | 
				
			||||||
 | 
							return ret;
 | 
				
			||||||
 | 
						} else if (ret == -EAGAIN) {
 | 
				
			||||||
 | 
							/* Convert to -ENOSPC since the caller cannot retry. */
 | 
				
			||||||
 | 
							ret = -ENOSPC;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Now, we have three regions to clean up:
 | 
						 * Now, we have three regions to clean up:
 | 
				
			||||||
	 *
 | 
						 *
 | 
				
			||||||
| 
						 | 
					@ -1608,19 +1625,37 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
 | 
				
			||||||
				       u64 end, int *page_started,
 | 
									       u64 end, int *page_started,
 | 
				
			||||||
				       unsigned long *nr_written)
 | 
									       unsigned long *nr_written)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						u64 done_offset = end;
 | 
				
			||||||
	int ret;
 | 
						int ret;
 | 
				
			||||||
 | 
						bool locked_page_done = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ret = cow_file_range(inode, locked_page, start, end, page_started,
 | 
						while (start <= end) {
 | 
				
			||||||
			     nr_written, 0);
 | 
							ret = cow_file_range(inode, locked_page, start, end, page_started,
 | 
				
			||||||
	if (ret)
 | 
									     nr_written, 0, &done_offset);
 | 
				
			||||||
		return ret;
 | 
							if (ret && ret != -EAGAIN)
 | 
				
			||||||
 | 
								return ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (*page_started)
 | 
							if (*page_started) {
 | 
				
			||||||
		return 0;
 | 
								ASSERT(ret == 0);
 | 
				
			||||||
 | 
								return 0;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (ret == 0)
 | 
				
			||||||
 | 
								done_offset = end;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (done_offset == start)
 | 
				
			||||||
 | 
								return -ENOSPC;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (!locked_page_done) {
 | 
				
			||||||
 | 
								__set_page_dirty_nobuffers(locked_page);
 | 
				
			||||||
 | 
								account_page_redirty(locked_page);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							locked_page_done = true;
 | 
				
			||||||
 | 
							extent_write_locked_range(&inode->vfs_inode, start, done_offset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							start = done_offset + 1;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	__set_page_dirty_nobuffers(locked_page);
 | 
					 | 
				
			||||||
	account_page_redirty(locked_page);
 | 
					 | 
				
			||||||
	extent_write_locked_range(&inode->vfs_inode, start, end);
 | 
					 | 
				
			||||||
	*page_started = 1;
 | 
						*page_started = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
| 
						 | 
					@ -1712,7 +1747,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return cow_file_range(inode, locked_page, start, end, page_started,
 | 
						return cow_file_range(inode, locked_page, start, end, page_started,
 | 
				
			||||||
			      nr_written, 1);
 | 
								      nr_written, 1, NULL);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct can_nocow_file_extent_args {
 | 
					struct can_nocow_file_extent_args {
 | 
				
			||||||
| 
						 | 
					@ -2185,7 +2220,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
 | 
				
			||||||
						 page_started, nr_written);
 | 
											 page_started, nr_written);
 | 
				
			||||||
		else
 | 
							else
 | 
				
			||||||
			ret = cow_file_range(inode, locked_page, start, end,
 | 
								ret = cow_file_range(inode, locked_page, start, end,
 | 
				
			||||||
					     page_started, nr_written, 1);
 | 
										     page_started, nr_written, 1, NULL);
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
 | 
							set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
 | 
				
			||||||
		ret = cow_file_range_async(inode, wbc, locked_page, start, end,
 | 
							ret = cow_file_range_async(inode, wbc, locked_page, start, end,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue