	btrfs: zoned: implement active zone tracking
Add a zone_is_active flag to struct btrfs_block_group. The flag indicates that
the underlying zones are all active. Such zone-active block groups are
tracked on the fs_info->zone_active_bgs list.

btrfs_dev_{set,clear}_active_zone() take care of the underlying device part:
they set or clear the per-zone bit in the bitmap that records zone
activeness, and they keep count of how many more zones can be activated.

btrfs_zone_{activate,finish}() take care of the logical part and the list
management. In addition, btrfs_zone_finish() waits for any outstanding
writes to the block group and sends REQ_OP_ZONE_FINISH to the zone.
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
			
			
parent dafc340dbd
commit afba2bc036

7 changed files with 226 additions and 2 deletions
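The commit splits the work across two levels: btrfs_dev_{set,clear}_active_zone() do the per-device accounting (a bitmap of active zones plus a counter of remaining activations), while btrfs_zone_{activate,finish}() manage the per-block-group state and the fs_info->zone_active_bgs list. As a rough sketch of the caller-side lifecycle this enables; the wrapper below and its -ENOSPC policy are hypothetical, and only btrfs_zone_activate()/btrfs_zone_finish() come from this patch:

/*
 * Hypothetical caller of the two entry points added by this commit.
 */
static int write_to_zoned_bg(struct btrfs_block_group *bg)
{
	/* Make sure the underlying zones are active before writing. */
	if (!btrfs_zone_activate(bg))
		return -ENOSPC;	/* assumed policy: activation budget exhausted */

	/* ... issue sequential writes at bg->alloc_offset ... */

	/* Once the zone is full, hand the active-zone slot back. */
	if (bg->alloc_offset == bg->zone_capacity)
		return btrfs_zone_finish(bg);	/* -EAGAIN: writes in flight, retry */

	return 0;
}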
				
			
fs/btrfs/block-group.c
@@ -1896,6 +1896,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
 	INIT_LIST_HEAD(&cache->discard_list);
 	INIT_LIST_HEAD(&cache->dirty_list);
 	INIT_LIST_HEAD(&cache->io_list);
+	INIT_LIST_HEAD(&cache->active_bg_list);
 	btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
 	atomic_set(&cache->frozen, 0);
 	mutex_init(&cache->free_space_lock);
@@ -3842,6 +3843,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 	}
 	spin_unlock(&info->unused_bgs_lock);
 
+	spin_lock(&info->zone_active_bgs_lock);
+	while (!list_empty(&info->zone_active_bgs)) {
+		block_group = list_first_entry(&info->zone_active_bgs,
+					       struct btrfs_block_group,
+					       active_bg_list);
+		list_del_init(&block_group->active_bg_list);
+		btrfs_put_block_group(block_group);
+	}
+	spin_unlock(&info->zone_active_bgs_lock);
+
 	spin_lock(&info->block_group_cache_lock);
 	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
 		block_group = rb_entry(n, struct btrfs_block_group,

fs/btrfs/block-group.h
@@ -98,6 +98,7 @@ struct btrfs_block_group {
 	unsigned int to_copy:1;
 	unsigned int relocating_repair:1;
 	unsigned int chunk_item_inserted:1;
+	unsigned int zone_is_active:1;
 
 	int disk_cache_state;
 
@@ -205,6 +206,7 @@ struct btrfs_block_group {
 	u64 zone_capacity;
 	u64 meta_write_pointer;
 	struct map_lookup *physical_map;
+	struct list_head active_bg_list;
 };
 
 static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)

fs/btrfs/ctree.h
@@ -1018,6 +1018,9 @@ struct btrfs_fs_info {
 	spinlock_t treelog_bg_lock;
 	u64 treelog_bg;
 
+	spinlock_t zone_active_bgs_lock;
+	struct list_head zone_active_bgs;
+
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
 	spinlock_t ref_verify_lock;
 	struct rb_root block_tree;

fs/btrfs/disk-io.c
@@ -2884,6 +2884,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	spin_lock_init(&fs_info->buffer_lock);
 	spin_lock_init(&fs_info->unused_bgs_lock);
 	spin_lock_init(&fs_info->treelog_bg_lock);
+	spin_lock_init(&fs_info->zone_active_bgs_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->unused_bg_unpin_mutex);
 	mutex_init(&fs_info->reclaim_bgs_lock);
@@ -2897,6 +2898,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
 	INIT_LIST_HEAD(&fs_info->unused_bgs);
 	INIT_LIST_HEAD(&fs_info->reclaim_bgs);
+	INIT_LIST_HEAD(&fs_info->zone_active_bgs);
 #ifdef CONFIG_BTRFS_DEBUG
 	INIT_LIST_HEAD(&fs_info->allocated_roots);
 	INIT_LIST_HEAD(&fs_info->allocated_ebs);

fs/btrfs/free-space-cache.c
@@ -2763,8 +2763,9 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
 	 * out the free space after the allocation offset.
 	 */
 	if (btrfs_is_zoned(fs_info)) {
-		btrfs_info(fs_info, "free space %llu",
-			   block_group->zone_capacity - block_group->alloc_offset);
+		btrfs_info(fs_info, "free space %llu active %d",
+			   block_group->zone_capacity - block_group->alloc_offset,
+			   block_group->zone_is_active);
 		return;
 	}
 
							
								
								
									

fs/btrfs/zoned.c
@@ -989,6 +989,41 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
 	return pos;
 }
 
+static bool btrfs_dev_set_active_zone(struct btrfs_device *device, u64 pos)
+{
+	struct btrfs_zoned_device_info *zone_info = device->zone_info;
+	unsigned int zno = (pos >> zone_info->zone_size_shift);
+
+	/* We can use any number of zones */
+	if (zone_info->max_active_zones == 0)
+		return true;
+
+	if (!test_bit(zno, zone_info->active_zones)) {
+		/* Active zone left? */
+		if (atomic_dec_if_positive(&zone_info->active_zones_left) < 0)
+			return false;
+		if (test_and_set_bit(zno, zone_info->active_zones)) {
+			/* Someone already set the bit */
+			atomic_inc(&zone_info->active_zones_left);
+		}
+	}
+
+	return true;
+}
+
+static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos)
+{
+	struct btrfs_zoned_device_info *zone_info = device->zone_info;
+	unsigned int zno = (pos >> zone_info->zone_size_shift);
+
+	/* We can use any number of zones */
+	if (zone_info->max_active_zones == 0)
+		return;
+
+	if (test_and_clear_bit(zno, zone_info->active_zones))
+		atomic_inc(&zone_info->active_zones_left);
+}
+
 int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
 			    u64 length, u64 *bytes)
 {
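btrfs_dev_set_active_zone() above is lock-free: it reserves budget from active_zones_left first, then claims the per-zone bit, and refunds the budget if another CPU won the test_and_set_bit() race. The same reserve-then-claim pattern can be reproduced with C11 atomics; the sketch below is standalone userspace code under assumed names, not btrfs code:

#include <stdatomic.h>
#include <stdbool.h>

#define MAX_ZONES 128

static atomic_int active_zones_left = 8;	/* assumed activation budget */
static atomic_bool active_zones[MAX_ZONES];	/* one "active" flag per zone */

/* Userspace analogue of the kernel's atomic_dec_if_positive(). */
static int dec_if_positive(atomic_int *v)
{
	int old = atomic_load(v);

	while (old > 0) {
		/* On failure, old is reloaded and the loop retries. */
		if (atomic_compare_exchange_weak(v, &old, old - 1))
			return old - 1;
	}
	return -1;		/* budget exhausted, nothing taken */
}

static bool set_active_zone(unsigned int zno)
{
	if (!atomic_load(&active_zones[zno])) {
		/* Reserve budget first... */
		if (dec_if_positive(&active_zones_left) < 0)
			return false;
		/* ...then claim the flag; refund if someone beat us to it. */
		if (atomic_exchange(&active_zones[zno], true))
			atomic_fetch_add(&active_zones_left, 1);
	}
	return true;
}
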
@@ -1004,6 +1039,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
 	*bytes = length;
 	while (length) {
 		btrfs_dev_set_zone_empty(device, physical);
+		btrfs_dev_clear_active_zone(device, physical);
 		physical += device->zone_info->zone_size;
 		length -= device->zone_info->zone_size;
 	}
@@ -1656,3 +1692,160 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
 
 	return device;
 }
+
+/**
+ * Activate block group and underlying device zones
+ *
+ * @block_group: the block group to activate
+ *
+ * Return: true on success, false otherwise
+ */
+bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+{
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
+	struct map_lookup *map;
+	struct btrfs_device *device;
+	u64 physical;
+	bool ret;
+
+	if (!btrfs_is_zoned(block_group->fs_info))
+		return true;
+
+	map = block_group->physical_map;
+	/* Currently support SINGLE profile only */
+	ASSERT(map->num_stripes == 1);
+	device = map->stripes[0].dev;
+	physical = map->stripes[0].physical;
+
+	if (device->zone_info->max_active_zones == 0)
+		return true;
+
+	spin_lock(&block_group->lock);
+
+	if (block_group->zone_is_active) {
+		ret = true;
+		goto out_unlock;
+	}
+
+	/* No space left */
+	if (block_group->alloc_offset == block_group->zone_capacity) {
+		ret = false;
+		goto out_unlock;
+	}
+
+	if (!btrfs_dev_set_active_zone(device, physical)) {
+		/* Cannot activate the zone */
+		ret = false;
+		goto out_unlock;
+	}
+
+	/* Successfully activated all the zones */
+	block_group->zone_is_active = 1;
+
+	spin_unlock(&block_group->lock);
+
+	/* For the active block group list */
+	btrfs_get_block_group(block_group);
+
+	spin_lock(&fs_info->zone_active_bgs_lock);
+	ASSERT(list_empty(&block_group->active_bg_list));
+	list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
+	spin_unlock(&fs_info->zone_active_bgs_lock);
+
+	return true;
+
+out_unlock:
+	spin_unlock(&block_group->lock);
+	return ret;
+}
+
+int btrfs_zone_finish(struct btrfs_block_group *block_group)
+{
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
+	struct map_lookup *map;
+	struct btrfs_device *device;
+	u64 physical;
+	int ret = 0;
+
+	if (!btrfs_is_zoned(fs_info))
+		return 0;
+
+	map = block_group->physical_map;
+	/* Currently support SINGLE profile only */
+	ASSERT(map->num_stripes == 1);
+
+	device = map->stripes[0].dev;
+	physical = map->stripes[0].physical;
+
+	if (device->zone_info->max_active_zones == 0)
+		return 0;
+
+	spin_lock(&block_group->lock);
+	if (!block_group->zone_is_active) {
+		spin_unlock(&block_group->lock);
+		return 0;
+	}
+
+	/* Check if we have unwritten allocated space */
+	if ((block_group->flags &
+	     (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
+	    block_group->alloc_offset > block_group->meta_write_pointer) {
+		spin_unlock(&block_group->lock);
+		return -EAGAIN;
+	}
+	spin_unlock(&block_group->lock);
+
+	ret = btrfs_inc_block_group_ro(block_group, false);
+	if (ret)
+		return ret;
+
+	/* Ensure all writes in this block group finish */
+	btrfs_wait_block_group_reservations(block_group);
+	/* No need to wait for NOCOW writers. Zoned mode does not allow that. */
+	btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
+				 block_group->length);
+
+	spin_lock(&block_group->lock);
+
+	/*
+	 * Bail out if someone already deactivated the block group, or
+	 * allocated space is left in the block group.
+	 */
+	if (!block_group->zone_is_active) {
+		spin_unlock(&block_group->lock);
+		btrfs_dec_block_group_ro(block_group);
+		return 0;
+	}
+
+	if (block_group->reserved) {
+		spin_unlock(&block_group->lock);
+		btrfs_dec_block_group_ro(block_group);
+		return -EAGAIN;
+	}
+
+	block_group->zone_is_active = 0;
+	block_group->alloc_offset = block_group->zone_capacity;
+	block_group->free_space_ctl->free_space = 0;
+	btrfs_clear_treelog_bg(block_group);
+	spin_unlock(&block_group->lock);
+
+	ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+			       physical >> SECTOR_SHIFT,
+			       device->zone_info->zone_size >> SECTOR_SHIFT,
+			       GFP_NOFS);
+	btrfs_dec_block_group_ro(block_group);
+
+	if (!ret) {
+		btrfs_dev_clear_active_zone(device, physical);
+
+		spin_lock(&fs_info->zone_active_bgs_lock);
+		ASSERT(!list_empty(&block_group->active_bg_list));
+		list_del_init(&block_group->active_bg_list);
+		spin_unlock(&fs_info->zone_active_bgs_lock);
+
+		/* For active_bg_list */
+		btrfs_put_block_group(block_group);
+	}
+
+	return ret;
+}
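Note the shape of btrfs_zone_finish() above: it has to drop block_group->lock to wait for reservations and ordered extents (both can sleep), so after draining it retakes the lock and re-checks zone_is_active and reserved before committing. A stripped-down sketch of this check, drop lock, drain, relock, re-check pattern, using illustrative pthread-based stand-ins rather than btrfs types:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

struct state {
	pthread_mutex_t lock;
	bool active;		/* analogue of zone_is_active */
	int in_flight;		/* analogue of block_group->reserved */
};

/* Stand-in for btrfs_wait_block_group_reservations() and friends. */
static void drain_writers(struct state *s)
{
	/* e.g. block on a condition variable until in_flight drains */
}

static int try_finish(struct state *s)
{
	int ret = 0;

	pthread_mutex_lock(&s->lock);
	if (!s->active) {		/* already deactivated: nothing to do */
		pthread_mutex_unlock(&s->lock);
		return 0;
	}
	pthread_mutex_unlock(&s->lock);

	/* Draining may sleep, so the lock cannot be held across it. */
	drain_writers(s);

	pthread_mutex_lock(&s->lock);
	/* The world may have changed while we slept: re-check everything. */
	if (!s->active)
		ret = 0;		/* lost the race to another finisher */
	else if (s->in_flight > 0)
		ret = -EAGAIN;		/* new writers arrived; caller retries */
	else
		s->active = false;	/* commit, still under the lock */
	pthread_mutex_unlock(&s->lock);

	return ret;
}
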
fs/btrfs/zoned.h
@@ -69,6 +69,8 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
 				  u64 physical_start, u64 physical_pos);
 struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
 					    u64 logical, u64 length);
+bool btrfs_zone_activate(struct btrfs_block_group *block_group);
+int btrfs_zone_finish(struct btrfs_block_group *block_group);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)
@@ -204,6 +206,16 @@ static inline struct btrfs_device *btrfs_zoned_get_device(
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+{
+	return true;
+}
+
+static inline int btrfs_zone_finish(struct btrfs_block_group *block_group)
+{
+	return 0;
+}
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
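As elsewhere in zoned.h, the !CONFIG_BLK_DEV_ZONED stubs return the trivially successful values (true and 0), so call sites need no #ifdef. A hypothetical call site compiles unchanged on both configurations:

	/* No-op on regular devices; may fail only on zoned ones. */
	if (!btrfs_zone_activate(block_group))
		return -ENOSPC;	/* assumed caller policy */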