mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	btrfs: defer adding raid type kobject until after chunk relocation
Any time the first block group of a new type is created, we add a new kobject to sysfs to hold the attributes for that type. Kobject-internal allocations always use GFP_KERNEL, making them prone to fs-reclaim races. While it appears as if this can occur any time a block group is created, the only times the first block group of a new type can be created in memory are at mount and when we create the first new block group during raid conversion.

This patch adds a new list to track pending kobject additions and then handles them after we do chunk relocation. Between relocating the target chunk (or forcing allocation of a new chunk in the case of data) and removing the old chunk, we're in a safe place for fs-reclaim to occur. We're holding the volume mutex, which is already held across page faults, and the delete_unused_bgs_mutex, which will only stall the cleaner thread.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
		
							parent
							
								
									dc2d3005d2
								
							
						
					
					
						commit
						75cb379d26
					
				
					 5 changed files with 62 additions and 20 deletions
				
			
		| 
						 | 
					@ -385,8 +385,9 @@ struct btrfs_dev_replace {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* For raid type sysfs entries */
 | 
					/* For raid type sysfs entries */
 | 
				
			||||||
struct raid_kobject {
 | 
					struct raid_kobject {
 | 
				
			||||||
	int raid_type;
 | 
						u64 flags;
 | 
				
			||||||
	struct kobject kobj;
 | 
						struct kobject kobj;
 | 
				
			||||||
 | 
						struct list_head list;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct btrfs_space_info {
 | 
					struct btrfs_space_info {
 | 
				
			||||||
| 
						 | 
					@ -940,6 +941,8 @@ struct btrfs_fs_info {
 | 
				
			||||||
	u32 thread_pool_size;
 | 
						u32 thread_pool_size;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct kobject *space_info_kobj;
 | 
						struct kobject *space_info_kobj;
 | 
				
			||||||
 | 
						struct list_head pending_raid_kobjs;
 | 
				
			||||||
 | 
						spinlock_t pending_raid_kobjs_lock; /* uncontended */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	u64 total_pinned;
 | 
						u64 total_pinned;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2700,6 +2703,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
 | 
				
			||||||
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 | 
					int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 | 
				
			||||||
			   struct btrfs_fs_info *fs_info, u64 bytes_used,
 | 
								   struct btrfs_fs_info *fs_info, u64 bytes_used,
 | 
				
			||||||
			   u64 type, u64 chunk_offset, u64 size);
 | 
								   u64 type, u64 chunk_offset, u64 size);
 | 
				
			||||||
 | 
					void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info);
 | 
				
			||||||
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
 | 
					struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
 | 
				
			||||||
				struct btrfs_fs_info *fs_info,
 | 
									struct btrfs_fs_info *fs_info,
 | 
				
			||||||
				const u64 chunk_offset);
 | 
									const u64 chunk_offset);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2431,6 +2431,8 @@ int open_ctree(struct super_block *sb,
 | 
				
			||||||
	INIT_LIST_HEAD(&fs_info->delayed_iputs);
 | 
						INIT_LIST_HEAD(&fs_info->delayed_iputs);
 | 
				
			||||||
	INIT_LIST_HEAD(&fs_info->delalloc_roots);
 | 
						INIT_LIST_HEAD(&fs_info->delalloc_roots);
 | 
				
			||||||
	INIT_LIST_HEAD(&fs_info->caching_block_groups);
 | 
						INIT_LIST_HEAD(&fs_info->caching_block_groups);
 | 
				
			||||||
 | 
						INIT_LIST_HEAD(&fs_info->pending_raid_kobjs);
 | 
				
			||||||
 | 
						spin_lock_init(&fs_info->pending_raid_kobjs_lock);
 | 
				
			||||||
	spin_lock_init(&fs_info->delalloc_root_lock);
 | 
						spin_lock_init(&fs_info->delalloc_root_lock);
 | 
				
			||||||
	spin_lock_init(&fs_info->trans_lock);
 | 
						spin_lock_init(&fs_info->trans_lock);
 | 
				
			||||||
	spin_lock_init(&fs_info->fs_roots_radix_lock);
 | 
						spin_lock_init(&fs_info->fs_roots_radix_lock);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -9918,9 +9918,39 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* link_block_group will queue up kobjects to add when we're reclaim-safe */
 | 
				
			||||||
 | 
					void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct btrfs_space_info *space_info;
 | 
				
			||||||
 | 
						struct raid_kobject *rkobj;
 | 
				
			||||||
 | 
						LIST_HEAD(list);
 | 
				
			||||||
 | 
						int index;
 | 
				
			||||||
 | 
						int ret = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock(&fs_info->pending_raid_kobjs_lock);
 | 
				
			||||||
 | 
						list_splice_init(&fs_info->pending_raid_kobjs, &list);
 | 
				
			||||||
 | 
						spin_unlock(&fs_info->pending_raid_kobjs_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						list_for_each_entry(rkobj, &list, list) {
 | 
				
			||||||
 | 
							space_info = __find_space_info(fs_info, rkobj->flags);
 | 
				
			||||||
 | 
							index = btrfs_bg_flags_to_raid_index(rkobj->flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							ret = kobject_add(&rkobj->kobj, &space_info->kobj,
 | 
				
			||||||
 | 
									  "%s", get_raid_name(index));
 | 
				
			||||||
 | 
							if (ret) {
 | 
				
			||||||
 | 
								kobject_put(&rkobj->kobj);
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (ret)
 | 
				
			||||||
 | 
							btrfs_warn(fs_info,
 | 
				
			||||||
 | 
								   "failed to add kobject for block cache, ignoring");
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void link_block_group(struct btrfs_block_group_cache *cache)
 | 
					static void link_block_group(struct btrfs_block_group_cache *cache)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct btrfs_space_info *space_info = cache->space_info;
 | 
						struct btrfs_space_info *space_info = cache->space_info;
 | 
				
			||||||
 | 
						struct btrfs_fs_info *fs_info = cache->fs_info;
 | 
				
			||||||
	int index = btrfs_bg_flags_to_raid_index(cache->flags);
 | 
						int index = btrfs_bg_flags_to_raid_index(cache->flags);
 | 
				
			||||||
	bool first = false;
 | 
						bool first = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -9931,27 +9961,20 @@ static void link_block_group(struct btrfs_block_group_cache *cache)
 | 
				
			||||||
	up_write(&space_info->groups_sem);
 | 
						up_write(&space_info->groups_sem);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (first) {
 | 
						if (first) {
 | 
				
			||||||
		struct raid_kobject *rkobj;
 | 
							struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
 | 
				
			||||||
		int ret;
 | 
							if (!rkobj) {
 | 
				
			||||||
 | 
								btrfs_warn(cache->fs_info,
 | 
				
			||||||
		rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
 | 
									"couldn't alloc memory for raid level kobject");
 | 
				
			||||||
		if (!rkobj)
 | 
								return;
 | 
				
			||||||
			goto out_err;
 | 
					 | 
				
			||||||
		rkobj->raid_type = index;
 | 
					 | 
				
			||||||
		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
 | 
					 | 
				
			||||||
		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
 | 
					 | 
				
			||||||
				  "%s", get_raid_name(index));
 | 
					 | 
				
			||||||
		if (ret) {
 | 
					 | 
				
			||||||
			kobject_put(&rkobj->kobj);
 | 
					 | 
				
			||||||
			goto out_err;
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
							rkobj->flags = cache->flags;
 | 
				
			||||||
 | 
							kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							spin_lock(&fs_info->pending_raid_kobjs_lock);
 | 
				
			||||||
 | 
							list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
 | 
				
			||||||
 | 
							spin_unlock(&fs_info->pending_raid_kobjs_lock);
 | 
				
			||||||
		space_info->block_group_kobjs[index] = &rkobj->kobj;
 | 
							space_info->block_group_kobjs[index] = &rkobj->kobj;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					 | 
				
			||||||
	return;
 | 
					 | 
				
			||||||
out_err:
 | 
					 | 
				
			||||||
	btrfs_warn(cache->fs_info,
 | 
					 | 
				
			||||||
		   "failed to add kobject for block cache, ignoring");
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct btrfs_block_group_cache *
 | 
					static struct btrfs_block_group_cache *
 | 
				
			||||||
| 
						 | 
					@ -10167,6 +10190,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 | 
				
			||||||
			inc_block_group_ro(cache, 1);
 | 
								inc_block_group_ro(cache, 1);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						btrfs_add_raid_kobjects(info);
 | 
				
			||||||
	init_global_block_rsv(info);
 | 
						init_global_block_rsv(info);
 | 
				
			||||||
	ret = 0;
 | 
						ret = 0;
 | 
				
			||||||
error:
 | 
					error:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -272,7 +272,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
 | 
						struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
 | 
				
			||||||
	struct btrfs_block_group_cache *block_group;
 | 
						struct btrfs_block_group_cache *block_group;
 | 
				
			||||||
	int index = to_raid_kobj(kobj)->raid_type;
 | 
						int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags);
 | 
				
			||||||
	u64 val = 0;
 | 
						u64 val = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	down_read(&sinfo->groups_sem);
 | 
						down_read(&sinfo->groups_sem);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3003,6 +3003,16 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 | 
				
			||||||
	if (ret)
 | 
						if (ret)
 | 
				
			||||||
		return ret;
 | 
							return ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * We add the kobjects here (and after forcing data chunk creation)
 | 
				
			||||||
 | 
						 * since relocation is the only place we'll create chunks of a new
 | 
				
			||||||
 | 
						 * type at runtime.  The only place where we'll remove the last
 | 
				
			||||||
 | 
						 * chunk of a type is the call immediately below this one.  Even
 | 
				
			||||||
 | 
						 * so, we're protected against races with the cleaner thread since
 | 
				
			||||||
 | 
						 * we're covered by the delete_unused_bgs_mutex.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						btrfs_add_raid_kobjects(fs_info);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	trans = btrfs_start_trans_remove_block_group(root->fs_info,
 | 
						trans = btrfs_start_trans_remove_block_group(root->fs_info,
 | 
				
			||||||
						     chunk_offset);
 | 
											     chunk_offset);
 | 
				
			||||||
	if (IS_ERR(trans)) {
 | 
						if (IS_ERR(trans)) {
 | 
				
			||||||
| 
						 | 
					@ -3130,6 +3140,8 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
 | 
				
			||||||
			if (ret < 0)
 | 
								if (ret < 0)
 | 
				
			||||||
				return ret;
 | 
									return ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								btrfs_add_raid_kobjects(fs_info);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			return 1;
 | 
								return 1;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue