mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	btrfs: introduce mount option rescue=ignorebadroots
In the face of extent root corruption, or any other core fs-wide root corruption, we will fail to mount the file system. This makes recovery kind of a pain, because you need to fall back to userspace tools to scrape off data. Instead, provide a mechanism to gracefully handle bad roots, so we can at least mount read-only and possibly recover data from the file system. Signed-off-by: Josef Bacik <josef@toxicpanda.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
		
							parent
							
								
									68319c18cb
								
							
						
					
					
						commit
						42437a6386
					
				
					 10 changed files with 130 additions and 28 deletions
				
			
		|  | @ -1985,6 +1985,51 @@ static int read_one_block_group(struct btrfs_fs_info *info, | |||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Populate the block group cache with one placeholder block group per | ||||
|  * extent map found in fs_info->mapping_tree.  btrfs_read_block_groups() | ||||
|  * calls this instead of walking the extent tree when fs_info->extent_root | ||||
|  * is NULL. | ||||
|  * | ||||
|  * Every placeholder takes its start, length and flags from the extent map, | ||||
|  * is marked as completely used (used == length) and has its caching state | ||||
|  * set to BTRFS_CACHE_FINISHED. | ||||
|  * | ||||
|  * Returns 0 on success, -ENOMEM if a block group cannot be allocated, or | ||||
|  * the error returned by btrfs_add_block_group_cache().  The global block | ||||
|  * reserve is only initialized on success. | ||||
|  */ | ||||
| static int fill_dummy_bgs(struct btrfs_fs_info *fs_info) | ||||
| { | ||||
| 	struct extent_map_tree *em_tree = &fs_info->mapping_tree; | ||||
| 	struct btrfs_space_info *space_info; | ||||
| 	struct rb_node *node; | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) { | ||||
| 		struct extent_map *em; | ||||
| 		struct map_lookup *map; | ||||
| 		struct btrfs_block_group *bg; | ||||
| 
 | ||||
| 		em = rb_entry(node, struct extent_map, rb_node); | ||||
| 		map = em->map_lookup; | ||||
| 		bg = btrfs_create_block_group_cache(fs_info, em->start); | ||||
| 		if (!bg) { | ||||
| 			ret = -ENOMEM; | ||||
| 			break; | ||||
| 		} | ||||
| 
 | ||||
| 		/* Fill dummy cache as FULL */ | ||||
| 		bg->length = em->len; | ||||
| 		bg->flags = map->type; | ||||
| 		bg->last_byte_to_unpin = (u64)-1; | ||||
| 		bg->cached = BTRFS_CACHE_FINISHED; | ||||
| 		bg->used = em->len; | ||||
| 		ret = btrfs_add_block_group_cache(fs_info, bg); | ||||
| 		if (ret) { | ||||
| 			/* Not inserted: release what the allocation set up. */ | ||||
| 			btrfs_remove_free_space_cache(bg); | ||||
| 			btrfs_put_block_group(bg); | ||||
| 			break; | ||||
| 		} | ||||
| 		/* Account the whole length as both total and used bytes. */ | ||||
| 		btrfs_update_space_info(fs_info, bg->flags, em->len, em->len, | ||||
| 					0, &space_info); | ||||
| 		bg->space_info = space_info; | ||||
| 		link_block_group(bg); | ||||
| 
 | ||||
| 		set_avail_alloc_bits(fs_info, bg->flags); | ||||
| 	} | ||||
| 	if (!ret) | ||||
| 		btrfs_init_global_block_rsv(fs_info); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| int btrfs_read_block_groups(struct btrfs_fs_info *info) | ||||
| { | ||||
| 	struct btrfs_path *path; | ||||
|  | @ -1995,6 +2040,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) | |||
| 	int need_clear = 0; | ||||
| 	u64 cache_gen; | ||||
| 
 | ||||
| 	if (!info->extent_root) | ||||
| 		return fill_dummy_bgs(info); | ||||
| 
 | ||||
| 	key.objectid = 0; | ||||
| 	key.offset = 0; | ||||
| 	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | ||||
|  |  | |||
|  | @ -426,6 +426,14 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
| 	fs_info->delayed_block_rsv.space_info = space_info; | ||||
| 	fs_info->delayed_refs_rsv.space_info = space_info; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Our various recovery options can leave us with NULL roots, so check | ||||
| 	 * here and just bail before we go dereferencing NULLs everywhere. | ||||
| 	 */ | ||||
| 	if (!fs_info->extent_root || !fs_info->csum_root || | ||||
| 	    !fs_info->dev_root || !fs_info->chunk_root || !fs_info->tree_root) | ||||
| 		return; | ||||
| 
 | ||||
| 	fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv; | ||||
| 	fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv; | ||||
| 	fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; | ||||
|  |  | |||
|  | @ -150,7 +150,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, | |||
| 	struct compressed_bio *cb = bio->bi_private; | ||||
| 	u8 *cb_sum = cb->sums; | ||||
| 
 | ||||
| 	if (inode->flags & BTRFS_INODE_NODATASUM) | ||||
| 	if (!fs_info->csum_root || (inode->flags & BTRFS_INODE_NODATASUM)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	shash->tfm = fs_info->csum_shash; | ||||
|  |  | |||
|  | @ -1298,6 +1298,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) | |||
| #define BTRFS_MOUNT_NOLOGREPLAY		(1 << 27) | ||||
| #define BTRFS_MOUNT_REF_VERIFY		(1 << 28) | ||||
| #define BTRFS_MOUNT_DISCARD_ASYNC	(1 << 29) | ||||
| #define BTRFS_MOUNT_IGNOREBADROOTS	(1 << 30) | ||||
| 
 | ||||
| #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30) | ||||
| #define BTRFS_DEFAULT_MAX_INLINE	(2048) | ||||
|  |  | |||
|  | @ -2307,30 +2307,39 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) | |||
| 
 | ||||
| 	root = btrfs_read_tree_root(tree_root, &location); | ||||
| 	if (IS_ERR(root)) { | ||||
| 		ret = PTR_ERR(root); | ||||
| 		goto out; | ||||
| 		if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { | ||||
| 			ret = PTR_ERR(root); | ||||
| 			goto out; | ||||
| 		} | ||||
| 	} else { | ||||
| 		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||||
| 		fs_info->extent_root = root; | ||||
| 	} | ||||
| 	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||||
| 	fs_info->extent_root = root; | ||||
| 
 | ||||
| 	location.objectid = BTRFS_DEV_TREE_OBJECTID; | ||||
| 	root = btrfs_read_tree_root(tree_root, &location); | ||||
| 	if (IS_ERR(root)) { | ||||
| 		ret = PTR_ERR(root); | ||||
| 		goto out; | ||||
| 		if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { | ||||
| 			ret = PTR_ERR(root); | ||||
| 			goto out; | ||||
| 		} | ||||
| 	} else { | ||||
| 		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||||
| 		fs_info->dev_root = root; | ||||
| 		btrfs_init_devices_late(fs_info); | ||||
| 	} | ||||
| 	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||||
| 	fs_info->dev_root = root; | ||||
| 	btrfs_init_devices_late(fs_info); | ||||
| 
 | ||||
| 	location.objectid = BTRFS_CSUM_TREE_OBJECTID; | ||||
| 	root = btrfs_read_tree_root(tree_root, &location); | ||||
| 	if (IS_ERR(root)) { | ||||
| 		ret = PTR_ERR(root); | ||||
| 		goto out; | ||||
| 		if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { | ||||
| 			ret = PTR_ERR(root); | ||||
| 			goto out; | ||||
| 		} | ||||
| 	} else { | ||||
| 		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||||
| 		fs_info->csum_root = root; | ||||
| 	} | ||||
| 	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||||
| 	fs_info->csum_root = root; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * This tree can share blocks with some other fs tree during relocation | ||||
|  | @ -2339,11 +2348,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) | |||
| 	root = btrfs_get_fs_root(tree_root->fs_info, | ||||
| 				 BTRFS_DATA_RELOC_TREE_OBJECTID, true); | ||||
| 	if (IS_ERR(root)) { | ||||
| 		ret = PTR_ERR(root); | ||||
| 		goto out; | ||||
| 		if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { | ||||
| 			ret = PTR_ERR(root); | ||||
| 			goto out; | ||||
| 		} | ||||
| 	} else { | ||||
| 		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||||
| 		fs_info->data_reloc_root = root; | ||||
| 	} | ||||
| 	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||||
| 	fs_info->data_reloc_root = root; | ||||
| 
 | ||||
| 	location.objectid = BTRFS_QUOTA_TREE_OBJECTID; | ||||
| 	root = btrfs_read_tree_root(tree_root, &location); | ||||
|  | @ -2356,9 +2368,11 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) | |||
| 	location.objectid = BTRFS_UUID_TREE_OBJECTID; | ||||
| 	root = btrfs_read_tree_root(tree_root, &location); | ||||
| 	if (IS_ERR(root)) { | ||||
| 		ret = PTR_ERR(root); | ||||
| 		if (ret != -ENOENT) | ||||
| 			goto out; | ||||
| 		if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { | ||||
| 			ret = PTR_ERR(root); | ||||
| 			if (ret != -ENOENT) | ||||
| 				goto out; | ||||
| 		} | ||||
| 	} else { | ||||
| 		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||||
| 		fs_info->uuid_root = root; | ||||
|  | @ -2368,11 +2382,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) | |||
| 		location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID; | ||||
| 		root = btrfs_read_tree_root(tree_root, &location); | ||||
| 		if (IS_ERR(root)) { | ||||
| 			ret = PTR_ERR(root); | ||||
| 			goto out; | ||||
| 			if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { | ||||
| 				ret = PTR_ERR(root); | ||||
| 				goto out; | ||||
| 			} | ||||
| 		}  else { | ||||
| 			set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||||
| 			fs_info->free_space_root = root; | ||||
| 		} | ||||
| 		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||||
| 		fs_info->free_space_root = root; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
|  |  | |||
|  | @ -272,7 +272,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, | |||
| 	int count = 0; | ||||
| 	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); | ||||
| 
 | ||||
| 	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) | ||||
| 	if (!fs_info->csum_root || (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) | ||||
| 		return BLK_STS_OK; | ||||
| 
 | ||||
| 	path = btrfs_alloc_path(); | ||||
|  |  | |||
|  | @ -2187,7 +2187,8 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio, | |||
| 	int skip_sum; | ||||
| 	int async = !atomic_read(&BTRFS_I(inode)->sync_writers); | ||||
| 
 | ||||
| 	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||||
| 	skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) || | ||||
| 		   !fs_info->csum_root; | ||||
| 
 | ||||
| 	if (btrfs_is_free_space_inode(BTRFS_I(inode))) | ||||
| 		metadata = BTRFS_WQ_ENDIO_FREE_SPACE; | ||||
|  | @ -2902,6 +2903,9 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset, | |||
| 	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	if (!root->fs_info->csum_root) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && | ||||
| 	    test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { | ||||
| 		clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM); | ||||
|  |  | |||
|  | @ -360,6 +360,7 @@ enum { | |||
| 	Opt_rescue, | ||||
| 	Opt_usebackuproot, | ||||
| 	Opt_nologreplay, | ||||
| 	Opt_ignorebadroots, | ||||
| 
 | ||||
| 	/* Deprecated options */ | ||||
| 	Opt_recovery, | ||||
|  | @ -455,6 +456,8 @@ static const match_table_t tokens = { | |||
| static const match_table_t rescue_tokens = { | ||||
| 	{Opt_usebackuproot, "usebackuproot"}, | ||||
| 	{Opt_nologreplay, "nologreplay"}, | ||||
| 	{Opt_ignorebadroots, "ignorebadroots"}, | ||||
| 	{Opt_ignorebadroots, "ibadroots"}, | ||||
| 	{Opt_err, NULL}, | ||||
| }; | ||||
| 
 | ||||
|  | @ -498,6 +501,10 @@ static int parse_rescue_options(struct btrfs_fs_info *info, const char *options) | |||
| 			btrfs_set_and_info(info, NOLOGREPLAY, | ||||
| 					   "disabling log replay at mount time"); | ||||
| 			break; | ||||
| 		case Opt_ignorebadroots: | ||||
| 			btrfs_set_and_info(info, IGNOREBADROOTS, | ||||
| 					   "ignoring bad roots"); | ||||
| 			break; | ||||
| 		case Opt_err: | ||||
| 			btrfs_info(info, "unrecognized rescue option '%s'", p); | ||||
| 			ret = -EINVAL; | ||||
|  | @ -983,7 +990,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, | |||
| 	if (new_flags & SB_RDONLY) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay")) | ||||
| 	if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") || | ||||
| 	    check_ro_option(info, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots")) | ||||
| 		ret = -EINVAL; | ||||
| out: | ||||
| 	if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) && | ||||
|  | @ -1439,6 +1447,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) | |||
| 		print_rescue_option(seq, "nologreplay", &printed); | ||||
| 	if (btrfs_test_opt(info, USEBACKUPROOT)) | ||||
| 		print_rescue_option(seq, "usebackuproot", &printed); | ||||
| 	if (btrfs_test_opt(info, IGNOREBADROOTS)) | ||||
| 		print_rescue_option(seq, "ignorebadroots", &printed); | ||||
| 	if (btrfs_test_opt(info, FLUSHONCOMMIT)) | ||||
| 		seq_puts(seq, ",flushoncommit"); | ||||
| 	if (btrfs_test_opt(info, DISCARD_SYNC)) | ||||
|  |  | |||
|  | @ -332,6 +332,7 @@ BTRFS_ATTR(static_feature, send_stream_version, send_stream_version_show); | |||
| static const char *rescue_opts[] = { | ||||
| 	"usebackuproot", | ||||
| 	"nologreplay", | ||||
| 	"ignorebadroots", | ||||
| }; | ||||
| 
 | ||||
| static ssize_t supported_rescue_options_show(struct kobject *kobj, | ||||
|  |  | |||
|  | @ -7659,6 +7659,19 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info) | |||
| 	u64 prev_dev_ext_end = 0; | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We don't have a dev_root because we mounted with ignorebadroots and | ||||
| 	 * failed to load the root, so we want to skip the verification in this | ||||
| 	 * case for sure. | ||||
| 	 * | ||||
| 	 * However if the dev root is fine, but the tree itself is corrupted | ||||
| 	 * we'd still fail to mount.  This verification is only to make sure | ||||
| 	 * writes can happen safely, so instead just bypass this check | ||||
| 	 * completely in the case of IGNOREBADROOTS. | ||||
| 	 */ | ||||
| 	if (btrfs_test_opt(fs_info, IGNOREBADROOTS)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	key.objectid = 1; | ||||
| 	key.type = BTRFS_DEV_EXTENT_KEY; | ||||
| 	key.offset = 0; | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Josef Bacik
						Josef Bacik