mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	btrfs: introduce a new helper to submit write bio for repair
Both scrub and read-repair are utilizing a special repair writes that: - Only writes back to a single device Even for read-repair on RAID56, we only update the corrupted data stripe itself, not triggering the full RMW path. - Requires a valid @mirror_num For RAID56 case, only @mirror_num == 1 is valid. For non-RAID56 cases, we need @mirror_num to locate our stripe. - No data csum generation needed These two call sites still have some differences though: - Read-repair goes plain bio It doesn't need a full btrfs_bio, and goes submit_bio_wait(). - New scrub repair would go btrfs_bio To simplify both read and write path. So here this patch would: - Introduce a common helper, btrfs_map_repair_block() Due to the single device nature, we can use an on-stack btrfs_io_stripe to pass device and its physical bytenr. - Introduce a new interface, btrfs_submit_repair_bio(), for later scrub code This is for the incoming scrub code. Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
		
							parent
							
								
									4317ff0056
								
							
						
					
					
						commit
						4886ff7b50
					
				
					 5 changed files with 132 additions and 44 deletions
				
			
		| 
						 | 
					@ -735,12 +735,9 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 | 
				
			||||||
			    u64 length, u64 logical, struct page *page,
 | 
								    u64 length, u64 logical, struct page *page,
 | 
				
			||||||
			    unsigned int pg_offset, int mirror_num)
 | 
								    unsigned int pg_offset, int mirror_num)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct btrfs_device *dev;
 | 
						struct btrfs_io_stripe smap = { 0 };
 | 
				
			||||||
	struct bio_vec bvec;
 | 
						struct bio_vec bvec;
 | 
				
			||||||
	struct bio bio;
 | 
						struct bio bio;
 | 
				
			||||||
	u64 map_length = 0;
 | 
					 | 
				
			||||||
	u64 sector;
 | 
					 | 
				
			||||||
	struct btrfs_io_context *bioc = NULL;
 | 
					 | 
				
			||||||
	int ret = 0;
 | 
						int ret = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
 | 
						ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
 | 
				
			||||||
| 
						 | 
					@ -749,68 +746,38 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 | 
				
			||||||
	if (btrfs_repair_one_zone(fs_info, logical))
 | 
						if (btrfs_repair_one_zone(fs_info, logical))
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	map_length = length;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Avoid races with device replace and make sure our bioc has devices
 | 
						 * Avoid races with device replace and make sure our bioc has devices
 | 
				
			||||||
	 * associated to its stripes that don't go away while we are doing the
 | 
						 * associated to its stripes that don't go away while we are doing the
 | 
				
			||||||
	 * read repair operation.
 | 
						 * read repair operation.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	btrfs_bio_counter_inc_blocked(fs_info);
 | 
						btrfs_bio_counter_inc_blocked(fs_info);
 | 
				
			||||||
	if (btrfs_is_parity_mirror(fs_info, logical, length)) {
 | 
						ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num);
 | 
				
			||||||
		/*
 | 
						if (ret < 0)
 | 
				
			||||||
		 * Note that we don't use BTRFS_MAP_WRITE because it's supposed
 | 
							goto out_counter_dec;
 | 
				
			||||||
		 * to update all raid stripes, but here we just want to correct
 | 
					 | 
				
			||||||
		 * bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
 | 
					 | 
				
			||||||
		 * stripe's dev and sector.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
 | 
					 | 
				
			||||||
				      &map_length, &bioc, 0);
 | 
					 | 
				
			||||||
		if (ret)
 | 
					 | 
				
			||||||
			goto out_counter_dec;
 | 
					 | 
				
			||||||
		ASSERT(bioc->mirror_num == 1);
 | 
					 | 
				
			||||||
	} else {
 | 
					 | 
				
			||||||
		ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
 | 
					 | 
				
			||||||
				      &map_length, &bioc, mirror_num);
 | 
					 | 
				
			||||||
		if (ret)
 | 
					 | 
				
			||||||
			goto out_counter_dec;
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * This happens when dev-replace is also running, and the
 | 
					 | 
				
			||||||
		 * mirror_num indicates the dev-replace target.
 | 
					 | 
				
			||||||
		 *
 | 
					 | 
				
			||||||
		 * In this case, we don't need to do anything, as the read
 | 
					 | 
				
			||||||
		 * error just means the replace progress hasn't reached our
 | 
					 | 
				
			||||||
		 * read range, and later replace routine would handle it well.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		if (mirror_num != bioc->mirror_num)
 | 
					 | 
				
			||||||
			goto out_counter_dec;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
 | 
						if (!smap.dev->bdev ||
 | 
				
			||||||
	dev = bioc->stripes[bioc->mirror_num - 1].dev;
 | 
						    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &smap.dev->dev_state)) {
 | 
				
			||||||
	btrfs_put_bioc(bioc);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (!dev || !dev->bdev ||
 | 
					 | 
				
			||||||
	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
 | 
					 | 
				
			||||||
		ret = -EIO;
 | 
							ret = -EIO;
 | 
				
			||||||
		goto out_counter_dec;
 | 
							goto out_counter_dec;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
 | 
						bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
 | 
				
			||||||
	bio.bi_iter.bi_sector = sector;
 | 
						bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
 | 
				
			||||||
	__bio_add_page(&bio, page, length, pg_offset);
 | 
						__bio_add_page(&bio, page, length, pg_offset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	btrfsic_check_bio(&bio);
 | 
						btrfsic_check_bio(&bio);
 | 
				
			||||||
	ret = submit_bio_wait(&bio);
 | 
						ret = submit_bio_wait(&bio);
 | 
				
			||||||
	if (ret) {
 | 
						if (ret) {
 | 
				
			||||||
		/* try to remap that extent elsewhere? */
 | 
							/* try to remap that extent elsewhere? */
 | 
				
			||||||
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
 | 
							btrfs_dev_stat_inc_and_print(smap.dev, BTRFS_DEV_STAT_WRITE_ERRS);
 | 
				
			||||||
		goto out_bio_uninit;
 | 
							goto out_bio_uninit;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	btrfs_info_rl_in_rcu(fs_info,
 | 
						btrfs_info_rl_in_rcu(fs_info,
 | 
				
			||||||
		"read error corrected: ino %llu off %llu (dev %s sector %llu)",
 | 
							"read error corrected: ino %llu off %llu (dev %s sector %llu)",
 | 
				
			||||||
			     ino, start, btrfs_dev_name(dev), sector);
 | 
								     ino, start, btrfs_dev_name(smap.dev),
 | 
				
			||||||
 | 
								     smap.physical >> SECTOR_SHIFT);
 | 
				
			||||||
	ret = 0;
 | 
						ret = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out_bio_uninit:
 | 
					out_bio_uninit:
 | 
				
			||||||
| 
						 | 
					@ -820,6 +787,45 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 | 
				
			||||||
	return ret;
 | 
						return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Submit a btrfs_bio based repair write.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * If @dev_replace is true, the write would be submitted to dev-replace target.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct btrfs_fs_info *fs_info = bbio->fs_info;
 | 
				
			||||||
 | 
						u64 logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
 | 
				
			||||||
 | 
						u64 length = bbio->bio.bi_iter.bi_size;
 | 
				
			||||||
 | 
						struct btrfs_io_stripe smap = { 0 };
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ASSERT(fs_info);
 | 
				
			||||||
 | 
						ASSERT(mirror_num > 0);
 | 
				
			||||||
 | 
						ASSERT(btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE);
 | 
				
			||||||
 | 
						ASSERT(!bbio->inode);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						btrfs_bio_counter_inc_blocked(fs_info);
 | 
				
			||||||
 | 
						ret = btrfs_map_repair_block(fs_info, &smap, logical, length, mirror_num);
 | 
				
			||||||
 | 
						if (ret < 0)
 | 
				
			||||||
 | 
							goto fail;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (dev_replace) {
 | 
				
			||||||
 | 
							if (btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE && btrfs_is_zoned(fs_info)) {
 | 
				
			||||||
 | 
								bbio->bio.bi_opf &= ~REQ_OP_WRITE;
 | 
				
			||||||
 | 
								bbio->bio.bi_opf |= REQ_OP_ZONE_APPEND;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							ASSERT(smap.dev == fs_info->dev_replace.srcdev);
 | 
				
			||||||
 | 
							smap.dev = fs_info->dev_replace.tgtdev;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						__btrfs_submit_bio(&bbio->bio, NULL, &smap, mirror_num);
 | 
				
			||||||
 | 
						return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fail:
 | 
				
			||||||
 | 
						btrfs_bio_counter_dec(fs_info);
 | 
				
			||||||
 | 
						btrfs_bio_end_io(bbio, errno_to_blk_status(ret));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int __init btrfs_bioset_init(void)
 | 
					int __init btrfs_bioset_init(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
 | 
						if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -98,6 +98,7 @@ static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
 | 
				
			||||||
#define REQ_BTRFS_CGROUP_PUNT			REQ_FS_PRIVATE
 | 
					#define REQ_BTRFS_CGROUP_PUNT			REQ_FS_PRIVATE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num);
 | 
					void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num);
 | 
				
			||||||
 | 
					void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace);
 | 
				
			||||||
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 | 
					int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 | 
				
			||||||
			    u64 length, u64 logical, struct page *page,
 | 
								    u64 length, u64 logical, struct page *page,
 | 
				
			||||||
			    unsigned int pg_offset, int mirror_num);
 | 
								    unsigned int pg_offset, int mirror_num);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -170,6 +170,11 @@ static inline int nr_data_stripes(const struct map_lookup *map)
 | 
				
			||||||
	return map->num_stripes - btrfs_nr_parity_stripes(map->type);
 | 
						return map->num_stripes - btrfs_nr_parity_stripes(map->type);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline int nr_bioc_data_stripes(const struct btrfs_io_context *bioc)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return bioc->num_stripes - btrfs_nr_parity_stripes(bioc->map_type);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define RAID5_P_STRIPE ((u64)-2)
 | 
					#define RAID5_P_STRIPE ((u64)-2)
 | 
				
			||||||
#define RAID6_Q_STRIPE ((u64)-1)
 | 
					#define RAID6_Q_STRIPE ((u64)-1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8019,3 +8019,76 @@ bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return true;
 | 
						return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void map_raid56_repair_block(struct btrfs_io_context *bioc,
 | 
				
			||||||
 | 
									    struct btrfs_io_stripe *smap,
 | 
				
			||||||
 | 
									    u64 logical)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int data_stripes = nr_bioc_data_stripes(bioc);
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; i < data_stripes; i++) {
 | 
				
			||||||
 | 
							u64 stripe_start = bioc->full_stripe_logical +
 | 
				
			||||||
 | 
									   (i << BTRFS_STRIPE_LEN_SHIFT);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (logical >= stripe_start &&
 | 
				
			||||||
 | 
							    logical < stripe_start + BTRFS_STRIPE_LEN)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						ASSERT(i < data_stripes);
 | 
				
			||||||
 | 
						smap->dev = bioc->stripes[i].dev;
 | 
				
			||||||
 | 
						smap->physical = bioc->stripes[i].physical +
 | 
				
			||||||
 | 
								((logical - bioc->full_stripe_logical) &
 | 
				
			||||||
 | 
								 BTRFS_STRIPE_LEN_MASK);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Map a repair write into a single device.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * A repair write is triggered by read time repair or scrub, which would only
 | 
				
			||||||
 | 
					 * update the contents of a single device.
 | 
				
			||||||
 | 
					 * Not update any other mirrors nor go through RMW path.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Callers should ensure:
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * - Call btrfs_bio_counter_inc_blocked() first
 | 
				
			||||||
 | 
					 * - The range does not cross stripe boundary
 | 
				
			||||||
 | 
					 * - Has a valid @mirror_num passed in.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
 | 
				
			||||||
 | 
								   struct btrfs_io_stripe *smap, u64 logical,
 | 
				
			||||||
 | 
								   u32 length, int mirror_num)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct btrfs_io_context *bioc = NULL;
 | 
				
			||||||
 | 
						u64 map_length = length;
 | 
				
			||||||
 | 
						int mirror_ret = mirror_num;
 | 
				
			||||||
 | 
						int ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ASSERT(mirror_num > 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ret = __btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length,
 | 
				
			||||||
 | 
									&bioc, smap, &mirror_ret, true);
 | 
				
			||||||
 | 
						if (ret < 0)
 | 
				
			||||||
 | 
							return ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* The map range should not cross stripe boundary. */
 | 
				
			||||||
 | 
						ASSERT(map_length >= length);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Already mapped to single stripe. */
 | 
				
			||||||
 | 
						if (!bioc)
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Map the RAID56 multi-stripe writes to a single one. */
 | 
				
			||||||
 | 
						if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 | 
				
			||||||
 | 
							map_raid56_repair_block(bioc, smap, logical);
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ASSERT(mirror_num <= bioc->num_stripes);
 | 
				
			||||||
 | 
						smap->dev = bioc->stripes[mirror_num - 1].dev;
 | 
				
			||||||
 | 
						smap->physical = bioc->stripes[mirror_num - 1].physical;
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						btrfs_put_bioc(bioc);
 | 
				
			||||||
 | 
						ASSERT(smap->dev);
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -587,6 +587,9 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 | 
				
			||||||
		      struct btrfs_io_context **bioc_ret,
 | 
							      struct btrfs_io_context **bioc_ret,
 | 
				
			||||||
		      struct btrfs_io_stripe *smap, int *mirror_num_ret,
 | 
							      struct btrfs_io_stripe *smap, int *mirror_num_ret,
 | 
				
			||||||
		      int need_raid_map);
 | 
							      int need_raid_map);
 | 
				
			||||||
 | 
					int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
 | 
				
			||||||
 | 
								   struct btrfs_io_stripe *smap, u64 logical,
 | 
				
			||||||
 | 
								   u32 length, int mirror_num);
 | 
				
			||||||
struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
 | 
					struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
 | 
				
			||||||
					       u64 logical, u64 *length_ret,
 | 
										       u64 logical, u64 *length_ret,
 | 
				
			||||||
					       u32 *num_stripes);
 | 
										       u32 *num_stripes);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue