mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	md/raid10: remove rcu protection to access rdev from conf
Because it's safe to access rdev from conf: - If any spinlock is held, because synchronize_rcu() from md_kick_rdev_from_array() will prevent 'rdev' from being freed until the spinlock is released; - If 'reconfig_mutex' is held, because rdev can't be added to or removed from the array; - If there is normal IO inflight, because mddev_suspend() will prevent rdev from being added to or removed from the array; - If there is sync IO inflight, because 'MD_RECOVERY_RUNNING' is checked in remove_and_add_spares(). These cover all the scenarios in raid10. This patch also cleans up the code that handles the case where the replacement replaces rdev while IO is still inflight. Signed-off-by: Yu Kuai <yukuai3@huawei.com> Signed-off-by: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20231125081604.3939938-3-yukuai1@huaweicloud.com
This commit is contained in:
		
							parent
							
								
									c891f1fd90
								
							
						
					
					
						commit
						a448af25be
					
				
					 1 changed files with 58 additions and 155 deletions
				
			
		| 
						 | 
				
			
			@ -743,7 +743,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
 | 
			
		|||
	struct geom *geo = &conf->geo;
 | 
			
		||||
 | 
			
		||||
	raid10_find_phys(conf, r10_bio);
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	best_dist_slot = -1;
 | 
			
		||||
	min_pending = UINT_MAX;
 | 
			
		||||
	best_dist_rdev = NULL;
 | 
			
		||||
| 
						 | 
				
			
			@ -775,18 +774,11 @@ static struct md_rdev *read_balance(struct r10conf *conf,
 | 
			
		|||
		if (r10_bio->devs[slot].bio == IO_BLOCKED)
 | 
			
		||||
			continue;
 | 
			
		||||
		disk = r10_bio->devs[slot].devnum;
 | 
			
		||||
		rdev = rcu_dereference(conf->mirrors[disk].replacement);
 | 
			
		||||
		rdev = conf->mirrors[disk].replacement;
 | 
			
		||||
		if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
 | 
			
		||||
		    r10_bio->devs[slot].addr + sectors >
 | 
			
		||||
		    rdev->recovery_offset) {
 | 
			
		||||
			/*
 | 
			
		||||
			 * Read replacement first to prevent reading both rdev
 | 
			
		||||
			 * and replacement as NULL during replacement replace
 | 
			
		||||
			 * rdev.
 | 
			
		||||
			 */
 | 
			
		||||
			smp_mb();
 | 
			
		||||
			rdev = rcu_dereference(conf->mirrors[disk].rdev);
 | 
			
		||||
		}
 | 
			
		||||
		    rdev->recovery_offset)
 | 
			
		||||
			rdev = conf->mirrors[disk].rdev;
 | 
			
		||||
		if (rdev == NULL ||
 | 
			
		||||
		    test_bit(Faulty, &rdev->flags))
 | 
			
		||||
			continue;
 | 
			
		||||
| 
						 | 
				
			
			@ -876,7 +868,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
 | 
			
		|||
		r10_bio->read_slot = slot;
 | 
			
		||||
	} else
 | 
			
		||||
		rdev = NULL;
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
	*max_sectors = best_good_sectors;
 | 
			
		||||
 | 
			
		||||
	return rdev;
 | 
			
		||||
| 
						 | 
				
			
			@ -1198,9 +1189,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 | 
			
		|||
		 */
 | 
			
		||||
		gfp = GFP_NOIO | __GFP_HIGH;
 | 
			
		||||
 | 
			
		||||
		rcu_read_lock();
 | 
			
		||||
		disk = r10_bio->devs[slot].devnum;
 | 
			
		||||
		err_rdev = rcu_dereference(conf->mirrors[disk].rdev);
 | 
			
		||||
		err_rdev = conf->mirrors[disk].rdev;
 | 
			
		||||
		if (err_rdev)
 | 
			
		||||
			snprintf(b, sizeof(b), "%pg", err_rdev->bdev);
 | 
			
		||||
		else {
 | 
			
		||||
| 
						 | 
				
			
			@ -1208,7 +1198,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 | 
			
		|||
			/* This never gets dereferenced */
 | 
			
		||||
			err_rdev = r10_bio->devs[slot].rdev;
 | 
			
		||||
		}
 | 
			
		||||
		rcu_read_unlock();
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors))
 | 
			
		||||
| 
						 | 
				
			
			@ -1279,15 +1268,8 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 | 
			
		|||
	int devnum = r10_bio->devs[n_copy].devnum;
 | 
			
		||||
	struct bio *mbio;
 | 
			
		||||
 | 
			
		||||
	if (replacement) {
 | 
			
		||||
		rdev = conf->mirrors[devnum].replacement;
 | 
			
		||||
		if (rdev == NULL) {
 | 
			
		||||
			/* Replacement just got moved to main 'rdev' */
 | 
			
		||||
			smp_mb();
 | 
			
		||||
			rdev = conf->mirrors[devnum].rdev;
 | 
			
		||||
		}
 | 
			
		||||
	} else
 | 
			
		||||
		rdev = conf->mirrors[devnum].rdev;
 | 
			
		||||
	rdev = replacement ? conf->mirrors[devnum].replacement :
 | 
			
		||||
			     conf->mirrors[devnum].rdev;
 | 
			
		||||
 | 
			
		||||
	mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set);
 | 
			
		||||
	if (replacement)
 | 
			
		||||
| 
						 | 
				
			
			@ -1321,25 +1303,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 | 
			
		|||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct md_rdev *dereference_rdev_and_rrdev(struct raid10_info *mirror,
 | 
			
		||||
						  struct md_rdev **prrdev)
 | 
			
		||||
{
 | 
			
		||||
	struct md_rdev *rdev, *rrdev;
 | 
			
		||||
 | 
			
		||||
	rrdev = rcu_dereference(mirror->replacement);
 | 
			
		||||
	/*
 | 
			
		||||
	 * Read replacement first to prevent reading both rdev and
 | 
			
		||||
	 * replacement as NULL during replacement replace rdev.
 | 
			
		||||
	 */
 | 
			
		||||
	smp_mb();
 | 
			
		||||
	rdev = rcu_dereference(mirror->rdev);
 | 
			
		||||
	if (rdev == rrdev)
 | 
			
		||||
		rrdev = NULL;
 | 
			
		||||
 | 
			
		||||
	*prrdev = rrdev;
 | 
			
		||||
	return rdev;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
 | 
			
		||||
{
 | 
			
		||||
	int i;
 | 
			
		||||
| 
						 | 
				
			
			@ -1348,11 +1311,11 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
 | 
			
		|||
 | 
			
		||||
retry_wait:
 | 
			
		||||
	blocked_rdev = NULL;
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	for (i = 0; i < conf->copies; i++) {
 | 
			
		||||
		struct md_rdev *rdev, *rrdev;
 | 
			
		||||
 | 
			
		||||
		rdev = dereference_rdev_and_rrdev(&conf->mirrors[i], &rrdev);
 | 
			
		||||
		rdev = conf->mirrors[i].rdev;
 | 
			
		||||
		rrdev = conf->mirrors[i].replacement;
 | 
			
		||||
		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
 | 
			
		||||
			atomic_inc(&rdev->nr_pending);
 | 
			
		||||
			blocked_rdev = rdev;
 | 
			
		||||
| 
						 | 
				
			
			@ -1391,7 +1354,6 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
 | 
			
		|||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
 | 
			
		||||
	if (unlikely(blocked_rdev)) {
 | 
			
		||||
		/* Have to wait for this device to get unblocked, then retry */
 | 
			
		||||
| 
						 | 
				
			
			@ -1474,14 +1436,14 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 | 
			
		|||
 | 
			
		||||
	wait_blocked_dev(mddev, r10_bio);
 | 
			
		||||
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	max_sectors = r10_bio->sectors;
 | 
			
		||||
 | 
			
		||||
	for (i = 0;  i < conf->copies; i++) {
 | 
			
		||||
		int d = r10_bio->devs[i].devnum;
 | 
			
		||||
		struct md_rdev *rdev, *rrdev;
 | 
			
		||||
 | 
			
		||||
		rdev = dereference_rdev_and_rrdev(&conf->mirrors[d], &rrdev);
 | 
			
		||||
		rdev = conf->mirrors[d].rdev;
 | 
			
		||||
		rrdev = conf->mirrors[d].replacement;
 | 
			
		||||
		if (rdev && (test_bit(Faulty, &rdev->flags)))
 | 
			
		||||
			rdev = NULL;
 | 
			
		||||
		if (rrdev && (test_bit(Faulty, &rrdev->flags)))
 | 
			
		||||
| 
						 | 
				
			
			@ -1535,7 +1497,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 | 
			
		|||
			atomic_inc(&rrdev->nr_pending);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
 | 
			
		||||
	if (max_sectors < r10_bio->sectors)
 | 
			
		||||
		r10_bio->sectors = max_sectors;
 | 
			
		||||
| 
						 | 
				
			
			@ -1625,17 +1586,8 @@ static void raid10_end_discard_request(struct bio *bio)
 | 
			
		|||
		set_bit(R10BIO_Uptodate, &r10_bio->state);
 | 
			
		||||
 | 
			
		||||
	dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
 | 
			
		||||
	if (repl)
 | 
			
		||||
		rdev = conf->mirrors[dev].replacement;
 | 
			
		||||
	if (!rdev) {
 | 
			
		||||
		/*
 | 
			
		||||
		 * raid10_remove_disk uses smp_mb to make sure rdev is set to
 | 
			
		||||
		 * replacement before setting replacement to NULL. It can read
 | 
			
		||||
		 * rdev first without barrier protect even replacement is NULL
 | 
			
		||||
		 */
 | 
			
		||||
		smp_rmb();
 | 
			
		||||
		rdev = conf->mirrors[dev].rdev;
 | 
			
		||||
	}
 | 
			
		||||
	rdev = repl ? conf->mirrors[dev].replacement :
 | 
			
		||||
		      conf->mirrors[dev].rdev;
 | 
			
		||||
 | 
			
		||||
	raid_end_discard_bio(r10_bio);
 | 
			
		||||
	rdev_dec_pending(rdev, conf->mddev);
 | 
			
		||||
| 
						 | 
				
			
			@ -1785,11 +1737,11 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
 | 
			
		|||
	 * inc refcount on their rdev.  Record them by setting
 | 
			
		||||
	 * bios[x] to bio
 | 
			
		||||
	 */
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	for (disk = 0; disk < geo->raid_disks; disk++) {
 | 
			
		||||
		struct md_rdev *rdev, *rrdev;
 | 
			
		||||
 | 
			
		||||
		rdev = dereference_rdev_and_rrdev(&conf->mirrors[disk], &rrdev);
 | 
			
		||||
		rdev = conf->mirrors[disk].rdev;
 | 
			
		||||
		rrdev = conf->mirrors[disk].replacement;
 | 
			
		||||
		r10_bio->devs[disk].bio = NULL;
 | 
			
		||||
		r10_bio->devs[disk].repl_bio = NULL;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1809,7 +1761,6 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
 | 
			
		|||
			atomic_inc(&rrdev->nr_pending);
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
 | 
			
		||||
	atomic_set(&r10_bio->remaining, 1);
 | 
			
		||||
	for (disk = 0; disk < geo->raid_disks; disk++) {
 | 
			
		||||
| 
						 | 
				
			
			@ -1939,6 +1890,8 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev)
 | 
			
		|||
	struct r10conf *conf = mddev->private;
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	lockdep_assert_held(&mddev->lock);
 | 
			
		||||
 | 
			
		||||
	if (conf->geo.near_copies < conf->geo.raid_disks)
 | 
			
		||||
		seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2);
 | 
			
		||||
	if (conf->geo.near_copies > 1)
 | 
			
		||||
| 
						 | 
				
			
			@ -1953,12 +1906,11 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev)
 | 
			
		|||
	}
 | 
			
		||||
	seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
 | 
			
		||||
					conf->geo.raid_disks - mddev->degraded);
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	for (i = 0; i < conf->geo.raid_disks; i++) {
 | 
			
		||||
		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
 | 
			
		||||
		struct md_rdev *rdev = READ_ONCE(conf->mirrors[i].rdev);
 | 
			
		||||
 | 
			
		||||
		seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
 | 
			
		||||
	}
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
	seq_printf(seq, "]");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1980,7 +1932,6 @@ static int _enough(struct r10conf *conf, int previous, int ignore)
 | 
			
		|||
		ncopies = conf->geo.near_copies;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	do {
 | 
			
		||||
		int n = conf->copies;
 | 
			
		||||
		int cnt = 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -1988,7 +1939,7 @@ static int _enough(struct r10conf *conf, int previous, int ignore)
 | 
			
		|||
		while (n--) {
 | 
			
		||||
			struct md_rdev *rdev;
 | 
			
		||||
			if (this != ignore &&
 | 
			
		||||
			    (rdev = rcu_dereference(conf->mirrors[this].rdev)) &&
 | 
			
		||||
			    (rdev = conf->mirrors[this].rdev) &&
 | 
			
		||||
			    test_bit(In_sync, &rdev->flags))
 | 
			
		||||
				cnt++;
 | 
			
		||||
			this = (this+1) % disks;
 | 
			
		||||
| 
						 | 
				
			
			@ -1999,7 +1950,6 @@ static int _enough(struct r10conf *conf, int previous, int ignore)
 | 
			
		|||
	} while (first != 0);
 | 
			
		||||
	has_enough = 1;
 | 
			
		||||
out:
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
	return has_enough;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2072,8 +2022,7 @@ static void print_conf(struct r10conf *conf)
 | 
			
		|||
	pr_debug(" --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded,
 | 
			
		||||
		 conf->geo.raid_disks);
 | 
			
		||||
 | 
			
		||||
	/* This is only called with ->reconfix_mutex held, so
 | 
			
		||||
	 * rcu protection of rdev is not needed */
 | 
			
		||||
	lockdep_assert_held(&conf->mddev->reconfig_mutex);
 | 
			
		||||
	for (i = 0; i < conf->geo.raid_disks; i++) {
 | 
			
		||||
		rdev = conf->mirrors[i].rdev;
 | 
			
		||||
		if (rdev)
 | 
			
		||||
| 
						 | 
				
			
			@ -2190,7 +2139,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 | 
			
		|||
		err = 0;
 | 
			
		||||
		if (rdev->saved_raid_disk != mirror)
 | 
			
		||||
			conf->fullsync = 1;
 | 
			
		||||
		rcu_assign_pointer(p->rdev, rdev);
 | 
			
		||||
		WRITE_ONCE(p->rdev, rdev);
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2204,7 +2153,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 | 
			
		|||
			disk_stack_limits(mddev->gendisk, rdev->bdev,
 | 
			
		||||
					  rdev->data_offset << 9);
 | 
			
		||||
		conf->fullsync = 1;
 | 
			
		||||
		rcu_assign_pointer(p->replacement, rdev);
 | 
			
		||||
		WRITE_ONCE(p->replacement, rdev);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	print_conf(conf);
 | 
			
		||||
| 
						 | 
				
			
			@ -2246,15 +2195,12 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 | 
			
		|||
		err = -EBUSY;
 | 
			
		||||
		goto abort;
 | 
			
		||||
	}
 | 
			
		||||
	*rdevp = NULL;
 | 
			
		||||
	WRITE_ONCE(*rdevp, NULL);
 | 
			
		||||
	if (p->replacement) {
 | 
			
		||||
		/* We must have just cleared 'rdev' */
 | 
			
		||||
		p->rdev = p->replacement;
 | 
			
		||||
		WRITE_ONCE(p->rdev, p->replacement);
 | 
			
		||||
		clear_bit(Replacement, &p->replacement->flags);
 | 
			
		||||
		smp_mb(); /* Make sure other CPUs may see both as identical
 | 
			
		||||
			   * but will never see neither -- if they are careful.
 | 
			
		||||
			   */
 | 
			
		||||
		p->replacement = NULL;
 | 
			
		||||
		WRITE_ONCE(p->replacement, NULL);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	clear_bit(WantReplacement, &rdev->flags);
 | 
			
		||||
| 
						 | 
				
			
			@ -2754,20 +2700,18 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 | 
			
		|||
		if (s > (PAGE_SIZE>>9))
 | 
			
		||||
			s = PAGE_SIZE >> 9;
 | 
			
		||||
 | 
			
		||||
		rcu_read_lock();
 | 
			
		||||
		do {
 | 
			
		||||
			sector_t first_bad;
 | 
			
		||||
			int bad_sectors;
 | 
			
		||||
 | 
			
		||||
			d = r10_bio->devs[sl].devnum;
 | 
			
		||||
			rdev = rcu_dereference(conf->mirrors[d].rdev);
 | 
			
		||||
			rdev = conf->mirrors[d].rdev;
 | 
			
		||||
			if (rdev &&
 | 
			
		||||
			    test_bit(In_sync, &rdev->flags) &&
 | 
			
		||||
			    !test_bit(Faulty, &rdev->flags) &&
 | 
			
		||||
			    is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
 | 
			
		||||
					&first_bad, &bad_sectors) == 0) {
 | 
			
		||||
				atomic_inc(&rdev->nr_pending);
 | 
			
		||||
				rcu_read_unlock();
 | 
			
		||||
				success = sync_page_io(rdev,
 | 
			
		||||
						       r10_bio->devs[sl].addr +
 | 
			
		||||
						       sect,
 | 
			
		||||
| 
						 | 
				
			
			@ -2775,7 +2719,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 | 
			
		|||
						       conf->tmppage,
 | 
			
		||||
						       REQ_OP_READ, false);
 | 
			
		||||
				rdev_dec_pending(rdev, mddev);
 | 
			
		||||
				rcu_read_lock();
 | 
			
		||||
				if (success)
 | 
			
		||||
					break;
 | 
			
		||||
			}
 | 
			
		||||
| 
						 | 
				
			
			@ -2783,7 +2726,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 | 
			
		|||
			if (sl == conf->copies)
 | 
			
		||||
				sl = 0;
 | 
			
		||||
		} while (sl != slot);
 | 
			
		||||
		rcu_read_unlock();
 | 
			
		||||
 | 
			
		||||
		if (!success) {
 | 
			
		||||
			/* Cannot read from anywhere, just mark the block
 | 
			
		||||
| 
						 | 
				
			
			@ -2807,20 +2749,18 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 | 
			
		|||
 | 
			
		||||
		start = sl;
 | 
			
		||||
		/* write it back and re-read */
 | 
			
		||||
		rcu_read_lock();
 | 
			
		||||
		while (sl != slot) {
 | 
			
		||||
			if (sl==0)
 | 
			
		||||
				sl = conf->copies;
 | 
			
		||||
			sl--;
 | 
			
		||||
			d = r10_bio->devs[sl].devnum;
 | 
			
		||||
			rdev = rcu_dereference(conf->mirrors[d].rdev);
 | 
			
		||||
			rdev = conf->mirrors[d].rdev;
 | 
			
		||||
			if (!rdev ||
 | 
			
		||||
			    test_bit(Faulty, &rdev->flags) ||
 | 
			
		||||
			    !test_bit(In_sync, &rdev->flags))
 | 
			
		||||
				continue;
 | 
			
		||||
 | 
			
		||||
			atomic_inc(&rdev->nr_pending);
 | 
			
		||||
			rcu_read_unlock();
 | 
			
		||||
			if (r10_sync_page_io(rdev,
 | 
			
		||||
					     r10_bio->devs[sl].addr +
 | 
			
		||||
					     sect,
 | 
			
		||||
| 
						 | 
				
			
			@ -2839,7 +2779,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 | 
			
		|||
					  rdev->bdev);
 | 
			
		||||
			}
 | 
			
		||||
			rdev_dec_pending(rdev, mddev);
 | 
			
		||||
			rcu_read_lock();
 | 
			
		||||
		}
 | 
			
		||||
		sl = start;
 | 
			
		||||
		while (sl != slot) {
 | 
			
		||||
| 
						 | 
				
			
			@ -2847,14 +2786,13 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 | 
			
		|||
				sl = conf->copies;
 | 
			
		||||
			sl--;
 | 
			
		||||
			d = r10_bio->devs[sl].devnum;
 | 
			
		||||
			rdev = rcu_dereference(conf->mirrors[d].rdev);
 | 
			
		||||
			rdev = conf->mirrors[d].rdev;
 | 
			
		||||
			if (!rdev ||
 | 
			
		||||
			    test_bit(Faulty, &rdev->flags) ||
 | 
			
		||||
			    !test_bit(In_sync, &rdev->flags))
 | 
			
		||||
				continue;
 | 
			
		||||
 | 
			
		||||
			atomic_inc(&rdev->nr_pending);
 | 
			
		||||
			rcu_read_unlock();
 | 
			
		||||
			switch (r10_sync_page_io(rdev,
 | 
			
		||||
					     r10_bio->devs[sl].addr +
 | 
			
		||||
					     sect,
 | 
			
		||||
| 
						 | 
				
			
			@ -2882,9 +2820,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 | 
			
		|||
			}
 | 
			
		||||
 | 
			
		||||
			rdev_dec_pending(rdev, mddev);
 | 
			
		||||
			rcu_read_lock();
 | 
			
		||||
		}
 | 
			
		||||
		rcu_read_unlock();
 | 
			
		||||
 | 
			
		||||
		sectors -= s;
 | 
			
		||||
		sect += s;
 | 
			
		||||
| 
						 | 
				
			
			@ -3358,14 +3294,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
				/* Completed a full sync so the replacements
 | 
			
		||||
				 * are now fully recovered.
 | 
			
		||||
				 */
 | 
			
		||||
				rcu_read_lock();
 | 
			
		||||
				for (i = 0; i < conf->geo.raid_disks; i++) {
 | 
			
		||||
					struct md_rdev *rdev =
 | 
			
		||||
						rcu_dereference(conf->mirrors[i].replacement);
 | 
			
		||||
						conf->mirrors[i].replacement;
 | 
			
		||||
 | 
			
		||||
					if (rdev)
 | 
			
		||||
						rdev->recovery_offset = MaxSector;
 | 
			
		||||
				}
 | 
			
		||||
				rcu_read_unlock();
 | 
			
		||||
			}
 | 
			
		||||
			conf->fullsync = 0;
 | 
			
		||||
		}
 | 
			
		||||
| 
						 | 
				
			
			@ -3446,9 +3381,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
			struct raid10_info *mirror = &conf->mirrors[i];
 | 
			
		||||
			struct md_rdev *mrdev, *mreplace;
 | 
			
		||||
 | 
			
		||||
			rcu_read_lock();
 | 
			
		||||
			mrdev = rcu_dereference(mirror->rdev);
 | 
			
		||||
			mreplace = rcu_dereference(mirror->replacement);
 | 
			
		||||
			mrdev = mirror->rdev;
 | 
			
		||||
			mreplace = mirror->replacement;
 | 
			
		||||
 | 
			
		||||
			if (mrdev && (test_bit(Faulty, &mrdev->flags) ||
 | 
			
		||||
			    test_bit(In_sync, &mrdev->flags)))
 | 
			
		||||
| 
						 | 
				
			
			@ -3456,22 +3390,18 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
			if (mreplace && test_bit(Faulty, &mreplace->flags))
 | 
			
		||||
				mreplace = NULL;
 | 
			
		||||
 | 
			
		||||
			if (!mrdev && !mreplace) {
 | 
			
		||||
				rcu_read_unlock();
 | 
			
		||||
			if (!mrdev && !mreplace)
 | 
			
		||||
				continue;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			still_degraded = 0;
 | 
			
		||||
			/* want to reconstruct this device */
 | 
			
		||||
			rb2 = r10_bio;
 | 
			
		||||
			sect = raid10_find_virt(conf, sector_nr, i);
 | 
			
		||||
			if (sect >= mddev->resync_max_sectors) {
 | 
			
		||||
			if (sect >= mddev->resync_max_sectors)
 | 
			
		||||
				/* last stripe is not complete - don't
 | 
			
		||||
				 * try to recover this sector.
 | 
			
		||||
				 */
 | 
			
		||||
				rcu_read_unlock();
 | 
			
		||||
				continue;
 | 
			
		||||
			}
 | 
			
		||||
			/* Unless we are doing a full sync, or a replacement
 | 
			
		||||
			 * we only need to recover the block if it is set in
 | 
			
		||||
			 * the bitmap
 | 
			
		||||
| 
						 | 
				
			
			@ -3487,14 +3417,12 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
				 * that there will never be anything to do here
 | 
			
		||||
				 */
 | 
			
		||||
				chunks_skipped = -1;
 | 
			
		||||
				rcu_read_unlock();
 | 
			
		||||
				continue;
 | 
			
		||||
			}
 | 
			
		||||
			if (mrdev)
 | 
			
		||||
				atomic_inc(&mrdev->nr_pending);
 | 
			
		||||
			if (mreplace)
 | 
			
		||||
				atomic_inc(&mreplace->nr_pending);
 | 
			
		||||
			rcu_read_unlock();
 | 
			
		||||
 | 
			
		||||
			r10_bio = raid10_alloc_init_r10buf(conf);
 | 
			
		||||
			r10_bio->state = 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -3513,10 +3441,9 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
			/* Need to check if the array will still be
 | 
			
		||||
			 * degraded
 | 
			
		||||
			 */
 | 
			
		||||
			rcu_read_lock();
 | 
			
		||||
			for (j = 0; j < conf->geo.raid_disks; j++) {
 | 
			
		||||
				struct md_rdev *rdev = rcu_dereference(
 | 
			
		||||
					conf->mirrors[j].rdev);
 | 
			
		||||
				struct md_rdev *rdev = conf->mirrors[j].rdev;
 | 
			
		||||
 | 
			
		||||
				if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
 | 
			
		||||
					still_degraded = 1;
 | 
			
		||||
					break;
 | 
			
		||||
| 
						 | 
				
			
			@ -3531,8 +3458,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
				int k;
 | 
			
		||||
				int d = r10_bio->devs[j].devnum;
 | 
			
		||||
				sector_t from_addr, to_addr;
 | 
			
		||||
				struct md_rdev *rdev =
 | 
			
		||||
					rcu_dereference(conf->mirrors[d].rdev);
 | 
			
		||||
				struct md_rdev *rdev = conf->mirrors[d].rdev;
 | 
			
		||||
				sector_t sector, first_bad;
 | 
			
		||||
				int bad_sectors;
 | 
			
		||||
				if (!rdev ||
 | 
			
		||||
| 
						 | 
				
			
			@ -3611,7 +3537,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
				atomic_inc(&r10_bio->remaining);
 | 
			
		||||
				break;
 | 
			
		||||
			}
 | 
			
		||||
			rcu_read_unlock();
 | 
			
		||||
			if (j == conf->copies) {
 | 
			
		||||
				/* Cannot recover, so abort the recovery or
 | 
			
		||||
				 * record a bad block */
 | 
			
		||||
| 
						 | 
				
			
			@ -3738,12 +3663,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
 | 
			
		||||
			bio = r10_bio->devs[i].bio;
 | 
			
		||||
			bio->bi_status = BLK_STS_IOERR;
 | 
			
		||||
			rcu_read_lock();
 | 
			
		||||
			rdev = rcu_dereference(conf->mirrors[d].rdev);
 | 
			
		||||
			if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
 | 
			
		||||
				rcu_read_unlock();
 | 
			
		||||
			rdev = conf->mirrors[d].rdev;
 | 
			
		||||
			if (rdev == NULL || test_bit(Faulty, &rdev->flags))
 | 
			
		||||
				continue;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			sector = r10_bio->devs[i].addr;
 | 
			
		||||
			if (is_badblock(rdev, sector, max_sync,
 | 
			
		||||
					&first_bad, &bad_sectors)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -3753,7 +3676,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
					bad_sectors -= (sector - first_bad);
 | 
			
		||||
					if (max_sync > bad_sectors)
 | 
			
		||||
						max_sync = bad_sectors;
 | 
			
		||||
					rcu_read_unlock();
 | 
			
		||||
					continue;
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
| 
						 | 
				
			
			@ -3769,11 +3691,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
			bio_set_dev(bio, rdev->bdev);
 | 
			
		||||
			count++;
 | 
			
		||||
 | 
			
		||||
			rdev = rcu_dereference(conf->mirrors[d].replacement);
 | 
			
		||||
			if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
 | 
			
		||||
				rcu_read_unlock();
 | 
			
		||||
			rdev = conf->mirrors[d].replacement;
 | 
			
		||||
			if (rdev == NULL || test_bit(Faulty, &rdev->flags))
 | 
			
		||||
				continue;
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			atomic_inc(&rdev->nr_pending);
 | 
			
		||||
 | 
			
		||||
			/* Need to set up for writing to the replacement */
 | 
			
		||||
| 
						 | 
				
			
			@ -3790,7 +3711,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
			bio->bi_iter.bi_sector = sector + rdev->data_offset;
 | 
			
		||||
			bio_set_dev(bio, rdev->bdev);
 | 
			
		||||
			count++;
 | 
			
		||||
			rcu_read_unlock();
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (count < 2) {
 | 
			
		||||
| 
						 | 
				
			
			@ -4500,11 +4420,11 @@ static int calc_degraded(struct r10conf *conf)
 | 
			
		|||
	int degraded, degraded2;
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	degraded = 0;
 | 
			
		||||
	/* 'prev' section first */
 | 
			
		||||
	for (i = 0; i < conf->prev.raid_disks; i++) {
 | 
			
		||||
		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
 | 
			
		||||
		struct md_rdev *rdev = conf->mirrors[i].rdev;
 | 
			
		||||
 | 
			
		||||
		if (!rdev || test_bit(Faulty, &rdev->flags))
 | 
			
		||||
			degraded++;
 | 
			
		||||
		else if (!test_bit(In_sync, &rdev->flags))
 | 
			
		||||
| 
						 | 
				
			
			@ -4514,13 +4434,12 @@ static int calc_degraded(struct r10conf *conf)
 | 
			
		|||
			 */
 | 
			
		||||
			degraded++;
 | 
			
		||||
	}
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
	if (conf->geo.raid_disks == conf->prev.raid_disks)
 | 
			
		||||
		return degraded;
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	degraded2 = 0;
 | 
			
		||||
	for (i = 0; i < conf->geo.raid_disks; i++) {
 | 
			
		||||
		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
 | 
			
		||||
		struct md_rdev *rdev = conf->mirrors[i].rdev;
 | 
			
		||||
 | 
			
		||||
		if (!rdev || test_bit(Faulty, &rdev->flags))
 | 
			
		||||
			degraded2++;
 | 
			
		||||
		else if (!test_bit(In_sync, &rdev->flags)) {
 | 
			
		||||
| 
						 | 
				
			
			@ -4533,7 +4452,6 @@ static int calc_degraded(struct r10conf *conf)
 | 
			
		|||
				degraded2++;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
	if (degraded2 > degraded)
 | 
			
		||||
		return degraded2;
 | 
			
		||||
	return degraded;
 | 
			
		||||
| 
						 | 
				
			
			@ -4965,16 +4883,15 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
	blist = read_bio;
 | 
			
		||||
	read_bio->bi_next = NULL;
 | 
			
		||||
 | 
			
		||||
	rcu_read_lock();
 | 
			
		||||
	for (s = 0; s < conf->copies*2; s++) {
 | 
			
		||||
		struct bio *b;
 | 
			
		||||
		int d = r10_bio->devs[s/2].devnum;
 | 
			
		||||
		struct md_rdev *rdev2;
 | 
			
		||||
		if (s&1) {
 | 
			
		||||
			rdev2 = rcu_dereference(conf->mirrors[d].replacement);
 | 
			
		||||
			rdev2 = conf->mirrors[d].replacement;
 | 
			
		||||
			b = r10_bio->devs[s/2].repl_bio;
 | 
			
		||||
		} else {
 | 
			
		||||
			rdev2 = rcu_dereference(conf->mirrors[d].rdev);
 | 
			
		||||
			rdev2 = conf->mirrors[d].rdev;
 | 
			
		||||
			b = r10_bio->devs[s/2].bio;
 | 
			
		||||
		}
 | 
			
		||||
		if (!rdev2 || test_bit(Faulty, &rdev2->flags))
 | 
			
		||||
| 
						 | 
				
			
			@ -5008,7 +4925,6 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 | 
			
		|||
		sector_nr += len >> 9;
 | 
			
		||||
		nr_sectors += len >> 9;
 | 
			
		||||
	}
 | 
			
		||||
	rcu_read_unlock();
 | 
			
		||||
	r10_bio->sectors = nr_sectors;
 | 
			
		||||
 | 
			
		||||
	/* Now submit the read */
 | 
			
		||||
| 
						 | 
				
			
			@ -5061,20 +4977,17 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 | 
			
		|||
		struct bio *b;
 | 
			
		||||
		int d = r10_bio->devs[s/2].devnum;
 | 
			
		||||
		struct md_rdev *rdev;
 | 
			
		||||
		rcu_read_lock();
 | 
			
		||||
		if (s&1) {
 | 
			
		||||
			rdev = rcu_dereference(conf->mirrors[d].replacement);
 | 
			
		||||
			rdev = conf->mirrors[d].replacement;
 | 
			
		||||
			b = r10_bio->devs[s/2].repl_bio;
 | 
			
		||||
		} else {
 | 
			
		||||
			rdev = rcu_dereference(conf->mirrors[d].rdev);
 | 
			
		||||
			rdev = conf->mirrors[d].rdev;
 | 
			
		||||
			b = r10_bio->devs[s/2].bio;
 | 
			
		||||
		}
 | 
			
		||||
		if (!rdev || test_bit(Faulty, &rdev->flags)) {
 | 
			
		||||
			rcu_read_unlock();
 | 
			
		||||
		if (!rdev || test_bit(Faulty, &rdev->flags))
 | 
			
		||||
			continue;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		atomic_inc(&rdev->nr_pending);
 | 
			
		||||
		rcu_read_unlock();
 | 
			
		||||
		md_sync_acct_bio(b, r10_bio->sectors);
 | 
			
		||||
		atomic_inc(&r10_bio->remaining);
 | 
			
		||||
		b->bi_next = NULL;
 | 
			
		||||
| 
						 | 
				
			
			@ -5145,10 +5058,9 @@ static int handle_reshape_read_error(struct mddev *mddev,
 | 
			
		|||
		if (s > (PAGE_SIZE >> 9))
 | 
			
		||||
			s = PAGE_SIZE >> 9;
 | 
			
		||||
 | 
			
		||||
		rcu_read_lock();
 | 
			
		||||
		while (!success) {
 | 
			
		||||
			int d = r10b->devs[slot].devnum;
 | 
			
		||||
			struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
 | 
			
		||||
			struct md_rdev *rdev = conf->mirrors[d].rdev;
 | 
			
		||||
			sector_t addr;
 | 
			
		||||
			if (rdev == NULL ||
 | 
			
		||||
			    test_bit(Faulty, &rdev->flags) ||
 | 
			
		||||
| 
						 | 
				
			
			@ -5157,14 +5069,12 @@ static int handle_reshape_read_error(struct mddev *mddev,
 | 
			
		|||
 | 
			
		||||
			addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
 | 
			
		||||
			atomic_inc(&rdev->nr_pending);
 | 
			
		||||
			rcu_read_unlock();
 | 
			
		||||
			success = sync_page_io(rdev,
 | 
			
		||||
					       addr,
 | 
			
		||||
					       s << 9,
 | 
			
		||||
					       pages[idx],
 | 
			
		||||
					       REQ_OP_READ, false);
 | 
			
		||||
			rdev_dec_pending(rdev, mddev);
 | 
			
		||||
			rcu_read_lock();
 | 
			
		||||
			if (success)
 | 
			
		||||
				break;
 | 
			
		||||
		failed:
 | 
			
		||||
| 
						 | 
				
			
			@ -5174,7 +5084,6 @@ static int handle_reshape_read_error(struct mddev *mddev,
 | 
			
		|||
			if (slot == first_slot)
 | 
			
		||||
				break;
 | 
			
		||||
		}
 | 
			
		||||
		rcu_read_unlock();
 | 
			
		||||
		if (!success) {
 | 
			
		||||
			/* couldn't read this block, must give up */
 | 
			
		||||
			set_bit(MD_RECOVERY_INTR,
 | 
			
		||||
| 
						 | 
				
			
			@ -5200,12 +5109,8 @@ static void end_reshape_write(struct bio *bio)
 | 
			
		|||
	struct md_rdev *rdev = NULL;
 | 
			
		||||
 | 
			
		||||
	d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
 | 
			
		||||
	if (repl)
 | 
			
		||||
		rdev = conf->mirrors[d].replacement;
 | 
			
		||||
	if (!rdev) {
 | 
			
		||||
		smp_mb();
 | 
			
		||||
		rdev = conf->mirrors[d].rdev;
 | 
			
		||||
	}
 | 
			
		||||
	rdev = repl ? conf->mirrors[d].replacement :
 | 
			
		||||
		      conf->mirrors[d].rdev;
 | 
			
		||||
 | 
			
		||||
	if (bio->bi_status) {
 | 
			
		||||
		/* FIXME should record badblock */
 | 
			
		||||
| 
						 | 
				
			
			@ -5240,18 +5145,16 @@ static void raid10_finish_reshape(struct mddev *mddev)
 | 
			
		|||
		mddev->resync_max_sectors = mddev->array_sectors;
 | 
			
		||||
	} else {
 | 
			
		||||
		int d;
 | 
			
		||||
		rcu_read_lock();
 | 
			
		||||
		for (d = conf->geo.raid_disks ;
 | 
			
		||||
		     d < conf->geo.raid_disks - mddev->delta_disks;
 | 
			
		||||
		     d++) {
 | 
			
		||||
			struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
 | 
			
		||||
			struct md_rdev *rdev = conf->mirrors[d].rdev;
 | 
			
		||||
			if (rdev)
 | 
			
		||||
				clear_bit(In_sync, &rdev->flags);
 | 
			
		||||
			rdev = rcu_dereference(conf->mirrors[d].replacement);
 | 
			
		||||
			rdev = conf->mirrors[d].replacement;
 | 
			
		||||
			if (rdev)
 | 
			
		||||
				clear_bit(In_sync, &rdev->flags);
 | 
			
		||||
		}
 | 
			
		||||
		rcu_read_unlock();
 | 
			
		||||
	}
 | 
			
		||||
	mddev->layout = mddev->new_layout;
 | 
			
		||||
	mddev->chunk_sectors = 1 << conf->geo.chunk_shift;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue