forked from mirrors/linux
		
	md/raid1: clean up read_balance.
read_balance has two loops which both look for a 'best' device based on slightly different criteria. This is clumsy and makes is hard to add extra criteria. So replace it all with a single loop that combines everything. Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
		
							parent
							
								
									56d9912106
								
							
						
					
					
						commit
						76073054c9
					
				
					 1 changed files with 34 additions and 49 deletions
				
			
		|  | @ -411,10 +411,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | ||||||
| { | { | ||||||
| 	const sector_t this_sector = r1_bio->sector; | 	const sector_t this_sector = r1_bio->sector; | ||||||
| 	const int sectors = r1_bio->sectors; | 	const int sectors = r1_bio->sectors; | ||||||
| 	int new_disk = -1; |  | ||||||
| 	int start_disk; | 	int start_disk; | ||||||
|  | 	int best_disk; | ||||||
| 	int i; | 	int i; | ||||||
| 	sector_t new_distance, current_distance; | 	sector_t best_dist; | ||||||
| 	mdk_rdev_t *rdev; | 	mdk_rdev_t *rdev; | ||||||
| 	int choose_first; | 	int choose_first; | ||||||
| 
 | 
 | ||||||
|  | @ -425,6 +425,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | ||||||
| 	 * We take the first readable disk when above the resync window. | 	 * We take the first readable disk when above the resync window. | ||||||
| 	 */ | 	 */ | ||||||
|  retry: |  retry: | ||||||
|  | 	best_disk = -1; | ||||||
|  | 	best_dist = MaxSector; | ||||||
| 	if (conf->mddev->recovery_cp < MaxSector && | 	if (conf->mddev->recovery_cp < MaxSector && | ||||||
| 	    (this_sector + sectors >= conf->next_resync)) { | 	    (this_sector + sectors >= conf->next_resync)) { | ||||||
| 		choose_first = 1; | 		choose_first = 1; | ||||||
|  | @ -434,8 +436,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | ||||||
| 		start_disk = conf->last_used; | 		start_disk = conf->last_used; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	/* make sure the disk is operational */ |  | ||||||
| 	for (i = 0 ; i < conf->raid_disks ; i++) { | 	for (i = 0 ; i < conf->raid_disks ; i++) { | ||||||
|  | 		sector_t dist; | ||||||
| 		int disk = start_disk + i; | 		int disk = start_disk + i; | ||||||
| 		if (disk >= conf->raid_disks) | 		if (disk >= conf->raid_disks) | ||||||
| 			disk -= conf->raid_disks; | 			disk -= conf->raid_disks; | ||||||
|  | @ -443,60 +445,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | ||||||
| 		rdev = rcu_dereference(conf->mirrors[disk].rdev); | 		rdev = rcu_dereference(conf->mirrors[disk].rdev); | ||||||
| 		if (r1_bio->bios[disk] == IO_BLOCKED | 		if (r1_bio->bios[disk] == IO_BLOCKED | ||||||
| 		    || rdev == NULL | 		    || rdev == NULL | ||||||
| 		    || !test_bit(In_sync, &rdev->flags)) | 		    || test_bit(Faulty, &rdev->flags)) | ||||||
| 			continue; | 			continue; | ||||||
| 
 | 		if (!test_bit(In_sync, &rdev->flags) && | ||||||
| 		new_disk = disk; | 		    rdev->recovery_offset < this_sector + sectors) | ||||||
| 		if (!test_bit(WriteMostly, &rdev->flags)) |  | ||||||
| 			break; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	if (new_disk < 0 || choose_first) |  | ||||||
| 		goto rb_out; |  | ||||||
| 
 |  | ||||||
| 	/*
 |  | ||||||
| 	 * Don't change to another disk for sequential reads: |  | ||||||
| 	 */ |  | ||||||
| 	if (conf->next_seq_sect == this_sector) |  | ||||||
| 		goto rb_out; |  | ||||||
| 	if (this_sector == conf->mirrors[new_disk].head_position) |  | ||||||
| 		goto rb_out; |  | ||||||
| 
 |  | ||||||
| 	current_distance = abs(this_sector  |  | ||||||
| 			       - conf->mirrors[new_disk].head_position); |  | ||||||
| 
 |  | ||||||
| 	/* look for a better disk - i.e. head is closer */ |  | ||||||
| 	start_disk = new_disk; |  | ||||||
| 	for (i = 1; i < conf->raid_disks; i++) { |  | ||||||
| 		int disk = start_disk + 1; |  | ||||||
| 		if (disk >= conf->raid_disks) |  | ||||||
| 			disk -= conf->raid_disks; |  | ||||||
| 
 |  | ||||||
| 		rdev = rcu_dereference(conf->mirrors[disk].rdev); |  | ||||||
| 		if (r1_bio->bios[disk] == IO_BLOCKED |  | ||||||
| 		    || rdev == NULL |  | ||||||
| 		    || !test_bit(In_sync, &rdev->flags) |  | ||||||
| 		    || test_bit(WriteMostly, &rdev->flags)) |  | ||||||
| 			continue; | 			continue; | ||||||
| 
 | 		if (test_bit(WriteMostly, &rdev->flags)) { | ||||||
| 		if (!atomic_read(&rdev->nr_pending)) { | 			/* Don't balance among write-mostly, just
 | ||||||
| 			new_disk = disk; | 			 * use the first as a last resort */ | ||||||
|  | 			if (best_disk < 0) | ||||||
|  | 				best_disk = disk; | ||||||
|  | 			continue; | ||||||
|  | 		} | ||||||
|  | 		/* This is a reasonable device to use.  It might
 | ||||||
|  | 		 * even be best. | ||||||
|  | 		 */ | ||||||
|  | 		dist = abs(this_sector - conf->mirrors[disk].head_position); | ||||||
|  | 		if (choose_first | ||||||
|  | 		    /* Don't change to another disk for sequential reads */ | ||||||
|  | 		    || conf->next_seq_sect == this_sector | ||||||
|  | 		    || dist == 0 | ||||||
|  | 		    /* If device is idle, use it */ | ||||||
|  | 		    || atomic_read(&rdev->nr_pending) == 0) { | ||||||
|  | 			best_disk = disk; | ||||||
| 			break; | 			break; | ||||||
| 		} | 		} | ||||||
| 		new_distance = abs(this_sector - conf->mirrors[disk].head_position); | 		if (dist < best_dist) { | ||||||
| 		if (new_distance < current_distance) { | 			best_dist = dist; | ||||||
| 			current_distance = new_distance; | 			best_disk = disk; | ||||||
| 			new_disk = disk; |  | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  rb_out: | 	if (best_disk >= 0) { | ||||||
| 	if (new_disk >= 0) { | 		rdev = rcu_dereference(conf->mirrors[best_disk].rdev); | ||||||
| 		rdev = rcu_dereference(conf->mirrors[new_disk].rdev); |  | ||||||
| 		if (!rdev) | 		if (!rdev) | ||||||
| 			goto retry; | 			goto retry; | ||||||
| 		atomic_inc(&rdev->nr_pending); | 		atomic_inc(&rdev->nr_pending); | ||||||
| 		if (!test_bit(In_sync, &rdev->flags)) { | 		if (test_bit(Faulty, &rdev->flags)) { | ||||||
| 			/* cannot risk returning a device that failed
 | 			/* cannot risk returning a device that failed
 | ||||||
| 			 * before we inc'ed nr_pending | 			 * before we inc'ed nr_pending | ||||||
| 			 */ | 			 */ | ||||||
|  | @ -504,11 +489,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | ||||||
| 			goto retry; | 			goto retry; | ||||||
| 		} | 		} | ||||||
| 		conf->next_seq_sect = this_sector + sectors; | 		conf->next_seq_sect = this_sector + sectors; | ||||||
| 		conf->last_used = new_disk; | 		conf->last_used = best_disk; | ||||||
| 	} | 	} | ||||||
| 	rcu_read_unlock(); | 	rcu_read_unlock(); | ||||||
| 
 | 
 | ||||||
| 	return new_disk; | 	return best_disk; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int raid1_congested(void *data, int bits) | static int raid1_congested(void *data, int bits) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 NeilBrown
						NeilBrown