mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	md: fix stopping sync thread
Currently the sync thread is stopped from multiple contexts: - idle_sync_thread - frozen_sync_thread - __md_stop_writes - md_set_readonly - do_md_stop And there are some problems: 1) sync_work is flushed while reconfig_mutex is grabbed; this can deadlock because the work function will grab reconfig_mutex as well. 2) md_reap_sync_thread() can't be called directly while md_do_sync() is not finished yet, for example, commit 130443d60b ("md: refactor idle/frozen_sync_thread() to fix deadlock"). 3) If MD_RECOVERY_RUNNING is not set, there is no need to stop sync_thread at all because sync_thread must not be registered. Factor out a helper stop_sync_thread(), so that the above contexts will behave the same. Fix 1) by flushing sync_work after reconfig_mutex is released, before waiting for sync_thread to be done; Fix 2) by letting the daemon thread unregister sync_thread; Fix 3) by always checking MD_RECOVERY_RUNNING first. Fixes: db5e653d7c ("md: delay choosing sync action to md_start_sync()") Signed-off-by: Yu Kuai <yukuai3@huawei.com> Signed-off-by: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20231205094215.1824240-4-yukuai1@huaweicloud.com
This commit is contained in:
		
							parent
							
								
									c9f7cb5b2b
								
							
						
					
					
						commit
						f52f5c71f3
					
				
					 1 changed files with 37 additions and 53 deletions
				
			
		| 
						 | 
					@ -4840,25 +4840,29 @@ action_show(struct mddev *mddev, char *page)
 | 
				
			||||||
	return sprintf(page, "%s\n", type);
 | 
						return sprintf(page, "%s\n", type);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void stop_sync_thread(struct mddev *mddev)
 | 
					/**
 | 
				
			||||||
{
 | 
					 * stop_sync_thread() - wait for sync_thread to stop if it's running.
 | 
				
			||||||
	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 | 
					 * @mddev:	the array.
 | 
				
			||||||
		return;
 | 
					 * @locked:	if set, reconfig_mutex will still be held after this function
 | 
				
			||||||
 | 
					 *		return; if not set, reconfig_mutex will be released after this
 | 
				
			||||||
	if (mddev_lock(mddev))
 | 
					 *		function return.
 | 
				
			||||||
		return;
 | 
					 * @check_seq:	if set, only wait for current running sync_thread to stop, note
 | 
				
			||||||
 | 
					 *		that new sync_thread can still start.
 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
 | 
					 | 
				
			||||||
	 * held.
 | 
					 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int sync_seq;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (check_seq)
 | 
				
			||||||
 | 
							sync_seq = atomic_read(&mddev->sync_seq);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
 | 
						if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
 | 
				
			||||||
 | 
							if (!locked)
 | 
				
			||||||
			mddev_unlock(mddev);
 | 
								mddev_unlock(mddev);
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (work_pending(&mddev->sync_work))
 | 
						mddev_unlock(mddev);
 | 
				
			||||||
		flush_workqueue(md_misc_wq);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 | 
						set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
| 
						 | 
					@ -4866,21 +4870,28 @@ static void stop_sync_thread(struct mddev *mddev)
 | 
				
			||||||
	 * never happen
 | 
						 * never happen
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	md_wakeup_thread_directly(mddev->sync_thread);
 | 
						md_wakeup_thread_directly(mddev->sync_thread);
 | 
				
			||||||
 | 
						if (work_pending(&mddev->sync_work))
 | 
				
			||||||
 | 
							flush_work(&mddev->sync_work);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mddev_unlock(mddev);
 | 
						wait_event(resync_wait,
 | 
				
			||||||
 | 
							   !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
 | 
				
			||||||
 | 
							   (check_seq && sync_seq != atomic_read(&mddev->sync_seq)));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (locked)
 | 
				
			||||||
 | 
							mddev_lock_nointr(mddev);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void idle_sync_thread(struct mddev *mddev)
 | 
					static void idle_sync_thread(struct mddev *mddev)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int sync_seq = atomic_read(&mddev->sync_seq);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	mutex_lock(&mddev->sync_mutex);
 | 
						mutex_lock(&mddev->sync_mutex);
 | 
				
			||||||
	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 | 
						clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 | 
				
			||||||
	stop_sync_thread(mddev);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
 | 
						if (mddev_lock(mddev)) {
 | 
				
			||||||
			!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
 | 
							mutex_unlock(&mddev->sync_mutex);
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						stop_sync_thread(mddev, false, true);
 | 
				
			||||||
	mutex_unlock(&mddev->sync_mutex);
 | 
						mutex_unlock(&mddev->sync_mutex);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4888,11 +4899,13 @@ static void frozen_sync_thread(struct mddev *mddev)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	mutex_lock(&mddev->sync_mutex);
 | 
						mutex_lock(&mddev->sync_mutex);
 | 
				
			||||||
	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 | 
						set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 | 
				
			||||||
	stop_sync_thread(mddev);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	wait_event(resync_wait, mddev->sync_thread == NULL &&
 | 
						if (mddev_lock(mddev)) {
 | 
				
			||||||
			!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
 | 
							mutex_unlock(&mddev->sync_mutex);
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						stop_sync_thread(mddev, false, false);
 | 
				
			||||||
	mutex_unlock(&mddev->sync_mutex);
 | 
						mutex_unlock(&mddev->sync_mutex);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6264,14 +6277,7 @@ static void md_clean(struct mddev *mddev)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void __md_stop_writes(struct mddev *mddev)
 | 
					static void __md_stop_writes(struct mddev *mddev)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 | 
						stop_sync_thread(mddev, true, false);
 | 
				
			||||||
	if (work_pending(&mddev->sync_work))
 | 
					 | 
				
			||||||
		flush_workqueue(md_misc_wq);
 | 
					 | 
				
			||||||
	if (mddev->sync_thread) {
 | 
					 | 
				
			||||||
		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 | 
					 | 
				
			||||||
		md_reap_sync_thread(mddev);
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	del_timer_sync(&mddev->safemode_timer);
 | 
						del_timer_sync(&mddev->safemode_timer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (mddev->pers && mddev->pers->quiesce) {
 | 
						if (mddev->pers && mddev->pers->quiesce) {
 | 
				
			||||||
| 
						 | 
					@ -6363,18 +6369,8 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 | 
				
			||||||
		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 | 
							set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 | 
				
			||||||
		md_wakeup_thread(mddev->thread);
 | 
							md_wakeup_thread(mddev->thread);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 | 
					 | 
				
			||||||
		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						stop_sync_thread(mddev, false, false);
 | 
				
			||||||
	 * Thread might be blocked waiting for metadata update which will now
 | 
					 | 
				
			||||||
	 * never happen
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	md_wakeup_thread_directly(mddev->sync_thread);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	mddev_unlock(mddev);
 | 
					 | 
				
			||||||
	wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
 | 
					 | 
				
			||||||
					  &mddev->recovery));
 | 
					 | 
				
			||||||
	wait_event(mddev->sb_wait,
 | 
						wait_event(mddev->sb_wait,
 | 
				
			||||||
		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
 | 
							   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
 | 
				
			||||||
	mddev_lock_nointr(mddev);
 | 
						mddev_lock_nointr(mddev);
 | 
				
			||||||
| 
						 | 
					@ -6428,20 +6424,8 @@ static int do_md_stop(struct mddev *mddev, int mode,
 | 
				
			||||||
		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 | 
							set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 | 
				
			||||||
		md_wakeup_thread(mddev->thread);
 | 
							md_wakeup_thread(mddev->thread);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 | 
					 | 
				
			||||||
		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						stop_sync_thread(mddev, true, false);
 | 
				
			||||||
	 * Thread might be blocked waiting for metadata update which will now
 | 
					 | 
				
			||||||
	 * never happen
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	md_wakeup_thread_directly(mddev->sync_thread);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	mddev_unlock(mddev);
 | 
					 | 
				
			||||||
	wait_event(resync_wait, (mddev->sync_thread == NULL &&
 | 
					 | 
				
			||||||
				 !test_bit(MD_RECOVERY_RUNNING,
 | 
					 | 
				
			||||||
					   &mddev->recovery)));
 | 
					 | 
				
			||||||
	mddev_lock_nointr(mddev);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mutex_lock(&mddev->open_mutex);
 | 
						mutex_lock(&mddev->open_mutex);
 | 
				
			||||||
	if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
 | 
						if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue