watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work
Oleg suggested replacing the "watchdog/%u" threads with cpu_stop_work.
That removes one thread per CPU while at the same time fixing softlockup
vs SCHED_DEADLINE.

But more importantly, it does away with the single
smpboot_update_cpumask_percpu_thread() user, which allows cleanups and
shrinkage of the smpboot interface.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
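[Editorial note] The heart of the change is the watchdog_timer_fn() hunk
in kernel/watchdog.c below: instead of waking a per-CPU SCHED_FIFO
kthread, the hrtimer callback now queues a one-shot cpu_stop_work on the
local CPU. The per-CPU stopper thread runs in the stop scheduling class,
which ranks above SCHED_DEADLINE, so the softlockup detector can no
longer be starved by deadline tasks. A minimal sketch of that pattern,
with illustrative demo_* names rather than the patch's own:

/*
 * Sketch: kick a stop-class callback from an hrtimer, the way the
 * patch does. Assumes kernel-module context; names are illustrative.
 */
#include <linux/hrtimer.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>

static DEFINE_PER_CPU(struct cpu_stop_work, demo_stop_work);

static int demo_fn(void *data)
{
	/* Runs from the CPU's stopper thread ("migration/N"). */
	return 0;
}

static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
{
	/*
	 * stop_one_cpu_nowait() only queues the work and returns, so
	 * it is safe from the hardirq context hrtimers expire in.
	 */
	stop_one_cpu_nowait(smp_processor_id(), demo_fn, NULL,
			    this_cpu_ptr(&demo_stop_work));

	hrtimer_forward_now(t, ms_to_ktime(4000));
	return HRTIMER_RESTART;
}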
parent 4520843dfa
commit 9cf57731b6

4 changed files with 75 additions and 81 deletions
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -164,6 +164,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
+	CPUHP_AP_WATCHDOG_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -33,10 +33,15 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
 #define sysctl_hardlockup_all_cpu_backtrace 0
 #endif /* !CONFIG_SMP */
 
+extern int lockup_detector_online_cpu(unsigned int cpu);
+extern int lockup_detector_offline_cpu(unsigned int cpu);
+
 #else /* CONFIG_LOCKUP_DETECTOR */
 static inline void lockup_detector_init(void) { }
 static inline void lockup_detector_soft_poweroff(void) { }
 static inline void lockup_detector_cleanup(void) { }
+#define lockup_detector_online_cpu	NULL
+#define lockup_detector_offline_cpu	NULL
 #endif /* !CONFIG_LOCKUP_DETECTOR */
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1344,6 +1344,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.startup.single		= perf_event_init_cpu,
 		.teardown.single	= perf_event_exit_cpu,
 	},
+	[CPUHP_AP_WATCHDOG_ONLINE] = {
+		.name			= "lockup_detector:online",
+		.startup.single		= lockup_detector_online_cpu,
+		.teardown.single	= lockup_detector_offline_cpu,
+	},
 	[CPUHP_AP_WORKQUEUE_ONLINE] = {
 		.name			= "workqueue:online",
 		.startup.single		= workqueue_online_cpu,
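[Editorial note] The patch wires its new CPUHP_AP_WATCHDOG_ONLINE
callbacks into the static cpuhp_hp_states[] table above; code that
cannot claim a static slot would register equivalent online/offline
callbacks dynamically. A hedged sketch of that variant (demo_* names
are illustrative, not from the patch):

/* Sketch: dynamic CPU-hotplug online/offline callbacks. */
#include <linux/cpuhotplug.h>
#include <linux/init.h>

static int demo_online(unsigned int cpu)
{
	/* Runs on @cpu as it comes online. */
	return 0;
}

static int demo_offline(unsigned int cpu)
{
	/* Runs on @cpu before it goes offline. */
	return 0;
}

static int __init demo_init(void)
{
	int ret;

	/*
	 * For CPUHP_AP_ONLINE_DYN a positive return value is the
	 * dynamically allocated state number, not an error.
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "demo:online",
				demo_online, demo_offline);
	return ret < 0 ? ret : 0;
}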
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -18,18 +18,14 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
-#include <linux/smpboot.h>
-#include <linux/sched/rt.h>
-#include <uapi/linux/sched/types.h>
 #include <linux/tick.h>
-#include <linux/workqueue.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/isolation.h>
+#include <linux/stop_machine.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
-#include <linux/kthread.h>
 
 static DEFINE_MUTEX(watchdog_mutex);
 
@@ -169,11 +165,10 @@ static void lockup_detector_update_enable(void)
 unsigned int __read_mostly softlockup_panic =
 			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
 
-static bool softlockup_threads_initialized __read_mostly;
+static bool softlockup_initialized __read_mostly;
 static u64 __read_mostly sample_period;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
-static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
@@ -335,6 +330,25 @@ static void watchdog_interrupt_count(void)
 	__this_cpu_inc(hrtimer_interrupts);
 }
 
+/*
+ * The watchdog thread function - touches the timestamp.
+ *
+ * It only runs once every sample_period seconds (4 seconds by
+ * default) to reset the softlockup timestamp. If this gets delayed
+ * for more than 2*watchdog_thresh seconds then the debug-printout
+ * triggers in watchdog_timer_fn().
+ */
+static int softlockup_fn(void *data)
+{
+	__this_cpu_write(soft_lockup_hrtimer_cnt,
+			 __this_cpu_read(hrtimer_interrupts));
+	__touch_watchdog();
+
+	return 0;
+}
+
+static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
+
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -350,7 +364,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	watchdog_interrupt_count();
 
 	/* kick the softlockup detector */
-	wake_up_process(__this_cpu_read(softlockup_watchdog));
+	stop_one_cpu_nowait(smp_processor_id(),
+			softlockup_fn, NULL,
+			this_cpu_ptr(&softlockup_stop_work));
 
 	/* .. and repeat */
 	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
@@ -448,17 +464,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
-static void watchdog_set_prio(unsigned int policy, unsigned int prio)
-{
-	struct sched_param param = { .sched_priority = prio };
-
-	sched_setscheduler(current, policy, &param);
-}
-
 static void watchdog_enable(unsigned int cpu)
 {
 	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
 
+	WARN_ON_ONCE(cpu != smp_processor_id());
+
 	/*
 	 * Start the timer first to prevent the NMI watchdog triggering
 	 * before the timer has a chance to fire.
@@ -473,15 +484,14 @@ static void watchdog_enable(unsigned int cpu)
 	/* Enable the perf event */
 	if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
 		watchdog_nmi_enable(cpu);
-
-	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
 }
 
 static void watchdog_disable(unsigned int cpu)
 {
 	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
 
-	watchdog_set_prio(SCHED_NORMAL, 0);
+	WARN_ON_ONCE(cpu != smp_processor_id());
+
 	/*
 	 * Disable the perf event first. That prevents that a large delay
 	 * between disabling the timer and disabling the perf event causes
@@ -491,77 +501,63 @@ static void watchdog_disable(unsigned int cpu)
 	hrtimer_cancel(hrtimer);
 }
 
-static void watchdog_cleanup(unsigned int cpu, bool online)
+static int softlockup_stop_fn(void *data)
 {
-	watchdog_disable(cpu);
+	watchdog_disable(smp_processor_id());
+	return 0;
 }
 
-static int watchdog_should_run(unsigned int cpu)
+static void softlockup_stop_all(void)
 {
-	return __this_cpu_read(hrtimer_interrupts) !=
-		__this_cpu_read(soft_lockup_hrtimer_cnt);
-}
+	int cpu;
 
-/*
- * The watchdog thread function - touches the timestamp.
- *
- * It only runs once every sample_period seconds (4 seconds by
- * default) to reset the softlockup timestamp. If this gets delayed
- * for more than 2*watchdog_thresh seconds then the debug-printout
- * triggers in watchdog_timer_fn().
- */
-static void watchdog(unsigned int cpu)
-{
-	__this_cpu_write(soft_lockup_hrtimer_cnt,
-			 __this_cpu_read(hrtimer_interrupts));
-	__touch_watchdog();
-}
-
-static struct smp_hotplug_thread watchdog_threads = {
-	.store			= &softlockup_watchdog,
-	.thread_should_run	= watchdog_should_run,
-	.thread_fn		= watchdog,
-	.thread_comm		= "watchdog/%u",
-	.setup			= watchdog_enable,
-	.cleanup		= watchdog_cleanup,
-	.park			= watchdog_disable,
-	.unpark			= watchdog_enable,
-};
-
-static void softlockup_update_smpboot_threads(void)
-{
-	lockdep_assert_held(&watchdog_mutex);
-
-	if (!softlockup_threads_initialized)
+	if (!softlockup_initialized)
 		return;
 
-	smpboot_update_cpumask_percpu_thread(&watchdog_threads,
-					     &watchdog_allowed_mask);
-}
+	for_each_cpu(cpu, &watchdog_allowed_mask)
+		smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);
 
-/* Temporarily park all watchdog threads */
-static void softlockup_park_all_threads(void)
-{
-	cpumask_clear(&watchdog_allowed_mask);
-	softlockup_update_smpboot_threads();
+	cpumask_clear(&watchdog_allowed_mask);
 }
 
-/* Unpark enabled threads */
-static void softlockup_unpark_threads(void)
+static int softlockup_start_fn(void *data)
 {
+	watchdog_enable(smp_processor_id());
+	return 0;
+}
+
+static void softlockup_start_all(void)
+{
+	int cpu;
+
 	cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
-	softlockup_update_smpboot_threads();
+	for_each_cpu(cpu, &watchdog_allowed_mask)
+		smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false);
+}
+
+int lockup_detector_online_cpu(unsigned int cpu)
+{
+	watchdog_enable(cpu);
+	return 0;
+}
+
+int lockup_detector_offline_cpu(unsigned int cpu)
+{
+	watchdog_disable(cpu);
+	return 0;
 }
 
 static void lockup_detector_reconfigure(void)
 {
 	cpus_read_lock();
 	watchdog_nmi_stop();
-	softlockup_park_all_threads();
+
+	softlockup_stop_all();
 	set_sample_period();
 	lockup_detector_update_enable();
 	if (watchdog_enabled && watchdog_thresh)
-		softlockup_unpark_threads();
+		softlockup_start_all();
+
 	watchdog_nmi_start();
 	cpus_read_unlock();
 	/*
@@ -580,8 +576,6 @@ static void lockup_detector_reconfigure(void)
  */
 static __init void lockup_detector_setup(void)
 {
-	int ret;
-
 	/*
 	 * If sysctl is off and watchdog got disabled on the command line,
 	 * nothing to do here.
@@ -592,24 +586,13 @@ static __init void lockup_detector_setup(void)
 	    !(watchdog_enabled && watchdog_thresh))
 		return;
 
-	ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
-						     &watchdog_allowed_mask);
-	if (ret) {
-		pr_err("Failed to initialize soft lockup detector threads\n");
-		return;
-	}
-
 	mutex_lock(&watchdog_mutex);
-	softlockup_threads_initialized = true;
 	lockup_detector_reconfigure();
+	softlockup_initialized = true;
 	mutex_unlock(&watchdog_mutex);
 }
 
 #else /* CONFIG_SOFTLOCKUP_DETECTOR */
-static inline int watchdog_park_threads(void) { return 0; }
-static inline void watchdog_unpark_threads(void) { }
-static inline int watchdog_enable_all_cpus(void) { return 0; }
-static inline void watchdog_disable_all_cpus(void) { }
 static void lockup_detector_reconfigure(void)
 {
 	cpus_read_lock();
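[Editorial note] On the smp_call_on_cpu() calls introduced in
softlockup_start_all() and softlockup_stop_all() above: the helper runs
a function synchronously, in process context, on the requested CPU and
returns its result, which is what lets watchdog_enable() and
watchdog_disable() keep their current-CPU-only assumption
(WARN_ON_ONCE(cpu != smp_processor_id())). A hedged, illustrative
sketch of the same pattern (demo_* names are not from the patch):

/* Sketch: run a setup function on every CPU in a mask, one at a time. */
#include <linux/cpumask.h>
#include <linux/smp.h>

static int demo_per_cpu_setup(void *data)
{
	/* Executes on the target CPU in process context; may sleep. */
	return 0;
}

static void demo_setup_all(const struct cpumask *mask)
{
	int cpu;

	for_each_cpu(cpu, mask)
		/* phys=false: no vCPU-to-physical-CPU pinning required. */
		smp_call_on_cpu(cpu, demo_per_cpu_setup, NULL, false);
}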