mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	watchdog/hardlockup: add a "cpu" param to watchdog_hardlockup_check()
In preparation for the buddy hardlockup detector, where the CPU checking for lockup might not be the currently running CPU, add a "cpu" parameter to watchdog_hardlockup_check(). As part of this change, make hrtimer_interrupts an atomic_t since now the CPU incrementing the value and the CPU reading the value might be different. Technically this could also be done with just READ_ONCE and WRITE_ONCE, but atomic_t feels a little cleaner in this case. While hrtimer_interrupts is made atomic_t, we change hrtimer_interrupts_saved from "unsigned long" to "int". The "int" is needed to match the data type backing atomic_t for hrtimer_interrupts. Even if this changes us from 64 bits to 32 bits (which I don't think is true for most compilers), it doesn't really matter. All we ever do is increment it every few seconds and compare it to an old value, so 32 bits is fine (even 16 bits would be). The "signed" vs "unsigned" also doesn't matter for simple equality comparisons. hrtimer_interrupts_saved is _not_ switched to atomic_t nor even accessed with READ_ONCE / WRITE_ONCE. hrtimer_interrupts_saved is always consistently accessed by the same CPU. NOTE: with the upcoming "buddy" detector there is one special case. When a CPU goes offline/online, we can change which CPU is the one that consistently accesses a given instance of hrtimer_interrupts_saved. We still can't end up with a partially updated hrtimer_interrupts_saved, however, because we end up petting all affected CPUs to make sure the new and old CPU can't somehow end up reading/writing hrtimer_interrupts_saved at the same time.
Link: https://lkml.kernel.org/r/20230519101840.v5.10.I3a7d4dd8c23ac30ee0b607d77feb6646b64825c0@changeid Signed-off-by: Douglas Anderson <dianders@chromium.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chen-Yu Tsai <wens@csie.org> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: Colin Cross <ccross@android.com> Cc: Daniel Thompson <daniel.thompson@linaro.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Guenter Roeck <groeck@chromium.org> Cc: Ian Rogers <irogers@google.com> Cc: Lecopzer Chen <lecopzer.chen@mediatek.com> Cc: Marc Zyngier <maz@kernel.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Masayoshi Mizuma <msys.mizuma@gmail.com> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Pingfan Liu <kernelfans@gmail.com> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: "Ravi V. Shankar" <ravi.v.shankar@intel.com> Cc: Ricardo Neri <ricardo.neri@intel.com> Cc: Stephane Eranian <eranian@google.com> Cc: Stephen Boyd <swboyd@chromium.org> Cc: Sumit Garg <sumit.garg@linaro.org> Cc: Tzung-Bi Shih <tzungbi@chromium.org> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									1610611aad
								
							
						
					
					
						commit
						77c12fc959
					
				
					 3 changed files with 34 additions and 22 deletions
				
			
		| 
						 | 
					@ -88,7 +88,7 @@ static inline void hardlockup_detector_disable(void) {}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
 | 
					#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
 | 
				
			||||||
void watchdog_hardlockup_check(struct pt_regs *regs);
 | 
					void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
 | 
					#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -87,29 +87,34 @@ __setup("nmi_watchdog=", hardlockup_panic_setup);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
 | 
					#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 | 
					static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts);
 | 
				
			||||||
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 | 
					static DEFINE_PER_CPU(int, hrtimer_interrupts_saved);
 | 
				
			||||||
static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned);
 | 
					static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned);
 | 
				
			||||||
static unsigned long watchdog_hardlockup_all_cpu_dumped;
 | 
					static unsigned long watchdog_hardlockup_all_cpu_dumped;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static bool is_hardlockup(void)
 | 
					static bool is_hardlockup(unsigned int cpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 | 
						int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
 | 
						if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
 | 
				
			||||||
		return true;
 | 
							return true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	__this_cpu_write(hrtimer_interrupts_saved, hrint);
 | 
						/*
 | 
				
			||||||
 | 
						 * NOTE: we don't need any fancy atomic_t or READ_ONCE/WRITE_ONCE
 | 
				
			||||||
 | 
						 * for hrtimer_interrupts_saved. hrtimer_interrupts_saved is
 | 
				
			||||||
 | 
						 * written/read by a single CPU.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return false;
 | 
						return false;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void watchdog_hardlockup_kick(void)
 | 
					static void watchdog_hardlockup_kick(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	__this_cpu_inc(hrtimer_interrupts);
 | 
						atomic_inc(raw_cpu_ptr(&hrtimer_interrupts));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void watchdog_hardlockup_check(struct pt_regs *regs)
 | 
					void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Check for a hardlockup by making sure the CPU's timer
 | 
						 * Check for a hardlockup by making sure the CPU's timer
 | 
				
			||||||
| 
						 | 
					@ -117,35 +122,42 @@ void watchdog_hardlockup_check(struct pt_regs *regs)
 | 
				
			||||||
	 * fired multiple times before we overflow'd. If it hasn't
 | 
						 * fired multiple times before we overflow'd. If it hasn't
 | 
				
			||||||
	 * then this is a good indication the cpu is stuck
 | 
						 * then this is a good indication the cpu is stuck
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (is_hardlockup()) {
 | 
						if (is_hardlockup(cpu)) {
 | 
				
			||||||
		unsigned int this_cpu = smp_processor_id();
 | 
							unsigned int this_cpu = smp_processor_id();
 | 
				
			||||||
 | 
							struct cpumask backtrace_mask = *cpu_online_mask;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/* Only print hardlockups once. */
 | 
							/* Only print hardlockups once. */
 | 
				
			||||||
		if (__this_cpu_read(watchdog_hardlockup_warned))
 | 
							if (per_cpu(watchdog_hardlockup_warned, cpu))
 | 
				
			||||||
			return;
 | 
								return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", this_cpu);
 | 
							pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", cpu);
 | 
				
			||||||
		print_modules();
 | 
							print_modules();
 | 
				
			||||||
		print_irqtrace_events(current);
 | 
							print_irqtrace_events(current);
 | 
				
			||||||
 | 
							if (cpu == this_cpu) {
 | 
				
			||||||
			if (regs)
 | 
								if (regs)
 | 
				
			||||||
				show_regs(regs);
 | 
									show_regs(regs);
 | 
				
			||||||
			else
 | 
								else
 | 
				
			||||||
				dump_stack();
 | 
									dump_stack();
 | 
				
			||||||
 | 
								cpumask_clear_cpu(cpu, &backtrace_mask);
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								if (trigger_single_cpu_backtrace(cpu))
 | 
				
			||||||
 | 
									cpumask_clear_cpu(cpu, &backtrace_mask);
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * Perform all-CPU dump only once to avoid multiple hardlockups
 | 
							 * Perform multi-CPU dump only once to avoid multiple
 | 
				
			||||||
		 * generating interleaving traces
 | 
							 * hardlockups generating interleaving traces
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		if (sysctl_hardlockup_all_cpu_backtrace &&
 | 
							if (sysctl_hardlockup_all_cpu_backtrace &&
 | 
				
			||||||
		    !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped))
 | 
							    !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped))
 | 
				
			||||||
			trigger_allbutself_cpu_backtrace();
 | 
								trigger_cpumask_backtrace(&backtrace_mask);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (hardlockup_panic)
 | 
							if (hardlockup_panic)
 | 
				
			||||||
			nmi_panic(regs, "Hard LOCKUP");
 | 
								nmi_panic(regs, "Hard LOCKUP");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		__this_cpu_write(watchdog_hardlockup_warned, true);
 | 
							per_cpu(watchdog_hardlockup_warned, cpu) = true;
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
		__this_cpu_write(watchdog_hardlockup_warned, false);
 | 
							per_cpu(watchdog_hardlockup_warned, cpu) = false;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -120,7 +120,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	watchdog_hardlockup_check(regs);
 | 
						watchdog_hardlockup_check(smp_processor_id(), regs);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int hardlockup_detector_event_create(void)
 | 
					static int hardlockup_detector_event_create(void)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue