	kernel/watchdog: Prevent false positives with turbo modes
The hardlockup detector on x86 uses a performance counter based on unhalted
CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the
performance counter period, so the hrtimer should fire 2-3 times before the
performance counter NMI fires. The NMI code checks whether the hrtimer
fired since the last invocation. If not, it assumes a hard lockup.
The calculation of those periods is based on the nominal CPU
frequency. Turbo modes increase the CPU clock frequency and therefore
shorten the period of the perf/NMI watchdog. With extreme turbo modes (3x
nominal frequency) the perf/NMI period is shorter than the hrtimer period,
which leads to false positives.
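To put rough numbers on it (assuming the default watchdog_thresh of 10
seconds): the hrtimer period is 10 * 2/5 = 4 seconds, while the perf counter
is programmed with about 10 seconds worth of cycles at the nominal frequency.
A CPU running at 3x the nominal clock consumes those cycles in roughly 3.3
seconds, so the NMI can fire before the 4 second hrtimer has ticked even once
and the check wrongly reports a hard lockup.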
A simple fix would be to shorten the hrtimer period, but that comes with
the side effect of more frequent hrtimer and softlockup thread wakeups,
which is not desired.
Implement a low pass filter, which checks the perf/NMI period against
kernel time. If the perf/NMI fires before 4/5 of the watchdog period has
elapsed then the event is ignored and postponed to the next perf/NMI.
That solves the problem and avoids the overhead of shorter hrtimer periods
and more frequent softlockup thread wakeups.
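With the same assumed numbers, the filter postpones any perf NMI that arrives
less than 4/5 * 10 = 8 seconds after the previously accepted one. The 4 second
hrtimer has had time to fire at least once within that window, so turbo-driven
early NMIs no longer trigger the hardlockup check prematurely, while a
genuinely stuck CPU still fails the hrtimer-progress check on the next
accepted NMI.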
Fixes: 58687acba5 ("lockup_detector: Combine nmi_watchdog and softlockup detector")
Reported-and-tested-by: Kan Liang <Kan.liang@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: dzickus@redhat.com
Cc: prarit@redhat.com
Cc: ak@linux.intel.com
Cc: babu.moger@oracle.com
Cc: peterz@infradead.org
Cc: eranian@google.com
Cc: acme@redhat.com
Cc: stable@vger.kernel.org
Cc: atomlin@redhat.com
Cc: akpm@linux-foundation.org
Cc: torvalds@linux-foundation.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos
			
			
parent ef954844c7
commit 7edaeb6841

5 changed files with 76 additions and 0 deletions
@@ -100,6 +100,7 @@ config X86
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select GENERIC_TIME_VSYSCALL
+	select HARDLOCKUP_CHECK_TIMESTAMP	if X86_64
 	select HAVE_ACPI_APEI			if ACPI
 	select HAVE_ACPI_APEI_NMI		if ACPI
 	select HAVE_ALIGNED_STRUCT_PAGE		if SLUB

@@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
 #define sysctl_softlockup_all_cpu_backtrace 0
 #define sysctl_hardlockup_all_cpu_backtrace 0
 #endif
+
+#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
+    defined(CONFIG_HARDLOCKUP_DETECTOR)
+void watchdog_update_hrtimer_threshold(u64 period);
+#else
+static inline void watchdog_update_hrtimer_threshold(u64 period) { }
+#endif
+
 extern bool is_hardlockup(void);
 struct ctl_table;
 extern int proc_watchdog(struct ctl_table *, int ,

@@ -240,6 +240,7 @@ static void set_sample_period(void)
 	 * hardlockup detector generates a warning
 	 */
 	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
+	watchdog_update_hrtimer_threshold(sample_period);
 }

 /* Commands for resetting the watchdog */

@@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);

+#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
+static DEFINE_PER_CPU(ktime_t, last_timestamp);
+static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
+static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
+
+void watchdog_update_hrtimer_threshold(u64 period)
+{
+	/*
+	 * The hrtimer runs with a period of (watchdog_threshold * 2) / 5
+	 *
+	 * So it runs effectively with 2.5 times the rate of the NMI
+	 * watchdog. That means the hrtimer should fire 2-3 times before
+	 * the NMI watchdog expires. The NMI watchdog on x86 is based on
+	 * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
+	 * might run way faster than expected and the NMI fires in a
+	 * smaller period than the one deduced from the nominal CPU
+	 * frequency. Depending on the Turbo-Mode factor this might be fast
+	 * enough to get the NMI period smaller than the hrtimer watchdog
+	 * period and trigger false positives.
+	 *
+	 * The sample threshold is used to check in the NMI handler whether
+	 * the minimum time between two NMI samples has elapsed. That
+	 * prevents false positives.
+	 *
+	 * Set this to 4/5 of the actual watchdog threshold period so the
+	 * hrtimer is guaranteed to fire at least once within the real
+	 * watchdog threshold.
+	 */
+	watchdog_hrtimer_sample_threshold = period * 2;
+}
+
+static bool watchdog_check_timestamp(void)
+{
+	ktime_t delta, now = ktime_get_mono_fast_ns();
+
+	delta = now - __this_cpu_read(last_timestamp);
+	if (delta < watchdog_hrtimer_sample_threshold) {
+		/*
+		 * If ktime is jiffies based, a stalled timer would prevent
+		 * jiffies from being incremented and the filter would look
+		 * at a stale timestamp and never trigger.
+		 */
+		if (__this_cpu_inc_return(nmi_rearmed) < 10)
+			return false;
+	}
+	__this_cpu_write(nmi_rearmed, 0);
+	__this_cpu_write(last_timestamp, now);
+	return true;
+}
+#else
+static inline bool watchdog_check_timestamp(void)
+{
+	return true;
+}
+#endif
+
 static struct perf_event_attr wd_hw_attr = {
 	.type		= PERF_TYPE_HARDWARE,
 	.config		= PERF_COUNT_HW_CPU_CYCLES,

@@ -61,6 +117,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
 		return;
 	}

+	if (!watchdog_check_timestamp())
+		return;
+
 	/* check for a hardlockup
 	 * This is done by making sure our timer interrupt
 	 * is incrementing.  The timer interrupt should have

@@ -797,6 +797,13 @@ config HARDLOCKUP_DETECTOR_PERF
 	bool
 	select SOFTLOCKUP_DETECTOR

+#
+# Enables a timestamp based low pass filter to compensate for perf based
+# hard lockup detection which runs too fast due to turbo modes.
+#
+config HARDLOCKUP_CHECK_TIMESTAMP
+	bool
+
 #
 # arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
 # lockup detector rather than the perf based detector.
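For readers who want to poke at the filtering logic outside the kernel, below
is a minimal userspace sketch of the same idea. It is illustrative only and
rests on assumptions: a 10 second watchdog threshold (hence a 4 second hrtimer
period), the per-CPU variables collapsed into plain globals, hypothetical
helper names (update_threshold, check_timestamp, mono_ns), and
clock_gettime(CLOCK_MONOTONIC) standing in for ktime_get_mono_fast_ns().

/*
 * Userspace sketch of the watchdog timestamp low pass filter.
 * Not kernel code; see the assumptions listed above.
 */
#define _POSIX_C_SOURCE 199309L
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000ULL

static uint64_t sample_threshold_ns;	/* 4/5 of the watchdog threshold */
static uint64_t last_timestamp_ns;
static unsigned int rearmed;

static uint64_t mono_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}

static void update_threshold(uint64_t hrtimer_period_ns)
{
	/* hrtimer period is 2/5 of the watchdog threshold, so *2 gives 4/5 */
	sample_threshold_ns = hrtimer_period_ns * 2;
}

/* Returns true if enough monotonic time has passed since the last sample. */
static bool check_timestamp(void)
{
	uint64_t now = mono_ns();

	if (now - last_timestamp_ns < sample_threshold_ns) {
		/* Cap the postponing so a stalled clock cannot mute the check forever. */
		if (++rearmed < 10)
			return false;
	}
	rearmed = 0;
	last_timestamp_ns = now;
	return true;
}

int main(void)
{
	update_threshold(4 * NSEC_PER_SEC);	/* watchdog_thresh = 10s */

	/* Two back-to-back "NMIs": the second arrives too early and is filtered. */
	printf("first sample accepted:  %d\n", check_timestamp());
	printf("second sample accepted: %d\n", check_timestamp());
	return 0;
}

Compiled and run, the first call is accepted because the initial timestamp is
zero, while the immediately following call is postponed, mirroring how
watchdog_overflow_callback() returns early when the NMI arrives too soon.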