mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	x86/nmi: Print reasons why backtrace NMIs are ignored
Instrument nmi_trigger_cpumask_backtrace() to dump out diagnostics based on evidence accumulated by exc_nmi(). These diagnostics are dumped for CPUs that ignored an NMI backtrace request for more than 10 seconds. [ paulmck: Apply Ingo Molnar feedback. ] Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: <x86@kernel.org> Reviewed-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
		
							parent
							
								
									1a3ea611fc
								
							
						
					
					
						commit
						344da544f1
					
				
					 3 changed files with 83 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -570,6 +570,79 @@ DEFINE_IDTENTRY_RAW(exc_nmi_noist)
 | 
			
		|||
EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_NMI_CHECK_CPU
 | 
			
		||||
 | 
			
		||||
static char *nmi_check_stall_msg[] = {
 | 
			
		||||
/*									*/
 | 
			
		||||
/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler.		*/
 | 
			
		||||
/* | +------ cpu_is_offline(cpu)					*/
 | 
			
		||||
/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls):	*/
 | 
			
		||||
/* | | |	NMI handler has been invoked.				*/
 | 
			
		||||
/* | | |								*/
 | 
			
		||||
/* V V V								*/
 | 
			
		||||
/* 0 0 0 */ "NMIs are not reaching exc_nmi() handler",
 | 
			
		||||
/* 0 0 1 */ "exc_nmi() handler is ignoring NMIs",
 | 
			
		||||
/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi() handler",
 | 
			
		||||
/* 0 1 1 */ "CPU is offline and exc_nmi() handler is legitimately ignoring NMIs",
 | 
			
		||||
/* 1 0 0 */ "CPU is in exc_nmi() handler and no further NMIs are reaching handler",
 | 
			
		||||
/* 1 0 1 */ "CPU is in exc_nmi() handler which is legitimately ignoring NMIs",
 | 
			
		||||
/* 1 1 0 */ "CPU is offline in exc_nmi() handler and no more NMIs are reaching exc_nmi() handler",
 | 
			
		||||
/* 1 1 1 */ "CPU is offline in exc_nmi() handler which is legitimately ignoring NMIs",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void nmi_backtrace_stall_snap(const struct cpumask *btp)
 | 
			
		||||
{
 | 
			
		||||
	int cpu;
 | 
			
		||||
	struct nmi_stats *nsp;
 | 
			
		||||
 | 
			
		||||
	for_each_cpu(cpu, btp) {
 | 
			
		||||
		nsp = per_cpu_ptr(&nmi_stats, cpu);
 | 
			
		||||
		nsp->idt_seq_snap = READ_ONCE(nsp->idt_seq);
 | 
			
		||||
		nsp->idt_nmi_seq_snap = READ_ONCE(nsp->idt_nmi_seq);
 | 
			
		||||
		nsp->idt_ignored_snap = READ_ONCE(nsp->idt_ignored);
 | 
			
		||||
		nsp->idt_calls_snap = atomic_long_read(&nsp->idt_calls);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void nmi_backtrace_stall_check(const struct cpumask *btp)
 | 
			
		||||
{
 | 
			
		||||
	int cpu;
 | 
			
		||||
	int idx;
 | 
			
		||||
	unsigned long nmi_seq;
 | 
			
		||||
	unsigned long j = jiffies;
 | 
			
		||||
	char *modp;
 | 
			
		||||
	char *msgp;
 | 
			
		||||
	char *msghp;
 | 
			
		||||
	struct nmi_stats *nsp;
 | 
			
		||||
 | 
			
		||||
	for_each_cpu(cpu, btp) {
 | 
			
		||||
		nsp = per_cpu_ptr(&nmi_stats, cpu);
 | 
			
		||||
		modp = "";
 | 
			
		||||
		msghp = "";
 | 
			
		||||
		nmi_seq = READ_ONCE(nsp->idt_nmi_seq);
 | 
			
		||||
		if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) {
 | 
			
		||||
			msgp = "CPU entered NMI handler function, but has not exited";
 | 
			
		||||
		} else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) {
 | 
			
		||||
			msgp = "CPU is handling NMIs";
 | 
			
		||||
		} else {
 | 
			
		||||
			idx = ((nsp->idt_seq_snap & 0x1) << 2) |
 | 
			
		||||
			      (cpu_is_offline(cpu) << 1) |
 | 
			
		||||
			      (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
 | 
			
		||||
			msgp = nmi_check_stall_msg[idx];
 | 
			
		||||
			if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
 | 
			
		||||
				modp = ", but OK because ignore_nmis was set";
 | 
			
		||||
			if (nmi_seq & ~0x1)
 | 
			
		||||
				msghp = " (CPU currently in NMI handler function)";
 | 
			
		||||
			else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
 | 
			
		||||
				msghp = " (CPU exited one NMI handler function)";
 | 
			
		||||
		}
 | 
			
		||||
		pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n",
 | 
			
		||||
			 __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies));
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
void stop_nmi(void)
 | 
			
		||||
{
 | 
			
		||||
	ignore_nmis++;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -214,4 +214,12 @@ int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *);
 | 
			
		|||
#include <asm/nmi.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_NMI_CHECK_CPU
 | 
			
		||||
void nmi_backtrace_stall_snap(const struct cpumask *btp);
 | 
			
		||||
void nmi_backtrace_stall_check(const struct cpumask *btp);
 | 
			
		||||
#else
 | 
			
		||||
static inline void nmi_backtrace_stall_snap(const struct cpumask *btp) {}
 | 
			
		||||
static inline void nmi_backtrace_stall_check(const struct cpumask *btp) {}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -64,6 +64,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
 | 
			
		|||
	if (!cpumask_empty(to_cpumask(backtrace_mask))) {
 | 
			
		||||
		pr_info("Sending NMI from CPU %d to CPUs %*pbl:\n",
 | 
			
		||||
			this_cpu, nr_cpumask_bits, to_cpumask(backtrace_mask));
 | 
			
		||||
		nmi_backtrace_stall_snap(to_cpumask(backtrace_mask));
 | 
			
		||||
		raise(to_cpumask(backtrace_mask));
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -74,6 +75,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
 | 
			
		|||
		mdelay(1);
 | 
			
		||||
		touch_softlockup_watchdog();
 | 
			
		||||
	}
 | 
			
		||||
	nmi_backtrace_stall_check(to_cpumask(backtrace_mask));
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Force flush any remote buffers that might be stuck in IRQ context
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue