mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	kernel: add panic_on_warn
There have been several times where I have had to rebuild a kernel to
cause a panic when hitting a WARN() in the code in order to get a crash
dump from a system.  Sometimes this is easy to do, other times (such as
in the case of a remote admin) it is not trivial to send new images to
the user.
A much easier method would be a switch to change the WARN() over to a
panic.  This makes debugging easier in that I can now test the actual
image the WARN() was seen on and I do not have to engage in remote
debugging.
This patch adds a panic_on_warn kernel parameter and a
/proc/sys/kernel/panic_on_warn sysctl, either of which makes
warn_slowpath_common() call panic().  The function will still print out the
location of the warning.
An example of the panic_on_warn output:
The first line below is printed by the WARN_ON() and shows the WARN_ON()'s
location.  After that the panic() output is displayed.
    WARNING: CPU: 30 PID: 11698 at /home/prarit/dummy_module/dummy-module.c:25 init_dummy+0x1f/0x30 [dummy_module]()
    Kernel panic - not syncing: panic_on_warn set ...
    CPU: 30 PID: 11698 Comm: insmod Tainted: G        W  OE  3.17.0+ #57
    Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.00.29.D696.1311111329 11/11/2013
     0000000000000000 000000008e3f87df ffff88080f093c38 ffffffff81665190
     0000000000000000 ffffffff818aea3d ffff88080f093cb8 ffffffff8165e2ec
     ffffffff00000008 ffff88080f093cc8 ffff88080f093c68 000000008e3f87df
    Call Trace:
     [<ffffffff81665190>] dump_stack+0x46/0x58
     [<ffffffff8165e2ec>] panic+0xd0/0x204
     [<ffffffffa038e05f>] ? init_dummy+0x1f/0x30 [dummy_module]
     [<ffffffff81076b90>] warn_slowpath_common+0xd0/0xd0
     [<ffffffffa038e040>] ? dummy_greetings+0x40/0x40 [dummy_module]
     [<ffffffff81076c8a>] warn_slowpath_null+0x1a/0x20
     [<ffffffffa038e05f>] init_dummy+0x1f/0x30 [dummy_module]
     [<ffffffff81002144>] do_one_initcall+0xd4/0x210
     [<ffffffff811b52c2>] ? __vunmap+0xc2/0x110
     [<ffffffff810f8889>] load_module+0x16a9/0x1b30
     [<ffffffff810f3d30>] ? store_uevent+0x70/0x70
     [<ffffffff810f49b9>] ? copy_module_from_fd.isra.44+0x129/0x180
     [<ffffffff810f8ec6>] SyS_finit_module+0xa6/0xd0
     [<ffffffff8166cf29>] system_call_fastpath+0x12/0x17
Successfully tested by me.
hpa said: There is another very valid use for this: many operators would
rather a machine shut down than be potentially compromised either
functionally or security-wise.
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Acked-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
			
			
This commit is contained in:
		
							parent
							
								
									f938612dd9
								
							
						
					
					
						commit
						9e3961a097
					
				
					 8 changed files with 61 additions and 14 deletions
				
			
		| 
						 | 
				
			
			@ -471,6 +471,13 @@ format. Crash is available on Dave Anderson's site at the following URL:
 | 
			
		|||
 | 
			
		||||
   http://people.redhat.com/~anderson/
 | 
			
		||||
 | 
			
		||||
Trigger Kdump on WARN()
 | 
			
		||||
=======================
 | 
			
		||||
 | 
			
		||||
The kernel parameter, panic_on_warn, calls panic() in all WARN() paths.  This
 | 
			
		||||
will cause a kdump to occur at the panic() call.  In cases where a user wants
 | 
			
		||||
to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1
 | 
			
		||||
to achieve the same behaviour.
 | 
			
		||||
 | 
			
		||||
Contact
 | 
			
		||||
=======
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2509,6 +2509,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 | 
			
		|||
			timeout < 0: reboot immediately
 | 
			
		||||
			Format: <timeout>
 | 
			
		||||
 | 
			
		||||
	panic_on_warn	panic() instead of WARN().  Useful to cause kdump
 | 
			
		||||
			on a WARN().
 | 
			
		||||
 | 
			
		||||
	crash_kexec_post_notifiers
 | 
			
		||||
			Run kdump after running panic-notifiers and dumping
 | 
			
		||||
			kmsg. This is only for the users who doubt kdump always
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -54,8 +54,9 @@ show up in /proc/sys/kernel:
 | 
			
		|||
- overflowuid
 | 
			
		||||
- panic
 | 
			
		||||
- panic_on_oops
 | 
			
		||||
- panic_on_unrecovered_nmi
 | 
			
		||||
- panic_on_stackoverflow
 | 
			
		||||
- panic_on_unrecovered_nmi
 | 
			
		||||
- panic_on_warn
 | 
			
		||||
- pid_max
 | 
			
		||||
- powersave-nap               [ PPC only ]
 | 
			
		||||
- printk
 | 
			
		||||
| 
						 | 
				
			
			@ -527,19 +528,6 @@ the recommended setting is 60.
 | 
			
		|||
 | 
			
		||||
==============================================================
 | 
			
		||||
 | 
			
		||||
panic_on_unrecovered_nmi:
 | 
			
		||||
 | 
			
		||||
The default Linux behaviour on an NMI of either memory or unknown is
 | 
			
		||||
to continue operation. For many environments such as scientific
 | 
			
		||||
computing it is preferable that the box is taken out and the error
 | 
			
		||||
dealt with than an uncorrected parity/ECC error get propagated.
 | 
			
		||||
 | 
			
		||||
A small number of systems do generate NMI's for bizarre random reasons
 | 
			
		||||
such as power management so the default is off. That sysctl works like
 | 
			
		||||
the existing panic controls already in that directory.
 | 
			
		||||
 | 
			
		||||
==============================================================
 | 
			
		||||
 | 
			
		||||
panic_on_oops:
 | 
			
		||||
 | 
			
		||||
Controls the kernel's behaviour when an oops or BUG is encountered.
 | 
			
		||||
| 
						 | 
				
			
			@ -563,6 +551,30 @@ This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
 | 
			
		|||
 | 
			
		||||
==============================================================
 | 
			
		||||
 | 
			
		||||
panic_on_unrecovered_nmi:
 | 
			
		||||
 | 
			
		||||
The default Linux behaviour on an NMI of either memory or unknown is
 | 
			
		||||
to continue operation. For many environments such as scientific
 | 
			
		||||
computing it is preferable that the box is taken out and the error
 | 
			
		||||
dealt with than an uncorrected parity/ECC error get propagated.
 | 
			
		||||
 | 
			
		||||
A small number of systems do generate NMI's for bizarre random reasons
 | 
			
		||||
such as power management so the default is off. That sysctl works like
 | 
			
		||||
the existing panic controls already in that directory.
 | 
			
		||||
 | 
			
		||||
==============================================================
 | 
			
		||||
 | 
			
		||||
panic_on_warn:
 | 
			
		||||
 | 
			
		||||
Calls panic() in the WARN() path when set to 1.  This is useful to avoid
 | 
			
		||||
a kernel rebuild when attempting to kdump at the location of a WARN().
 | 
			
		||||
 | 
			
		||||
0: only WARN(), default behaviour.
 | 
			
		||||
 | 
			
		||||
1: call panic() after printing out WARN() location.
 | 
			
		||||
 | 
			
		||||
==============================================================
 | 
			
		||||
 | 
			
		||||
perf_cpu_time_max_percent:
 | 
			
		||||
 | 
			
		||||
Hints to the kernel how much CPU time it should be allowed to
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -427,6 +427,7 @@ extern int panic_timeout;
 | 
			
		|||
extern int panic_on_oops;
 | 
			
		||||
extern int panic_on_unrecovered_nmi;
 | 
			
		||||
extern int panic_on_io_nmi;
 | 
			
		||||
extern int panic_on_warn;
 | 
			
		||||
extern int sysctl_panic_on_stackoverflow;
 | 
			
		||||
/*
 | 
			
		||||
 * Only to be used by arch init code. If the user over-wrote the default
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -153,6 +153,7 @@ enum
 | 
			
		|||
	KERN_MAX_LOCK_DEPTH=74, /* int: rtmutex's maximum lock depth */
 | 
			
		||||
	KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
 | 
			
		||||
	KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered NMI */
 | 
			
		||||
	KERN_PANIC_ON_WARN=77, /* int: call panic() in WARN() functions */
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -33,6 +33,7 @@ static int pause_on_oops;
 | 
			
		|||
static int pause_on_oops_flag;
 | 
			
		||||
static DEFINE_SPINLOCK(pause_on_oops_lock);
 | 
			
		||||
static bool crash_kexec_post_notifiers;
 | 
			
		||||
int panic_on_warn __read_mostly;
 | 
			
		||||
 | 
			
		||||
int panic_timeout = CONFIG_PANIC_TIMEOUT;
 | 
			
		||||
EXPORT_SYMBOL_GPL(panic_timeout);
 | 
			
		||||
| 
						 | 
				
			
			@ -428,6 +429,17 @@ static void warn_slowpath_common(const char *file, int line, void *caller,
 | 
			
		|||
	if (args)
 | 
			
		||||
		vprintk(args->fmt, args->args);
 | 
			
		||||
 | 
			
		||||
	if (panic_on_warn) {
 | 
			
		||||
		/*
 | 
			
		||||
		 * This thread may hit another WARN() in the panic path.
 | 
			
		||||
		 * Resetting this prevents additional WARN() from panicking the
 | 
			
		||||
		 * system on this thread.  Other threads are blocked by the
 | 
			
		||||
		 * panic_mutex in panic().
 | 
			
		||||
		 */
 | 
			
		||||
		panic_on_warn = 0;
 | 
			
		||||
		panic("panic_on_warn set ...\n");
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	print_modules();
 | 
			
		||||
	dump_stack();
 | 
			
		||||
	print_oops_end_marker();
 | 
			
		||||
| 
						 | 
				
			
			@ -485,6 +497,7 @@ EXPORT_SYMBOL(__stack_chk_fail);
 | 
			
		|||
 | 
			
		||||
core_param(panic, panic_timeout, int, 0644);
 | 
			
		||||
core_param(pause_on_oops, pause_on_oops, int, 0644);
 | 
			
		||||
core_param(panic_on_warn, panic_on_warn, int, 0644);
 | 
			
		||||
 | 
			
		||||
static int __init setup_crash_kexec_post_notifiers(char *s)
 | 
			
		||||
{
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1104,6 +1104,15 @@ static struct ctl_table kern_table[] = {
 | 
			
		|||
		.proc_handler	= proc_dointvec,
 | 
			
		||||
	},
 | 
			
		||||
#endif
 | 
			
		||||
	{
 | 
			
		||||
		.procname	= "panic_on_warn",
 | 
			
		||||
		.data		= &panic_on_warn,
 | 
			
		||||
		.maxlen		= sizeof(int),
 | 
			
		||||
		.mode		= 0644,
 | 
			
		||||
		.proc_handler	= proc_dointvec_minmax,
 | 
			
		||||
		.extra1		= &zero,
 | 
			
		||||
		.extra2		= &one,
 | 
			
		||||
	},
 | 
			
		||||
	{ }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -137,6 +137,7 @@ static const struct bin_table bin_kern_table[] = {
 | 
			
		|||
	{ CTL_INT,	KERN_COMPAT_LOG,		"compat-log" },
 | 
			
		||||
	{ CTL_INT,	KERN_MAX_LOCK_DEPTH,		"max_lock_depth" },
 | 
			
		||||
	{ CTL_INT,	KERN_PANIC_ON_NMI,		"panic_on_unrecovered_nmi" },
 | 
			
		||||
	{ CTL_INT,	KERN_PANIC_ON_WARN,		"panic_on_warn" },
 | 
			
		||||
	{}
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue