mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	panic, kexec: make __crash_kexec() NMI safe
Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI panic() doesn't work. The cause of that lies in the PREEMPT_RT definition of mutex_trylock(): if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) return 0; This prevents an nmi_panic() from executing the main body of __crash_kexec() which does the actual kexec into the kdump kernel. The warning and return are explained by:6ce47fd961("rtmutex: Warn if trylock is called from hard/softirq context") [...] The reasons for this are: 1) There is a potential deadlock in the slowpath 2) Another cpu which blocks on the rtmutex will boost the task which allegedly locked the rtmutex, but that cannot work because the hard/softirq context borrows the task context. Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex and replace it with an atomic variable. This is somewhat overzealous as *some* callsites could keep using a mutex (e.g. the sysfs-facing ones like crash_shrink_memory()), but this has the benefit of involving a single unified lock and preventing any future NMI-related surprises. Tested by triggering NMI panics via: $ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi $ echo 1 > /proc/sys/kernel/unknown_nmi_panic $ echo 1 > /proc/sys/kernel/panic $ ipmitool power diag Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com Fixes:6ce47fd961("rtmutex: Warn if trylock is called from hard/softirq context") Signed-off-by: Valentin Schneider <vschneid@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Baoquan He <bhe@redhat.com> Cc: "Eric W . Biederman" <ebiederm@xmission.com> Cc: Juri Lelli <jlelli@redhat.com> Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									7bb5da0d49
								
							
						
					
					
						commit
						05c6257433
					
				
					 4 changed files with 30 additions and 20 deletions
				
			
		|  | @ -93,13 +93,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments, | |||
| 
 | ||||
| 	/*
 | ||||
| 	 * Because we write directly to the reserved memory region when loading | ||||
| 	 * crash kernels we need a mutex here to prevent multiple crash kernels | ||||
| 	 * from attempting to load simultaneously, and to prevent a crash kernel | ||||
| 	 * from loading over the top of a in use crash kernel. | ||||
| 	 * | ||||
| 	 * KISS: always take the mutex. | ||||
| 	 * crash kernels we need a serialization here to prevent multiple crash | ||||
| 	 * kernels from attempting to load simultaneously. | ||||
| 	 */ | ||||
| 	if (!mutex_trylock(&kexec_mutex)) | ||||
| 	if (!kexec_trylock()) | ||||
| 		return -EBUSY; | ||||
| 
 | ||||
| 	if (flags & KEXEC_ON_CRASH) { | ||||
|  | @ -165,7 +162,7 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments, | |||
| 
 | ||||
| 	kimage_free(image); | ||||
| out_unlock: | ||||
| 	mutex_unlock(&kexec_mutex); | ||||
| 	kexec_unlock(); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -46,7 +46,7 @@ | |||
| #include <crypto/hash.h> | ||||
| #include "kexec_internal.h" | ||||
| 
 | ||||
| DEFINE_MUTEX(kexec_mutex); | ||||
| atomic_t __kexec_lock = ATOMIC_INIT(0); | ||||
| 
 | ||||
| /* Per cpu memory for storing cpu states in case of system crash. */ | ||||
| note_buf_t __percpu *crash_notes; | ||||
|  | @ -959,7 +959,7 @@ late_initcall(kexec_core_sysctl_init); | |||
|  */ | ||||
| void __noclone __crash_kexec(struct pt_regs *regs) | ||||
| { | ||||
| 	/* Take the kexec_mutex here to prevent sys_kexec_load
 | ||||
| 	/* Take the kexec_lock here to prevent sys_kexec_load
 | ||||
| 	 * running on one cpu from replacing the crash kernel | ||||
| 	 * we are using after a panic on a different cpu. | ||||
| 	 * | ||||
|  | @ -967,7 +967,7 @@ void __noclone __crash_kexec(struct pt_regs *regs) | |||
| 	 * of memory the xchg(&kexec_crash_image) would be | ||||
| 	 * sufficient.  But since I reuse the memory... | ||||
| 	 */ | ||||
| 	if (mutex_trylock(&kexec_mutex)) { | ||||
| 	if (kexec_trylock()) { | ||||
| 		if (kexec_crash_image) { | ||||
| 			struct pt_regs fixed_regs; | ||||
| 
 | ||||
|  | @ -976,7 +976,7 @@ void __noclone __crash_kexec(struct pt_regs *regs) | |||
| 			machine_crash_shutdown(&fixed_regs); | ||||
| 			machine_kexec(kexec_crash_image); | ||||
| 		} | ||||
| 		mutex_unlock(&kexec_mutex); | ||||
| 		kexec_unlock(); | ||||
| 	} | ||||
| } | ||||
| STACK_FRAME_NON_STANDARD(__crash_kexec); | ||||
|  | @ -1008,13 +1008,13 @@ ssize_t crash_get_memory_size(void) | |||
| { | ||||
| 	ssize_t size = 0; | ||||
| 
 | ||||
| 	if (!mutex_trylock(&kexec_mutex)) | ||||
| 	if (!kexec_trylock()) | ||||
| 		return -EBUSY; | ||||
| 
 | ||||
| 	if (crashk_res.end != crashk_res.start) | ||||
| 		size = resource_size(&crashk_res); | ||||
| 
 | ||||
| 	mutex_unlock(&kexec_mutex); | ||||
| 	kexec_unlock(); | ||||
| 	return size; | ||||
| } | ||||
| 
 | ||||
|  | @ -1025,7 +1025,7 @@ int crash_shrink_memory(unsigned long new_size) | |||
| 	unsigned long old_size; | ||||
| 	struct resource *ram_res; | ||||
| 
 | ||||
| 	if (!mutex_trylock(&kexec_mutex)) | ||||
| 	if (!kexec_trylock()) | ||||
| 		return -EBUSY; | ||||
| 
 | ||||
| 	if (kexec_crash_image) { | ||||
|  | @ -1064,7 +1064,7 @@ int crash_shrink_memory(unsigned long new_size) | |||
| 	insert_resource(&iomem_resource, ram_res); | ||||
| 
 | ||||
| unlock: | ||||
| 	mutex_unlock(&kexec_mutex); | ||||
| 	kexec_unlock(); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
|  | @ -1136,7 +1136,7 @@ int kernel_kexec(void) | |||
| { | ||||
| 	int error = 0; | ||||
| 
 | ||||
| 	if (!mutex_trylock(&kexec_mutex)) | ||||
| 	if (!kexec_trylock()) | ||||
| 		return -EBUSY; | ||||
| 	if (!kexec_image) { | ||||
| 		error = -EINVAL; | ||||
|  | @ -1212,6 +1212,6 @@ int kernel_kexec(void) | |||
| #endif | ||||
| 
 | ||||
|  Unlock: | ||||
| 	mutex_unlock(&kexec_mutex); | ||||
| 	kexec_unlock(); | ||||
| 	return error; | ||||
| } | ||||
|  |  | |||
|  | @ -339,7 +339,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, | |||
| 
 | ||||
| 	image = NULL; | ||||
| 
 | ||||
| 	if (!mutex_trylock(&kexec_mutex)) | ||||
| 	if (!kexec_trylock()) | ||||
| 		return -EBUSY; | ||||
| 
 | ||||
| 	dest_image = &kexec_image; | ||||
|  | @ -411,7 +411,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, | |||
| 	if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image) | ||||
| 		arch_kexec_protect_crashkres(); | ||||
| 
 | ||||
| 	mutex_unlock(&kexec_mutex); | ||||
| 	kexec_unlock(); | ||||
| 	kimage_free(image); | ||||
| 	return ret; | ||||
| } | ||||
|  |  | |||
|  | @ -13,7 +13,20 @@ void kimage_terminate(struct kimage *image); | |||
| int kimage_is_destination_range(struct kimage *image, | ||||
| 				unsigned long start, unsigned long end); | ||||
| 
 | ||||
| extern struct mutex kexec_mutex; | ||||
| /*
 | ||||
|  * Whatever is used to serialize accesses to the kexec_crash_image needs to be | ||||
|  * NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a | ||||
|  * "simple" atomic variable that is acquired with a cmpxchg(). | ||||
|  */ | ||||
| extern atomic_t __kexec_lock; | ||||
| static inline bool kexec_trylock(void) | ||||
| { | ||||
| 	return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0; | ||||
| } | ||||
| static inline void kexec_unlock(void) | ||||
| { | ||||
| 	atomic_set_release(&__kexec_lock, 0); | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_KEXEC_FILE | ||||
| #include <linux/purgatory.h> | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Valentin Schneider
						Valentin Schneider