mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	Crash: add lock to serialize crash hotplug handling
Eric reported that handling of crash hotplug events can easily fail
when many memory hotplug events are notified in a short period.  The
failures occur because kexec_trylock() cannot take __kexec_lock.
=======
[   78.714569] Fallback order for Node 0: 0
[   78.714575] Built 1 zonelists, mobility grouping on.  Total pages: 1817886
[   78.717133] Policy zone: Normal
[   78.724423] crash hp: kexec_trylock() failed, elfcorehdr may be inaccurate
[   78.727207] crash hp: kexec_trylock() failed, elfcorehdr may be inaccurate
[   80.056643] PEFILE: Unsigned PE binary
=======
Memory hotplug events are notified in large numbers and in quick
succession, while the handling of crash hotplug is relatively much slower.
So the atomic variable __kexec_lock and kexec_trylock() can't guarantee the
serialization of crash hotplug handling.
Add a new mutex, __crash_hotplug_lock, to serialize crash hotplug handling
specifically.  This doesn't affect the usage of __kexec_lock.
Link: https://lkml.kernel.org/r/20230926120905.392903-1-bhe@redhat.com
Fixes: 2472627561 ("crash: add generic infrastructure for crash hotplug support")
Signed-off-by: Baoquan He <bhe@redhat.com>
Tested-by: Eric DeVolder <eric.devolder@oracle.com>
Reviewed-by: Eric DeVolder <eric.devolder@oracle.com>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Cc: Sourabh Jain <sourabhjain@linux.ibm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
			
			
This commit is contained in:
		
							parent
							
								
									bbe246f875
								
							
						
					
					
						commit
						e2a8f20dd8
					
				
					 1 changed files with 17 additions and 0 deletions
				
			
		| 
						 | 
					@ -739,6 +739,17 @@ subsys_initcall(crash_notes_memory_init);
 | 
				
			||||||
#undef pr_fmt
 | 
					#undef pr_fmt
 | 
				
			||||||
#define pr_fmt(fmt) "crash hp: " fmt
 | 
					#define pr_fmt(fmt) "crash hp: " fmt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Different than kexec/kdump loading/unloading/jumping/shrinking which
 | 
				
			||||||
 | 
					 * usually rarely happen, there will be many crash hotplug events notified
 | 
				
			||||||
 | 
					 * during one short period, e.g one memory board is hot added and memory
 | 
				
			||||||
 | 
					 * regions are online. So mutex lock  __crash_hotplug_lock is used to
 | 
				
			||||||
 | 
					 * serialize the crash hotplug handling specifically.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					DEFINE_MUTEX(__crash_hotplug_lock);
 | 
				
			||||||
 | 
					#define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock)
 | 
				
			||||||
 | 
					#define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * This routine utilized when the crash_hotplug sysfs node is read.
 | 
					 * This routine utilized when the crash_hotplug sysfs node is read.
 | 
				
			||||||
 * It reflects the kernel's ability/permission to update the crash
 | 
					 * It reflects the kernel's ability/permission to update the crash
 | 
				
			||||||
| 
						 | 
					@ -748,9 +759,11 @@ int crash_check_update_elfcorehdr(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int rc = 0;
 | 
						int rc = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						crash_hotplug_lock();
 | 
				
			||||||
	/* Obtain lock while reading crash information */
 | 
						/* Obtain lock while reading crash information */
 | 
				
			||||||
	if (!kexec_trylock()) {
 | 
						if (!kexec_trylock()) {
 | 
				
			||||||
		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
 | 
							pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
 | 
				
			||||||
 | 
							crash_hotplug_unlock();
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	if (kexec_crash_image) {
 | 
						if (kexec_crash_image) {
 | 
				
			||||||
| 
						 | 
					@ -761,6 +774,7 @@ int crash_check_update_elfcorehdr(void)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	/* Release lock now that update complete */
 | 
						/* Release lock now that update complete */
 | 
				
			||||||
	kexec_unlock();
 | 
						kexec_unlock();
 | 
				
			||||||
 | 
						crash_hotplug_unlock();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return rc;
 | 
						return rc;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -783,9 +797,11 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct kimage *image;
 | 
						struct kimage *image;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						crash_hotplug_lock();
 | 
				
			||||||
	/* Obtain lock while changing crash information */
 | 
						/* Obtain lock while changing crash information */
 | 
				
			||||||
	if (!kexec_trylock()) {
 | 
						if (!kexec_trylock()) {
 | 
				
			||||||
		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
 | 
							pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
 | 
				
			||||||
 | 
							crash_hotplug_unlock();
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -852,6 +868,7 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
	/* Release lock now that update complete */
 | 
						/* Release lock now that update complete */
 | 
				
			||||||
	kexec_unlock();
 | 
						kexec_unlock();
 | 
				
			||||||
 | 
						crash_hotplug_unlock();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
 | 
					static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue