mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	proc/vmcore: convert oldmem_pfn_is_ram callback to more generic vmcore callbacks
Let's support multiple registered callbacks, making sure that registering vmcore callbacks cannot fail. Make the callback return a bool instead of an int, deciding internally how to deal with errors. Drop unused HAVE_OLDMEM_PFN_IS_RAM. We soon want to make use of this infrastructure from other drivers: virtio-mem, registering one callback for each virtio-mem device, to prevent reading unplugged virtio-mem memory. Handle it via a generic vmcore_cb structure, prepared for future extensions: for example, once we support virtio-mem on s390x where the vmcore is completely constructed in the second kernel, we want to detect and add plugged virtio-mem memory ranges to the vmcore in order for them to get dumped properly. Handle corner cases that are unexpected and shouldn't happen in sane setups: registering a callback after the vmcore has already been opened (warn only) and unregistering a callback after the vmcore has already been opened (warn and essentially read only zeroes from that point on). Link: https://lkml.kernel.org/r/20211005121430.30136-6-david@redhat.com Signed-off-by: David Hildenbrand <david@redhat.com> Cc: Baoquan He <bhe@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Dave Young <dyoung@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jason Wang <jasowang@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Rapoport <rppt@kernel.org> Cc: Oscar Salvador <osalvador@suse.de> Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com> Cc: Stefano Stabellini <sstabellini@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									2c9feeaedf
								
							
						
					
					
						commit
						cc5f2704c9
					
				
					 4 changed files with 110 additions and 37 deletions
				
			
		|  | @ -73,12 +73,23 @@ static int gart_mem_pfn_is_ram(unsigned long pfn) | ||||||
| 		      (pfn >= aperture_pfn_start + aperture_page_count)); | 		      (pfn >= aperture_pfn_start + aperture_page_count)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #ifdef CONFIG_PROC_VMCORE | ||||||
|  | static bool gart_oldmem_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn) | ||||||
|  | { | ||||||
|  | 	return !!gart_mem_pfn_is_ram(pfn); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static struct vmcore_cb gart_vmcore_cb = { | ||||||
|  | 	.pfn_is_ram = gart_oldmem_pfn_is_ram, | ||||||
|  | }; | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| static void __init exclude_from_core(u64 aper_base, u32 aper_order) | static void __init exclude_from_core(u64 aper_base, u32 aper_order) | ||||||
| { | { | ||||||
| 	aperture_pfn_start = aper_base >> PAGE_SHIFT; | 	aperture_pfn_start = aper_base >> PAGE_SHIFT; | ||||||
| 	aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT; | 	aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT; | ||||||
| #ifdef CONFIG_PROC_VMCORE | #ifdef CONFIG_PROC_VMCORE | ||||||
| 	WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram)); | 	register_vmcore_cb(&gart_vmcore_cb); | ||||||
| #endif | #endif | ||||||
| #ifdef CONFIG_PROC_KCORE | #ifdef CONFIG_PROC_KCORE | ||||||
| 	WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram)); | 	WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram)); | ||||||
|  |  | ||||||
|  | @ -12,10 +12,10 @@ | ||||||
|  * The kdump kernel has to check whether a pfn of the crashed kernel |  * The kdump kernel has to check whether a pfn of the crashed kernel | ||||||
|  * was a ballooned page. vmcore is using this function to decide |  * was a ballooned page. vmcore is using this function to decide | ||||||
|  * whether to access a pfn of the crashed kernel. |  * whether to access a pfn of the crashed kernel. | ||||||
|  * Returns 0 if the pfn is not backed by a RAM page, the caller may |  * Returns "false" if the pfn is not backed by a RAM page, the caller may | ||||||
|  * handle the pfn special in this case. |  * handle the pfn special in this case. | ||||||
|  */ |  */ | ||||||
| static int xen_oldmem_pfn_is_ram(unsigned long pfn) | static bool xen_vmcore_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn) | ||||||
| { | { | ||||||
| 	struct xen_hvm_get_mem_type a = { | 	struct xen_hvm_get_mem_type a = { | ||||||
| 		.domid = DOMID_SELF, | 		.domid = DOMID_SELF, | ||||||
|  | @ -24,10 +24,13 @@ static int xen_oldmem_pfn_is_ram(unsigned long pfn) | ||||||
| 
 | 
 | ||||||
| 	if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) { | 	if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) { | ||||||
| 		pr_warn_once("Unexpected HVMOP_get_mem_type failure\n"); | 		pr_warn_once("Unexpected HVMOP_get_mem_type failure\n"); | ||||||
| 		return -ENXIO; | 		return true; | ||||||
| 	} | 	} | ||||||
| 	return a.mem_type != HVMMEM_mmio_dm; | 	return a.mem_type != HVMMEM_mmio_dm; | ||||||
| } | } | ||||||
|  | static struct vmcore_cb xen_vmcore_cb = { | ||||||
|  | 	.pfn_is_ram = xen_vmcore_pfn_is_ram, | ||||||
|  | }; | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| static void xen_hvm_exit_mmap(struct mm_struct *mm) | static void xen_hvm_exit_mmap(struct mm_struct *mm) | ||||||
|  | @ -61,6 +64,6 @@ void __init xen_hvm_init_mmu_ops(void) | ||||||
| 	if (is_pagetable_dying_supported()) | 	if (is_pagetable_dying_supported()) | ||||||
| 		pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap; | 		pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap; | ||||||
| #ifdef CONFIG_PROC_VMCORE | #ifdef CONFIG_PROC_VMCORE | ||||||
| 	WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram)); | 	register_vmcore_cb(&xen_vmcore_cb); | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -62,46 +62,75 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0); | ||||||
| /* Device Dump Size */ | /* Device Dump Size */ | ||||||
| static size_t vmcoredd_orig_sz; | static size_t vmcoredd_orig_sz; | ||||||
| 
 | 
 | ||||||
| /*
 | static DECLARE_RWSEM(vmcore_cb_rwsem); | ||||||
|  * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error | /* List of registered vmcore callbacks. */ | ||||||
|  * The called function has to take care of module refcounting. | static LIST_HEAD(vmcore_cb_list); | ||||||
|  */ | /* Whether we had a surprise unregistration of a callback. */ | ||||||
| static int (*oldmem_pfn_is_ram)(unsigned long pfn); | static bool vmcore_cb_unstable; | ||||||
|  | /* Whether the vmcore has been opened once. */ | ||||||
|  | static bool vmcore_opened; | ||||||
| 
 | 
 | ||||||
| int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn)) | void register_vmcore_cb(struct vmcore_cb *cb) | ||||||
| { | { | ||||||
| 	if (oldmem_pfn_is_ram) | 	down_write(&vmcore_cb_rwsem); | ||||||
| 		return -EBUSY; | 	INIT_LIST_HEAD(&cb->next); | ||||||
| 	oldmem_pfn_is_ram = fn; | 	list_add_tail(&cb->next, &vmcore_cb_list); | ||||||
| 	return 0; | 	/*
 | ||||||
|  | 	 * Registering a vmcore callback after the vmcore was opened is | ||||||
|  | 	 * very unusual (e.g., manual driver loading). | ||||||
|  | 	 */ | ||||||
|  | 	if (vmcore_opened) | ||||||
|  | 		pr_warn_once("Unexpected vmcore callback registration\n"); | ||||||
|  | 	up_write(&vmcore_cb_rwsem); | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram); | EXPORT_SYMBOL_GPL(register_vmcore_cb); | ||||||
| 
 | 
 | ||||||
| void unregister_oldmem_pfn_is_ram(void) | void unregister_vmcore_cb(struct vmcore_cb *cb) | ||||||
| { | { | ||||||
| 	oldmem_pfn_is_ram = NULL; | 	down_write(&vmcore_cb_rwsem); | ||||||
| 	wmb(); | 	list_del(&cb->next); | ||||||
|  | 	/*
 | ||||||
|  | 	 * Unregistering a vmcore callback after the vmcore was opened is | ||||||
|  | 	 * very unusual (e.g., forced driver removal), but we cannot stop | ||||||
|  | 	 * unregistering. | ||||||
|  | 	 */ | ||||||
|  | 	if (vmcore_opened) { | ||||||
|  | 		pr_warn_once("Unexpected vmcore callback unregistration\n"); | ||||||
|  | 		vmcore_cb_unstable = true; | ||||||
|  | 	} | ||||||
|  | 	up_write(&vmcore_cb_rwsem); | ||||||
| } | } | ||||||
| EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram); | EXPORT_SYMBOL_GPL(unregister_vmcore_cb); | ||||||
| 
 | 
 | ||||||
| static bool pfn_is_ram(unsigned long pfn) | static bool pfn_is_ram(unsigned long pfn) | ||||||
| { | { | ||||||
| 	int (*fn)(unsigned long pfn); | 	struct vmcore_cb *cb; | ||||||
| 	/* pfn is ram unless fn() checks pagetype */ |  | ||||||
| 	bool ret = true; | 	bool ret = true; | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	lockdep_assert_held_read(&vmcore_cb_rwsem); | ||||||
| 	 * Ask hypervisor if the pfn is really ram. | 	if (unlikely(vmcore_cb_unstable)) | ||||||
| 	 * A ballooned page contains no data and reading from such a page | 		return false; | ||||||
| 	 * will cause high load in the hypervisor. | 
 | ||||||
| 	 */ | 	list_for_each_entry(cb, &vmcore_cb_list, next) { | ||||||
| 	fn = oldmem_pfn_is_ram; | 		if (unlikely(!cb->pfn_is_ram)) | ||||||
| 	if (fn) | 			continue; | ||||||
| 		ret = !!fn(pfn); | 		ret = cb->pfn_is_ram(cb, pfn); | ||||||
|  | 		if (!ret) | ||||||
|  | 			break; | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static int open_vmcore(struct inode *inode, struct file *file) | ||||||
|  | { | ||||||
|  | 	down_read(&vmcore_cb_rwsem); | ||||||
|  | 	vmcore_opened = true; | ||||||
|  | 	up_read(&vmcore_cb_rwsem); | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /* Reads a page from the oldmem device from given offset. */ | /* Reads a page from the oldmem device from given offset. */ | ||||||
| ssize_t read_from_oldmem(char *buf, size_t count, | ssize_t read_from_oldmem(char *buf, size_t count, | ||||||
| 			 u64 *ppos, int userbuf, | 			 u64 *ppos, int userbuf, | ||||||
|  | @ -117,6 +146,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, | ||||||
| 	offset = (unsigned long)(*ppos % PAGE_SIZE); | 	offset = (unsigned long)(*ppos % PAGE_SIZE); | ||||||
| 	pfn = (unsigned long)(*ppos / PAGE_SIZE); | 	pfn = (unsigned long)(*ppos / PAGE_SIZE); | ||||||
| 
 | 
 | ||||||
|  | 	down_read(&vmcore_cb_rwsem); | ||||||
| 	do { | 	do { | ||||||
| 		if (count > (PAGE_SIZE - offset)) | 		if (count > (PAGE_SIZE - offset)) | ||||||
| 			nr_bytes = PAGE_SIZE - offset; | 			nr_bytes = PAGE_SIZE - offset; | ||||||
|  | @ -136,8 +166,10 @@ ssize_t read_from_oldmem(char *buf, size_t count, | ||||||
| 				tmp = copy_oldmem_page(pfn, buf, nr_bytes, | 				tmp = copy_oldmem_page(pfn, buf, nr_bytes, | ||||||
| 						       offset, userbuf); | 						       offset, userbuf); | ||||||
| 
 | 
 | ||||||
| 			if (tmp < 0) | 			if (tmp < 0) { | ||||||
|  | 				up_read(&vmcore_cb_rwsem); | ||||||
| 				return tmp; | 				return tmp; | ||||||
|  | 			} | ||||||
| 		} | 		} | ||||||
| 		*ppos += nr_bytes; | 		*ppos += nr_bytes; | ||||||
| 		count -= nr_bytes; | 		count -= nr_bytes; | ||||||
|  | @ -147,6 +179,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, | ||||||
| 		offset = 0; | 		offset = 0; | ||||||
| 	} while (count); | 	} while (count); | ||||||
| 
 | 
 | ||||||
|  | 	up_read(&vmcore_cb_rwsem); | ||||||
| 	return read; | 	return read; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -537,14 +570,19 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma, | ||||||
| 			    unsigned long from, unsigned long pfn, | 			    unsigned long from, unsigned long pfn, | ||||||
| 			    unsigned long size, pgprot_t prot) | 			    unsigned long size, pgprot_t prot) | ||||||
| { | { | ||||||
|  | 	int ret; | ||||||
|  | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * Check if oldmem_pfn_is_ram was registered to avoid | 	 * Check if oldmem_pfn_is_ram was registered to avoid | ||||||
| 	 * looping over all pages without a reason. | 	 * looping over all pages without a reason. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (oldmem_pfn_is_ram) | 	down_read(&vmcore_cb_rwsem); | ||||||
| 		return remap_oldmem_pfn_checked(vma, from, pfn, size, prot); | 	if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable) | ||||||
|  | 		ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot); | ||||||
| 	else | 	else | ||||||
| 		return remap_oldmem_pfn_range(vma, from, pfn, size, prot); | 		ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot); | ||||||
|  | 	up_read(&vmcore_cb_rwsem); | ||||||
|  | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | ||||||
|  | @ -668,6 +706,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| static const struct proc_ops vmcore_proc_ops = { | static const struct proc_ops vmcore_proc_ops = { | ||||||
|  | 	.proc_open	= open_vmcore, | ||||||
| 	.proc_read	= read_vmcore, | 	.proc_read	= read_vmcore, | ||||||
| 	.proc_lseek	= default_llseek, | 	.proc_lseek	= default_llseek, | ||||||
| 	.proc_mmap	= mmap_vmcore, | 	.proc_mmap	= mmap_vmcore, | ||||||
|  |  | ||||||
|  | @ -91,9 +91,29 @@ static inline void vmcore_unusable(void) | ||||||
| 		elfcorehdr_addr = ELFCORE_ADDR_ERR; | 		elfcorehdr_addr = ELFCORE_ADDR_ERR; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define HAVE_OLDMEM_PFN_IS_RAM 1 | /**
 | ||||||
| extern int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn)); |  * struct vmcore_cb - driver callbacks for /proc/vmcore handling | ||||||
| extern void unregister_oldmem_pfn_is_ram(void); |  * @pfn_is_ram: check whether a PFN really is RAM and should be accessed when | ||||||
|  |  *              reading the vmcore. Will return "true" if it is RAM or if the | ||||||
|  |  *              callback cannot tell. If any callback returns "false", it's not | ||||||
|  |  *              RAM and the page must not be accessed; zeroes should be | ||||||
|  |  *              indicated in the vmcore instead. For example, a ballooned page | ||||||
|  |  *              contains no data and reading from such a page will cause high | ||||||
|  |  *              load in the hypervisor. | ||||||
|  |  * @next: List head to manage registered callbacks internally; initialized by | ||||||
|  |  *        register_vmcore_cb(). | ||||||
|  |  * | ||||||
|  |  * vmcore callbacks allow drivers managing physical memory ranges to | ||||||
|  |  * coordinate with vmcore handling code, for example, to prevent accessing | ||||||
|  |  * physical memory ranges that should not be accessed when reading the vmcore, | ||||||
|  |  * although included in the vmcore header as memory ranges to dump. | ||||||
|  |  */ | ||||||
|  | struct vmcore_cb { | ||||||
|  | 	bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn); | ||||||
|  | 	struct list_head next; | ||||||
|  | }; | ||||||
|  | extern void register_vmcore_cb(struct vmcore_cb *cb); | ||||||
|  | extern void unregister_vmcore_cb(struct vmcore_cb *cb); | ||||||
| 
 | 
 | ||||||
| #else /* !CONFIG_CRASH_DUMP */ | #else /* !CONFIG_CRASH_DUMP */ | ||||||
| static inline bool is_kdump_kernel(void) { return 0; } | static inline bool is_kdump_kernel(void) { return 0; } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 David Hildenbrand
						David Hildenbrand