forked from mirrors/linux
		
	x86/mce: Only restart instruction after machine check recovery if it is safe
Section 15.3.1.2 of the software developer manual has this to say about the RIPV bit in the IA32_MCG_STATUS register:

  "RIPV (restart IP valid) flag, bit 0 — Indicates (when set) that program execution can be restarted reliably at the instruction pointed to by the instruction pointer pushed on the stack when the machine-check exception is generated. When clear, the program cannot be reliably restarted at the pushed instruction pointer."

We need to save the state of this bit in do_machine_check() and use it in mce_notify_process() to force a signal even if memory_failure() says it made a complete recovery (e.g. it replaced a clean LRU page).

Acked-by: Borislav Petkov <bp@amd64.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
This commit is contained in:
		
							parent
							
								
									d48b97b403
								
							
						
					
					
						commit
						dad1743e59
					
				
					 1 changed file with 11 additions and 3 deletions
				
			
		|  | @ -945,9 +945,10 @@ struct mce_info { | ||||||
| 	atomic_t		inuse; | 	atomic_t		inuse; | ||||||
| 	struct task_struct	*t; | 	struct task_struct	*t; | ||||||
| 	__u64			paddr; | 	__u64			paddr; | ||||||
|  | 	int			restartable; | ||||||
| } mce_info[MCE_INFO_MAX]; | } mce_info[MCE_INFO_MAX]; | ||||||
| 
 | 
 | ||||||
| static void mce_save_info(__u64 addr) | static void mce_save_info(__u64 addr, int c) | ||||||
| { | { | ||||||
| 	struct mce_info *mi; | 	struct mce_info *mi; | ||||||
| 
 | 
 | ||||||
|  | @ -955,6 +956,7 @@ static void mce_save_info(__u64 addr) | ||||||
| 		if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { | 		if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { | ||||||
| 			mi->t = current; | 			mi->t = current; | ||||||
| 			mi->paddr = addr; | 			mi->paddr = addr; | ||||||
|  | 			mi->restartable = c; | ||||||
| 			return; | 			return; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  | @ -1130,7 +1132,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | ||||||
| 			mce_panic("Fatal machine check on current CPU", &m, msg); | 			mce_panic("Fatal machine check on current CPU", &m, msg); | ||||||
| 		if (worst == MCE_AR_SEVERITY) { | 		if (worst == MCE_AR_SEVERITY) { | ||||||
| 			/* schedule action before return to userland */ | 			/* schedule action before return to userland */ | ||||||
| 			mce_save_info(m.addr); | 			mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); | ||||||
| 			set_thread_flag(TIF_MCE_NOTIFY); | 			set_thread_flag(TIF_MCE_NOTIFY); | ||||||
| 		} else if (kill_it) { | 		} else if (kill_it) { | ||||||
| 			force_sig(SIGBUS, current); | 			force_sig(SIGBUS, current); | ||||||
|  | @ -1179,7 +1181,13 @@ void mce_notify_process(void) | ||||||
| 
 | 
 | ||||||
| 	pr_err("Uncorrected hardware memory error in user-access at %llx", | 	pr_err("Uncorrected hardware memory error in user-access at %llx", | ||||||
| 		 mi->paddr); | 		 mi->paddr); | ||||||
| 	if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) { | 	/*
 | ||||||
|  | 	 * We must call memory_failure() here even if the current process is | ||||||
|  | 	 * doomed. We still need to mark the page as poisoned and alert any | ||||||
|  | 	 * other users of the page. | ||||||
|  | 	 */ | ||||||
|  | 	if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || | ||||||
|  | 			   mi->restartable == 0) { | ||||||
| 		pr_err("Memory error not recovered"); | 		pr_err("Memory error not recovered"); | ||||||
| 		force_sig(SIGBUS, current); | 		force_sig(SIGBUS, current); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue
	
	 Tony Luck
						Tony Luck