mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	x86/mce: Fix all mce notifiers to update the mce->kflags bitmask
If the handler took any action to log or deal with the error, set a bit in mce->kflags so that the default handler on the end of the machine check chain can see what has been done.

Get rid of NOTIFY_STOP returns. Make the EDAC and dev-mcelog handlers skip over errors already processed by CEC.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20200214222720.13168-5-tony.luck@intel.com
This commit is contained in:
		
							parent
							
								
									1de08dccd3
								
							
						
					
					
						commit
						23ba710a08
					
				
					 10 changed files with 37 additions and 12 deletions
				
			
		| 
						 | 
				
			
			@ -581,8 +581,10 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
		return NOTIFY_DONE;
 | 
			
		||||
 | 
			
		||||
	pfn = mce->addr >> PAGE_SHIFT;
 | 
			
		||||
	if (!memory_failure(pfn, 0))
 | 
			
		||||
	if (!memory_failure(pfn, 0)) {
 | 
			
		||||
		set_mce_nospec(pfn);
 | 
			
		||||
		mce->kflags |= MCE_HANDLED_UC;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return NOTIFY_OK;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -39,6 +39,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
	struct mce *mce = (struct mce *)data;
 | 
			
		||||
	unsigned int entry;
 | 
			
		||||
 | 
			
		||||
	if (mce->kflags & MCE_HANDLED_CEC)
 | 
			
		||||
		return NOTIFY_DONE;
 | 
			
		||||
 | 
			
		||||
	mutex_lock(&mce_chrdev_read_mutex);
 | 
			
		||||
 | 
			
		||||
	entry = mcelog->next;
 | 
			
		||||
| 
						 | 
				
			
			@ -56,6 +59,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
 | 
			
		||||
	memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
 | 
			
		||||
	mcelog->entry[entry].finished = 1;
 | 
			
		||||
	mcelog->entry[entry].kflags = 0;
 | 
			
		||||
 | 
			
		||||
	/* wake processes polling /dev/mcelog */
 | 
			
		||||
	wake_up_interruptible(&mce_chrdev_wait);
 | 
			
		||||
| 
						 | 
				
			
			@ -63,6 +67,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
unlock:
 | 
			
		||||
	mutex_unlock(&mce_chrdev_read_mutex);
 | 
			
		||||
 | 
			
		||||
	mce->kflags |= MCE_HANDLED_MCELOG;
 | 
			
		||||
	return NOTIFY_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -146,7 +146,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
	static u32 err_seq;
 | 
			
		||||
 | 
			
		||||
	estatus = extlog_elog_entry_check(cpu, bank);
 | 
			
		||||
	if (estatus == NULL)
 | 
			
		||||
	if (estatus == NULL || (mce->kflags & MCE_HANDLED_CEC))
 | 
			
		||||
		return NOTIFY_DONE;
 | 
			
		||||
 | 
			
		||||
	memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN);
 | 
			
		||||
| 
						 | 
				
			
			@ -176,7 +176,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
	}
 | 
			
		||||
 | 
			
		||||
out:
 | 
			
		||||
	return NOTIFY_STOP;
 | 
			
		||||
	mce->kflags |= MCE_HANDLED_EXTLOG;
 | 
			
		||||
	return NOTIFY_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool __init extlog_get_l1addr(void)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -76,6 +76,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
			 */
 | 
			
		||||
			acpi_nfit_ars_rescan(acpi_desc, 0);
 | 
			
		||||
		}
 | 
			
		||||
		mce->kflags |= MCE_HANDLED_NFIT;
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1815,7 +1815,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
	struct mem_ctl_info *mci;
 | 
			
		||||
 | 
			
		||||
	i7_dev = get_i7core_dev(mce->socketid);
 | 
			
		||||
	if (!i7_dev)
 | 
			
		||||
	if (!i7_dev || (mce->kflags & MCE_HANDLED_CEC))
 | 
			
		||||
		return NOTIFY_DONE;
 | 
			
		||||
 | 
			
		||||
	mci = i7_dev->mci;
 | 
			
		||||
| 
						 | 
				
			
			@ -1834,7 +1834,8 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
	i7core_check_error(mci, mce);
 | 
			
		||||
 | 
			
		||||
	/* Advise mcelog that the errors were handled */
 | 
			
		||||
	return NOTIFY_STOP;
 | 
			
		||||
	mce->kflags |= MCE_HANDLED_EDAC;
 | 
			
		||||
	return NOTIFY_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct notifier_block i7_mce_dec = {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1046,6 +1046,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 | 
			
		|||
	unsigned int fam = x86_family(m->cpuid);
 | 
			
		||||
	int ecc;
 | 
			
		||||
 | 
			
		||||
	if (m->kflags & MCE_HANDLED_CEC)
 | 
			
		||||
		return NOTIFY_DONE;
 | 
			
		||||
 | 
			
		||||
	pr_emerg(HW_ERR "%s\n", decode_error_status(m));
 | 
			
		||||
 | 
			
		||||
	pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
 | 
			
		||||
| 
						 | 
				
			
			@ -1146,7 +1149,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 | 
			
		|||
 err_code:
 | 
			
		||||
	amd_decode_err_code(m->status & 0xffff);
 | 
			
		||||
 | 
			
		||||
	return NOTIFY_STOP;
 | 
			
		||||
	m->kflags |= MCE_HANDLED_EDAC;
 | 
			
		||||
	return NOTIFY_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct notifier_block amd_mce_dec_nb = {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1400,7 +1400,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
 | 
			
		|||
		return NOTIFY_DONE;
 | 
			
		||||
 | 
			
		||||
	mci = pnd2_mci;
 | 
			
		||||
	if (!mci)
 | 
			
		||||
	if (!mci || (mce->kflags & MCE_HANDLED_CEC))
 | 
			
		||||
		return NOTIFY_DONE;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
| 
						 | 
				
			
			@ -1429,7 +1429,8 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
 | 
			
		|||
	pnd2_mce_output_error(mci, mce, &daddr);
 | 
			
		||||
 | 
			
		||||
	/* Advise mcelog that the errors were handled */
 | 
			
		||||
	return NOTIFY_STOP;
 | 
			
		||||
	mce->kflags |= MCE_HANDLED_EDAC;
 | 
			
		||||
	return NOTIFY_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct notifier_block pnd2_mce_dec = {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3136,6 +3136,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
 | 
			
		||||
	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
 | 
			
		||||
		return NOTIFY_DONE;
 | 
			
		||||
	if (mce->kflags & MCE_HANDLED_CEC)
 | 
			
		||||
		return NOTIFY_DONE;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Just let mcelog handle it if the error is
 | 
			
		||||
| 
						 | 
				
			
			@ -3183,7 +3185,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
	sbridge_mce_output_error(mci, mce);
 | 
			
		||||
 | 
			
		||||
	/* Advise mcelog that the errors were handled */
 | 
			
		||||
	return NOTIFY_STOP;
 | 
			
		||||
	mce->kflags |= MCE_HANDLED_EDAC;
 | 
			
		||||
	return NOTIFY_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static struct notifier_block sbridge_mce_dec = {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -577,6 +577,9 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
 | 
			
		||||
		return NOTIFY_DONE;
 | 
			
		||||
 | 
			
		||||
	if (mce->kflags & MCE_HANDLED_CEC)
 | 
			
		||||
		return NOTIFY_DONE;
 | 
			
		||||
 | 
			
		||||
	/* ignore unless this is memory related with an address */
 | 
			
		||||
	if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
 | 
			
		||||
		return NOTIFY_DONE;
 | 
			
		||||
| 
						 | 
				
			
			@ -616,6 +619,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
 | 
			
		||||
	skx_mce_output_error(mci, mce, &res);
 | 
			
		||||
 | 
			
		||||
	mce->kflags |= MCE_HANDLED_EDAC;
 | 
			
		||||
	return NOTIFY_DONE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -538,9 +538,12 @@ static int cec_notifier(struct notifier_block *nb, unsigned long val,
 | 
			
		|||
	/* We eat only correctable DRAM errors with usable addresses. */
 | 
			
		||||
	if (mce_is_memory_error(m) &&
 | 
			
		||||
	    mce_is_correctable(m)  &&
 | 
			
		||||
	    mce_usable_address(m))
 | 
			
		||||
		if (!cec_add_elem(m->addr >> PAGE_SHIFT))
 | 
			
		||||
			return NOTIFY_STOP;
 | 
			
		||||
	    mce_usable_address(m)) {
 | 
			
		||||
		if (!cec_add_elem(m->addr >> PAGE_SHIFT)) {
 | 
			
		||||
			m->kflags |= MCE_HANDLED_CEC;
 | 
			
		||||
			return NOTIFY_OK;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return NOTIFY_DONE;
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue