forked from mirrors/linux
		
	x86/smp: Allow calling mwait_play_dead with an arbitrary hint
Introduce a helper function to allow offlined CPUs to enter idle states with a specific MWAIT hint. The new helper will be used in subsequent patches by the acpi_idle and intel_idle drivers. No functional change intended. Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com> Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Link: https://lore.kernel.org/all/20250205155211.329780-2-artem.bityutskiy%40linux.intel.com
This commit is contained in:
		
							parent
							
								
									1e66d6cf88
								
							
						
					
					
						commit
						a7dd183f0b
					
				
					 2 changed files with 51 additions and 42 deletions
				
			
		| 
						 | 
				
			
			@ -114,6 +114,7 @@ void wbinvd_on_cpu(int cpu);
 | 
			
		|||
int wbinvd_on_all_cpus(void);
 | 
			
		||||
 | 
			
		||||
void smp_kick_mwait_play_dead(void);
 | 
			
		||||
void mwait_play_dead(unsigned int eax_hint);
 | 
			
		||||
 | 
			
		||||
void native_smp_send_reschedule(int cpu);
 | 
			
		||||
void native_send_call_func_ipi(const struct cpumask *mask);
 | 
			
		||||
| 
						 | 
				
			
			@ -164,6 +165,8 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
 | 
			
		|||
{
 | 
			
		||||
	return (struct cpumask *)cpumask_of(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* !CONFIG_SMP stub: no offlined CPUs to park, so the MWAIT hint is ignored. */
static inline void mwait_play_dead(unsigned int eax_hint) { }
 | 
			
		||||
#endif /* CONFIG_SMP */
 | 
			
		||||
 | 
			
		||||
#ifdef CONFIG_DEBUG_NMI_SELFTEST
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1258,13 +1258,57 @@ void play_dead_common(void)
 | 
			
		|||
	local_irq_disable();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Park an offlined CPU in MWAIT using the caller-supplied hint.
 *
 * @eax_hint: MWAIT hint placed in EAX, selecting the target idle state.
 *            Callers (e.g. acpi_idle / intel_idle drivers) choose the hint.
 *
 * Never returns: the CPU loops in MONITOR/MWAIT on its per-CPU
 * mwait_cpu_dead cacheline until a kexec is signalled via md->control,
 * at which point it drops into a HLT loop instead.
 */
void __noreturn mwait_play_dead(unsigned int eax_hint)
{
	struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);

	/* Set up state for the kexec() hack below */
	md->status = CPUDEAD_MWAIT_WAIT;
	md->control = CPUDEAD_MWAIT_WAIT;

	/* Flush dirty cachelines before the CPU stops servicing snoops. */
	wbinvd();

	/* Spurious wakeups return around this loop and re-arm the monitor. */
	while (1) {
		/*
		 * The CLFLUSH is a workaround for erratum AAI65 for
		 * the Xeon 7400 series.  It's not clear it is actually
		 * needed, but it should be harmless in either case.
		 * The WBINVD is insufficient due to the spurious-wakeup
		 * case where we return around the loop.
		 */
		mb();
		clflush(md);
		mb();
		/* Arm the monitor on md, then enter the requested idle state. */
		__monitor(md, 0, 0);
		mb();
		__mwait(eax_hint, 0);

		if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) {
			/*
			 * Kexec is about to happen. Don't go back into mwait() as
			 * the kexec kernel might overwrite text and data including
			 * page tables and stack. So mwait() would resume when the
			 * monitor cache line is written to and then the CPU goes
			 * south due to overwritten text, page tables and stack.
			 *
			 * Note: This does _NOT_ protect against a stray MCE, NMI,
			 * SMI. They will resume execution at the instruction
			 * following the HLT instruction and run into the problem
			 * which this is trying to prevent.
			 */
			/* Ack the handoff so the kexec path can proceed. */
			WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT);
			while(1)
				native_halt();
		}
	}
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * We need to flush the caches before going to sleep, lest we have
 | 
			
		||||
 * dirty data in our caches when we come back up.
 | 
			
		||||
 */
 | 
			
		||||
static inline void mwait_play_dead(void)
 | 
			
		||||
static inline void mwait_play_dead_cpuid_hint(void)
 | 
			
		||||
{
 | 
			
		||||
	struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
 | 
			
		||||
	unsigned int eax, ebx, ecx, edx;
 | 
			
		||||
	unsigned int highest_cstate = 0;
 | 
			
		||||
	unsigned int highest_subcstate = 0;
 | 
			
		||||
| 
						 | 
				
			
			@ -1300,45 +1344,7 @@ static inline void mwait_play_dead(void)
 | 
			
		|||
			(highest_subcstate - 1);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* Set up state for the kexec() hack below */
 | 
			
		||||
	md->status = CPUDEAD_MWAIT_WAIT;
 | 
			
		||||
	md->control = CPUDEAD_MWAIT_WAIT;
 | 
			
		||||
 | 
			
		||||
	wbinvd();
 | 
			
		||||
 | 
			
		||||
	while (1) {
 | 
			
		||||
		/*
 | 
			
		||||
		 * The CLFLUSH is a workaround for erratum AAI65 for
 | 
			
		||||
		 * the Xeon 7400 series.  It's not clear it is actually
 | 
			
		||||
		 * needed, but it should be harmless in either case.
 | 
			
		||||
		 * The WBINVD is insufficient due to the spurious-wakeup
 | 
			
		||||
		 * case where we return around the loop.
 | 
			
		||||
		 */
 | 
			
		||||
		mb();
 | 
			
		||||
		clflush(md);
 | 
			
		||||
		mb();
 | 
			
		||||
		__monitor(md, 0, 0);
 | 
			
		||||
		mb();
 | 
			
		||||
		__mwait(eax, 0);
 | 
			
		||||
 | 
			
		||||
		if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) {
 | 
			
		||||
			/*
 | 
			
		||||
			 * Kexec is about to happen. Don't go back into mwait() as
 | 
			
		||||
			 * the kexec kernel might overwrite text and data including
 | 
			
		||||
			 * page tables and stack. So mwait() would resume when the
 | 
			
		||||
			 * monitor cache line is written to and then the CPU goes
 | 
			
		||||
			 * south due to overwritten text, page tables and stack.
 | 
			
		||||
			 *
 | 
			
		||||
			 * Note: This does _NOT_ protect against a stray MCE, NMI,
 | 
			
		||||
			 * SMI. They will resume execution at the instruction
 | 
			
		||||
			 * following the HLT instruction and run into the problem
 | 
			
		||||
			 * which this is trying to prevent.
 | 
			
		||||
			 */
 | 
			
		||||
			WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT);
 | 
			
		||||
			while(1)
 | 
			
		||||
				native_halt();
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	mwait_play_dead(eax);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
| 
						 | 
				
			
			@ -1391,7 +1397,7 @@ void native_play_dead(void)
 | 
			
		|||
	play_dead_common();
 | 
			
		||||
	tboot_shutdown(TB_SHUTDOWN_WFS);
 | 
			
		||||
 | 
			
		||||
	mwait_play_dead();
 | 
			
		||||
	mwait_play_dead_cpuid_hint();
 | 
			
		||||
	if (cpuidle_play_dead())
 | 
			
		||||
		hlt_play_dead();
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue