mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	When starting APs, confidential guests and paravisor guests need to know the CPU number, and the pattern of using the linear search has emerged in several places. With N processors that leads to the O(N^2) time complexity. Provide the CPU number in the AP wake up callback so that one can get the CPU number in constant time. Suggested-by: Michael Kelley <mhklinux@outlook.com> Signed-off-by: Roman Kisel <romank@linux.microsoft.com> Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Michael Kelley <mhklinux@outlook.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Link: https://lore.kernel.org/r/20250507182227.7421-3-romank@linux.microsoft.com Signed-off-by: Wei Liu <wei.liu@kernel.org> Message-ID: <20250507182227.7421-3-romank@linux.microsoft.com>
		
			
				
	
	
		
			249 lines
		
	
	
	
		
			7.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			249 lines
		
	
	
	
		
			7.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
// SPDX-License-Identifier: GPL-2.0-or-later
 | 
						|
#include <linux/acpi.h>
 | 
						|
#include <linux/cpu.h>
 | 
						|
#include <linux/delay.h>
 | 
						|
#include <linux/io.h>
 | 
						|
#include <linux/kexec.h>
 | 
						|
#include <linux/memblock.h>
 | 
						|
#include <linux/pgtable.h>
 | 
						|
#include <linux/sched/hotplug.h>
 | 
						|
#include <asm/apic.h>
 | 
						|
#include <asm/barrier.h>
 | 
						|
#include <asm/init.h>
 | 
						|
#include <asm/intel_pt.h>
 | 
						|
#include <asm/nmi.h>
 | 
						|
#include <asm/processor.h>
 | 
						|
#include <asm/reboot.h>
 | 
						|
 | 
						|
/* Physical address of the Multiprocessor Wakeup Structure mailbox */
 | 
						|
static u64 acpi_mp_wake_mailbox_paddr __ro_after_init;
 | 
						|
 | 
						|
/* Virtual address of the Multiprocessor Wakeup Structure mailbox */
 | 
						|
static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox;
 | 
						|
 | 
						|
static u64 acpi_mp_pgd __ro_after_init;
 | 
						|
static u64 acpi_mp_reset_vector_paddr __ro_after_init;
 | 
						|
 | 
						|
/*
 * smp_ops.stop_this_cpu callback: jump to the low-level assembly helper,
 * passing the cached firmware reset vector and the physical address of
 * the identity-mapped page table it switches to before parking the CPU.
 */
static void acpi_mp_stop_this_cpu(void)
{
	asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
}
 | 
						|
 | 
						|
/*
 * smp_ops.play_dead callback: run the common offline preparation first,
 * then hand the CPU to the firmware via the same assembly helper used by
 * acpi_mp_stop_this_cpu().
 */
static void acpi_mp_play_dead(void)
{
	play_dead_common();
	asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
}
 | 
						|
 | 
						|
static void acpi_mp_cpu_die(unsigned int cpu)
 | 
						|
{
 | 
						|
	u32 apicid = per_cpu(x86_cpu_to_apicid, cpu);
 | 
						|
	unsigned long timeout;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Use TEST mailbox command to prove that BIOS got control over
 | 
						|
	 * the CPU before declaring it dead.
 | 
						|
	 *
 | 
						|
	 * BIOS has to clear 'command' field of the mailbox.
 | 
						|
	 */
 | 
						|
	acpi_mp_wake_mailbox->apic_id = apicid;
 | 
						|
	smp_store_release(&acpi_mp_wake_mailbox->command,
 | 
						|
			  ACPI_MP_WAKE_COMMAND_TEST);
 | 
						|
 | 
						|
	/* Don't wait longer than a second. */
 | 
						|
	timeout = USEC_PER_SEC;
 | 
						|
	while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout)
 | 
						|
		udelay(1);
 | 
						|
 | 
						|
	if (!timeout)
 | 
						|
		pr_err("Failed to hand over CPU %d to BIOS\n", cpu);
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Allocate one page for the identity page tables built by
 * acpi_mp_setup_reset().  The unused argument is required to match the
 * type of x86_mapping_info::alloc_pgt_page.
 */
static void __init *alloc_pgt_page(void *dummy)
{
	return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
}
 | 
						|
 | 
						|
/*
 * Counterpart of alloc_pgt_page(): hand a page-table page back to
 * memblock.  The unused argument is required to match the type of
 * x86_mapping_info::free_pgt_page.
 *
 * Note: do not 'return' the void expression -- that is a GNU extension,
 * not valid ISO C, and the function returns void anyway.
 */
static void __init free_pgt_page(void *pgt, void *dummy)
{
	memblock_free(pgt, PAGE_SIZE);
}
 | 
						|
 | 
						|
static int __init acpi_mp_setup_reset(u64 reset_vector)
 | 
						|
{
 | 
						|
	struct x86_mapping_info info = {
 | 
						|
		.alloc_pgt_page = alloc_pgt_page,
 | 
						|
		.free_pgt_page	= free_pgt_page,
 | 
						|
		.page_flag      = __PAGE_KERNEL_LARGE_EXEC,
 | 
						|
		.kernpg_flag    = _KERNPG_TABLE_NOENC,
 | 
						|
	};
 | 
						|
	unsigned long mstart, mend;
 | 
						|
	pgd_t *pgd;
 | 
						|
 | 
						|
	pgd = alloc_pgt_page(NULL);
 | 
						|
	if (!pgd)
 | 
						|
		return -ENOMEM;
 | 
						|
 | 
						|
	for (int i = 0; i < nr_pfn_mapped; i++) {
 | 
						|
		mstart = pfn_mapped[i].start << PAGE_SHIFT;
 | 
						|
		mend   = pfn_mapped[i].end << PAGE_SHIFT;
 | 
						|
		if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
 | 
						|
			kernel_ident_mapping_free(&info, pgd);
 | 
						|
			return -ENOMEM;
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	mstart = PAGE_ALIGN_DOWN(reset_vector);
 | 
						|
	mend = mstart + PAGE_SIZE;
 | 
						|
	if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
 | 
						|
		kernel_ident_mapping_free(&info, pgd);
 | 
						|
		return -ENOMEM;
 | 
						|
	}
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Make sure asm_acpi_mp_play_dead() is present in the identity mapping
 | 
						|
	 * at the same place as in the kernel page tables.
 | 
						|
	 * asm_acpi_mp_play_dead() switches to the identity mapping and the
 | 
						|
	 * function must be present at the same spot in the virtual address space
 | 
						|
	 * before and after switching page tables.
 | 
						|
	 */
 | 
						|
	info.offset = __START_KERNEL_map - phys_base;
 | 
						|
	mstart = PAGE_ALIGN_DOWN(__pa(asm_acpi_mp_play_dead));
 | 
						|
	mend = mstart + PAGE_SIZE;
 | 
						|
	if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
 | 
						|
		kernel_ident_mapping_free(&info, pgd);
 | 
						|
		return -ENOMEM;
 | 
						|
	}
 | 
						|
 | 
						|
	smp_ops.play_dead = acpi_mp_play_dead;
 | 
						|
	smp_ops.stop_this_cpu = acpi_mp_stop_this_cpu;
 | 
						|
	smp_ops.cpu_die = acpi_mp_cpu_die;
 | 
						|
 | 
						|
	acpi_mp_reset_vector_paddr = reset_vector;
 | 
						|
	acpi_mp_pgd = __pa(pgd);
 | 
						|
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Wake a secondary CPU through the ACPI MADT Multiprocessor Wakeup
 * mailbox.
 *
 * @apicid:   APIC ID of the CPU to wake.
 * @start_ip: physical address the woken CPU starts executing at.
 * @cpu:      logical CPU number; unused here but part of the
 *            wakeup_secondary_cpu_64 callback signature so confidential
 *            and paravisor guests can obtain it in constant time.
 *
 * Returns 0 on success, -EOPNOTSUPP without a mailbox, -ENOMEM if the
 * mailbox cannot be remapped.
 */
static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip, unsigned int cpu)
{
	if (!acpi_mp_wake_mailbox_paddr) {
		pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. Booting with kexec?\n");
		return -EOPNOTSUPP;
	}

	/*
	 * Remap mailbox memory only for the first call to acpi_wakeup_cpu().
	 *
	 * Wakeup of secondary CPUs is fully serialized in the core code.
	 * No need to protect acpi_mp_wake_mailbox from concurrent accesses.
	 */
	if (!acpi_mp_wake_mailbox) {
		acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr,
						sizeof(*acpi_mp_wake_mailbox),
						MEMREMAP_WB);
		/* Do not dereference a failed remap below. */
		if (!acpi_mp_wake_mailbox)
			return -ENOMEM;
	}

	/*
	 * Mailbox memory is shared between the firmware and OS. Firmware will
	 * listen on mailbox command address, and once it receives the wakeup
	 * command, the CPU associated with the given apicid will be booted.
	 *
	 * The value of 'apic_id' and 'wakeup_vector' must be visible to the
	 * firmware before the wakeup command is visible.  smp_store_release()
	 * ensures ordering and visibility.
	 */
	acpi_mp_wake_mailbox->apic_id	    = apicid;
	acpi_mp_wake_mailbox->wakeup_vector = start_ip;
	smp_store_release(&acpi_mp_wake_mailbox->command,
			  ACPI_MP_WAKE_COMMAND_WAKEUP);

	/*
	 * Wait for the CPU to wake up.
	 *
	 * The CPU being woken up is essentially in a spin loop waiting to be
	 * woken up. It should not take long for it wake up and acknowledge by
	 * zeroing out ->command.
	 *
	 * ACPI specification doesn't provide any guidance on how long kernel
	 * has to wait for a wake up acknowledgment. It also doesn't provide
	 * a way to cancel a wake up request if it takes too long.
	 *
	 * In TDX environment, the VMM has control over how long it takes to
	 * wake up secondary. It can postpone scheduling secondary vCPU
	 * indefinitely. Giving up on wake up request and reporting error opens
	 * possible attack vector for VMM: it can wake up a secondary CPU when
	 * kernel doesn't expect it. Wait until positive result of the wake up
	 * request.
	 */
	while (READ_ONCE(acpi_mp_wake_mailbox->command))
		cpu_relax();

	return 0;
}
 | 
						|
 | 
						|
/*
 * Forbid CPU offlining and invalidate the mailbox address in the MADT so
 * a kexec'd kernel cannot attempt to use it.  Called when the wakeup
 * structure lacks the V1 reset vector or when setting it up failed.
 */
static void acpi_mp_disable_offlining(struct acpi_madt_multiproc_wakeup *mp_wake)
{
	cpu_hotplug_disable_offlining();

	/*
	 * ACPI MADT doesn't allow to offline a CPU after it was onlined. This
	 * limits kexec: the second kernel won't be able to use more than one CPU.
	 *
	 * To prevent a kexec kernel from onlining secondary CPUs invalidate the
	 * mailbox address in the ACPI MADT wakeup structure which prevents a
	 * kexec kernel to use it.
	 *
	 * This is safe as the booting kernel has the mailbox address cached
	 * already and acpi_wakeup_cpu() uses the cached value to bring up the
	 * secondary CPUs.
	 *
	 * Note: This is a Linux specific convention and not covered by the
	 *       ACPI specification.
	 */
	mp_wake->mailbox_address = 0;
}
 | 
						|
 | 
						|
/*
 * Parse the MADT Multiprocessor Wakeup entry: cache the mailbox physical
 * address, set up the V1 reset vector when present (disabling offlining
 * on failure or for V0 entries), and install acpi_wakeup_cpu() as the
 * 64-bit secondary-CPU wakeup callback.
 *
 * @header: the MADT subtable to parse.
 * @end:    end address of the table, used to bound-check the entry.
 *
 * Returns 0 on success, -EINVAL for a malformed entry.
 */
int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
			      const unsigned long end)
{
	struct acpi_madt_multiproc_wakeup *mp_wake;

	mp_wake = (struct acpi_madt_multiproc_wakeup *)header;

	/*
	 * Cannot use the standard BAD_MADT_ENTRY() to sanity check the @mp_wake
	 * entry.  'sizeof (struct acpi_madt_multiproc_wakeup)' can be larger
	 * than the actual size of the MP wakeup entry in ACPI table because the
	 * 'reset_vector' is only available in the V1 MP wakeup structure.
	 */
	if (!mp_wake)
		return -EINVAL;
	/* Entry must fit within the table and claim at least the V0 size. */
	if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0)
		return -EINVAL;
	if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0)
		return -EINVAL;

	acpi_table_print_madt_entry(&header->common);

	acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address;

	/* The reset vector is only present in V1-or-later, V1-sized entries. */
	if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 &&
	    mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) {
		if (acpi_mp_setup_reset(mp_wake->reset_vector)) {
			pr_warn("Failed to setup MADT reset vector\n");
			acpi_mp_disable_offlining(mp_wake);
		}
	} else {
		/*
		 * CPU offlining requires version 1 of the ACPI MADT wakeup
		 * structure.
		 */
		acpi_mp_disable_offlining(mp_wake);
	}

	apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu);

	return 0;
}
 |