Mirror of https://github.com/torvalds/linux.git, synced 2025-11-04 10:40:15 +02:00

	x86/smpboot: Remove initial_stack on 64-bit
In order to facilitate parallel startup, start to eliminate some of the global
variables passing information to CPUs in the startup path.

However, start by introducing one more: smpboot_control. For now this merely
holds the CPU# of the CPU which is coming up. Each CPU can then find its own
per-cpu data, and everything else it needs can be found from there, allowing
the other global variables to be removed.

First to be removed is initial_stack. Each CPU can load %rsp from its
current_task->thread.sp instead. That is already set up with the correct idle
thread for APs. Set up the .sp field in INIT_THREAD on x86 so that the BSP
also finds a suitable stack pointer in the static per-cpu data when coming up
on first boot.

On resume from S3, the CPU needs a temporary stack because its idle task is
already active. Instead of setting initial_stack, the sleep code can simply
set its own current->thread.sp to point to the temporary stack. Nobody else
cares about ->thread.sp for a thread which is currently on a CPU, because the
true value is actually in the %rsp register. Which is restored with the rest
of the CPU context in do_suspend_lowlevel().

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Usama Arif <usama.arif@bytedance.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Usama Arif <usama.arif@bytedance.com>
Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
Link: https://lore.kernel.org/r/20230316222109.1940300-7-usama.arif@bytedance.com
This commit is contained in:
	parent cefad862f2
	commit 3adee777ad

7 changed files with 63 additions and 21 deletions
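
Before the diffs, the mechanism in brief: do_boot_cpu() now writes the target CPU
number into smpboot_control, and the CPU coming up uses it to locate its per-cpu
data and load its stack pointer from current_task->thread.sp. Below is a minimal,
user-space C sketch of that lookup, for illustration only; the *_model names, the
per_cpu_area array and the example address are invented stand-ins, not the
kernel's actual definitions.

/*
 * Illustrative user-space model of the new stack lookup; all *_model
 * names and the example address are stand-ins, not kernel definitions.
 */
#include <stdio.h>

#define NR_CPUS 4

struct task_struct_model {
	unsigned long thread_sp;			/* models task->thread.sp */
};

struct pcpu_hot_model {
	struct task_struct_model *current_task;		/* models pcpu_hot.current_task */
};

static struct pcpu_hot_model pcpu_hot_area[NR_CPUS];
/* Simplified: a direct pointer per CPU rather than the kernel's __per_cpu_offset. */
static struct pcpu_hot_model *per_cpu_area[NR_CPUS];

/* Models the new smpboot_control variable: for now just the CPU number. */
static unsigned int smpboot_control_model;

static unsigned long find_startup_stack(void)
{
	/* movl	smpboot_control(%rip), %ecx */
	unsigned int cpu = smpboot_control_model;
	/* movq	__per_cpu_offset(,%rcx,8), %rdx (simplified to a direct pointer here) */
	struct pcpu_hot_model *hot = per_cpu_area[cpu];
	/* movq	pcpu_hot + X86_current_task(%rdx), %rax; movq TASK_threadsp(%rax), %rsp */
	return hot->current_task->thread_sp;
}

int main(void)
{
	static struct task_struct_model idle_cpu1 = {
		.thread_sp = 0xffffc90000004000UL,	/* made-up idle-stack address */
	};

	/* Roughly what do_boot_cpu() sets up before kicking the AP. */
	per_cpu_area[1] = &pcpu_hot_area[1];
	pcpu_hot_area[1].current_task = &idle_cpu1;
	smpboot_control_model = 1;

	printf("CPU 1 would load %%rsp = %#lx\n", find_startup_stack());
	return 0;
}

The real 64-bit path does this in assembly in secondary_startup_64 (see the
head_64.S hunks below); 32-bit keeps using initial_stack, as the smpboot.c hunk shows.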
				
			
@@ -647,7 +647,11 @@ static inline void spin_lock_prefetch(const void *x)
 #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
 
 #else
-#define INIT_THREAD { }
+extern unsigned long __end_init_task[];
+
+#define INIT_THREAD {							    \
+	.sp	= (unsigned long)&__end_init_task - sizeof(struct pt_regs), \
+}
 
 extern unsigned long KSTK_ESP(struct task_struct *task);
 

@@ -199,5 +199,8 @@ extern void nmi_selftest(void);
 #define nmi_selftest() do { } while (0)
 #endif
 
-#endif /* __ASSEMBLY__ */
+extern unsigned int smpboot_control;
+
+#endif /* !__ASSEMBLY__ */
+
 #endif /* _ASM_X86_SMP_H */

@@ -111,10 +111,26 @@ int x86_acpi_suspend_lowlevel(void)
 	saved_magic = 0x12345678;
 #else /* CONFIG_64BIT */
 #ifdef CONFIG_SMP
-	initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
+	/*
+	 * As each CPU starts up, it will find its own stack pointer
+	 * from its current_task->thread.sp. Typically that will be
+	 * the idle thread for a newly-started AP, or even the boot
+	 * CPU which will find it set to &init_task in the static
+	 * per-cpu data.
+	 *
+	 * Make the resuming CPU use the temporary stack at startup
+	 * by setting current->thread.sp to point to that. The true
+	 * %rsp will be restored with the rest of the CPU context,
+	 * by do_suspend_lowlevel(). And unwinders don't care about
+	 * the abuse of ->thread.sp because it's a dead variable
+	 * while the thread is running on the CPU anyway; the true
+	 * value is in the actual %rsp register.
+	 */
+	current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
 			(unsigned long)get_cpu_gdt_rw(smp_processor_id());
 	initial_gs = per_cpu_offset(smp_processor_id());
+	smpboot_control = smp_processor_id();
 #endif
 	initial_code = (unsigned long)wakeup_long64;
 	saved_magic = 0x123456789abcdef0L;

@@ -115,6 +115,7 @@ static void __used common(void)
 	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
 	OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
 	OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
+	OFFSET(X86_current_task, pcpu_hot, current_task);
 #ifdef CONFIG_CALL_DEPTH_TRACKING
 	OFFSET(X86_call_depth, pcpu_hot, call_depth);
 #endif

@@ -61,8 +61,8 @@ SYM_CODE_START_NOALIGN(startup_64)
 	 * tables and then reload them.
 	 */
 
-	/* Set up the stack for verify_cpu(), similar to initial_stack below */
-	leaq	(__end_init_task - FRAME_SIZE)(%rip), %rsp
+	/* Set up the stack for verify_cpu() */
+	leaq	(__end_init_task - PTREGS_SIZE)(%rip), %rsp
 
 	leaq	_text(%rip), %rdi
 

@@ -241,6 +241,24 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	UNWIND_HINT_EMPTY
 	ANNOTATE_NOENDBR // above
 
+#ifdef CONFIG_SMP
+	movl	smpboot_control(%rip), %ecx
+
+	/* Get the per cpu offset for the given CPU# which is in ECX */
+	movq	__per_cpu_offset(,%rcx,8), %rdx
+#else
+	xorl	%edx, %edx /* zero-extended to clear all of RDX */
+#endif /* CONFIG_SMP */
+
+	/*
+	 * Setup a boot time stack - Any secondary CPU will have lost its stack
+	 * by now because the cr3-switch above unmaps the real-mode stack.
+	 *
+	 * RDX contains the per-cpu offset
+	 */
+	movq	pcpu_hot + X86_current_task(%rdx), %rax
+	movq	TASK_threadsp(%rax), %rsp
+
 	/*
 	 * We must switch to a new descriptor in kernel space for the GDT
 	 * because soon the kernel won't have access anymore to the userspace

@@ -275,12 +293,6 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	movl	initial_gs+4(%rip),%edx
 	wrmsr
 
-	/*
-	 * Setup a boot time stack - Any secondary CPU will have lost its stack
-	 * by now because the cr3-switch above unmaps the real-mode stack
-	 */
-	movq initial_stack(%rip), %rsp
-
 	/* Setup and Load IDT */
 	pushq	%rsi
 	call	early_setup_idt

@@ -372,7 +384,11 @@ SYM_CODE_END(secondary_startup_64)
 SYM_CODE_START(start_cpu0)
 	ANNOTATE_NOENDBR
 	UNWIND_HINT_EMPTY
-	movq	initial_stack(%rip), %rsp
+
+	/* Find the idle task stack */
+	movq	PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx
+	movq	TASK_threadsp(%rcx), %rsp
+
 	jmp	.Ljump_to_C_code
 SYM_CODE_END(start_cpu0)
 #endif

@@ -420,12 +436,6 @@ SYM_DATA(initial_gs,	.quad INIT_PER_CPU_VAR(fixed_percpu_data))
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 SYM_DATA(initial_vc_handler,	.quad handle_vc_boot_ghcb)
 #endif
-
-/*
- * The FRAME_SIZE gap is a convention which helps the in-kernel unwinder
- * reliably detect the end of the stack.
- */
-SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE)
 	__FINITDATA
 
 	__INIT

@@ -660,6 +670,9 @@ SYM_DATA_END(level1_fixmap_pgt)
 SYM_DATA(early_gdt_descr,		.word GDT_ENTRIES*8-1)
 SYM_DATA_LOCAL(early_gdt_descr_base,	.quad INIT_PER_CPU_VAR(gdt_page))
 
+	.align 16
+SYM_DATA(smpboot_control,		.long 0)
+
 	.align 16
 /* This must match the first entry in level2_kernel_pgt */
 SYM_DATA(phys_base, .quad 0x0)

@@ -1088,7 +1088,12 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
 	idle->thread.sp = (unsigned long)task_pt_regs(idle);
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
 	initial_code = (unsigned long)start_secondary;
-	initial_stack  = idle->thread.sp;
+
+	if (IS_ENABLED(CONFIG_X86_32)) {
+		initial_stack  = idle->thread.sp;
+	} else {
+		smpboot_control = cpu;
+	}
 
 	/* Enable the espfix hack for this CPU */
 	init_espfix_ap(cpu);

@@ -49,7 +49,7 @@ SYM_CODE_START(startup_xen)
 	ANNOTATE_NOENDBR
 	cld
 
-	mov initial_stack(%rip), %rsp
+	leaq	(__end_init_task - PTREGS_SIZE)(%rip), %rsp
 
 	/* Set up %gs.
 	 *