Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 pti updates from Ingo Molnar:
 "The main changes:
   - Make the IBPB barrier more strict and add STIBP support (Jiri
     Kosina)
   - Micro-optimize and clean up the entry code (Andy Lutomirski)
   - ... plus misc other fixes"
* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/speculation: Propagate information about RSB filling mitigation to sysfs
  x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation
  x86/speculation: Apply IBPB more strictly to avoid cross-process data leak
  x86/speculation: Add RETPOLINE_AMD support to the inline asm CALL_NOSPEC variant
  x86/CPU: Fix unused variable warning when !CONFIG_IA32_EMULATION
  x86/pti/64: Remove the SYSCALL64 entry trampoline
  x86/entry/64: Use the TSS sp2 slot for SYSCALL/SYSRET scratch space
  x86/entry/64: Document idtentry
			
			
commit d82924c3b8
19 changed files with 222 additions and 176 deletions
@@ -142,67 +142,6 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
-	.pushsection .entry_trampoline, "ax"
-
-/*
- * The code in here gets remapped into cpu_entry_area's trampoline.  This means
- * that the assembler and linker have the wrong idea as to where this code
- * lives (and, in fact, it's mapped more than once, so it's not even at a
- * fixed address).  So we can't reference any symbols outside the entry
- * trampoline and expect it to work.
- *
- * Instead, we carefully abuse %rip-relative addressing.
- * _entry_trampoline(%rip) refers to the start of the remapped) entry
- * trampoline.  We can thus find cpu_entry_area with this macro:
- */
-
-#define CPU_ENTRY_AREA \
-	_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
-
-/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
-#define RSP_SCRATCH	CPU_ENTRY_AREA_entry_stack + \
-			SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
-
-ENTRY(entry_SYSCALL_64_trampoline)
-	UNWIND_HINT_EMPTY
-	swapgs
-
-	/* Stash the user RSP. */
-	movq	%rsp, RSP_SCRATCH
-
-	/* Note: using %rsp as a scratch reg. */
-	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
-
-	/* Load the top of the task stack into RSP */
-	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
-
-	/* Start building the simulated IRET frame. */
-	pushq	$__USER_DS			/* pt_regs->ss */
-	pushq	RSP_SCRATCH			/* pt_regs->sp */
-	pushq	%r11				/* pt_regs->flags */
-	pushq	$__USER_CS			/* pt_regs->cs */
-	pushq	%rcx				/* pt_regs->ip */
-
-	/*
-	 * x86 lacks a near absolute jump, and we can't jump to the real
-	 * entry text with a relative jump.  We could push the target
-	 * address and then use retq, but this destroys the pipeline on
-	 * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
-	 * spill RDI and restore it in a second-stage trampoline.
-	 */
-	pushq	%rdi
-	movq	$entry_SYSCALL_64_stage2, %rdi
-	JMP_NOSPEC %rdi
-END(entry_SYSCALL_64_trampoline)
-
-	.popsection
-
-ENTRY(entry_SYSCALL_64_stage2)
-	UNWIND_HINT_EMPTY
-	popq	%rdi
-	jmp	entry_SYSCALL_64_after_hwframe
-END(entry_SYSCALL_64_stage2)
-
 ENTRY(entry_SYSCALL_64)
 	UNWIND_HINT_EMPTY
 	/*
@@ -212,16 +151,14 @@ ENTRY(entry_SYSCALL_64)
 	 */
 
 	swapgs
-	/*
-	 * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
-	 * is not required to switch CR3.
-	 */
-	movq	%rsp, PER_CPU_VAR(rsp_scratch)
+	/* tss.sp2 is scratch space. */
+	movq	%rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER_DS				/* pt_regs->ss */
-	pushq	PER_CPU_VAR(rsp_scratch)	/* pt_regs->sp */
+	pushq	PER_CPU_VAR(cpu_tss_rw + TSS_sp2)	/* pt_regs->sp */
 	pushq	%r11					/* pt_regs->flags */
 	pushq	$__USER_CS				/* pt_regs->cs */
 	pushq	%rcx					/* pt_regs->ip */
@@ -900,6 +837,42 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
 
+/**
+ * idtentry - Generate an IDT entry stub
+ * @sym:		Name of the generated entry point
+ * @do_sym:		C function to be called
+ * @has_error_code:	True if this IDT vector has an error code on the stack
+ * @paranoid:		non-zero means that this vector may be invoked from
+ *			kernel mode with user GSBASE and/or user CR3.
+ *			2 is special -- see below.
+ * @shift_ist:		Set to an IST index if entries from kernel mode should
+ *			decrement the IST stack so that nested entries get a
+ *			fresh stack.  (This is for #DB, which has a nasty habit
+ *			of recursing.)
+ *
+ * idtentry generates an IDT stub that sets up a usable kernel context,
+ * creates struct pt_regs, and calls @do_sym.  The stub has the following
+ * special behaviors:
+ *
+ * On an entry from user mode, the stub switches from the trampoline or
+ * IST stack to the normal thread stack.  On an exit to user mode, the
+ * normal exit-to-usermode path is invoked.
+ *
+ * On an exit to kernel mode, if @paranoid == 0, we check for preemption,
+ * whereas we omit the preemption check if @paranoid != 0.  This is purely
+ * because the implementation is simpler this way.  The kernel only needs
+ * to check for asynchronous kernel preemption when IRQ handlers return.
+ *
+ * If @paranoid == 0, then the stub will handle IRET faults by pretending
+ * that the fault came from user mode.  It will handle gs_change faults by
+ * pretending that the fault happened with kernel GSBASE.  Since this handling
+ * is omitted for @paranoid != 0, the #GP, #SS, and #NP stubs must have
+ * @paranoid == 0.  This special handling will do the wrong thing for
+ * espfix-induced #DF on IRET, so #DF must not use @paranoid == 0.
+ *
+ * @paranoid == 2 is special: the stub will never switch stacks.  This is for
+ * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
+ */
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8

@@ -30,8 +30,6 @@ struct cpu_entry_area {
 	 */
 	struct tss_struct tss;
 
-	char entry_trampoline[PAGE_SIZE];
-
 #ifdef CONFIG_X86_64
 	/*
 	 * Exception stacks used for IST entries.

@@ -170,11 +170,15 @@
  */
 # define CALL_NOSPEC						\
 	ANNOTATE_NOSPEC_ALTERNATIVE				\
-	ALTERNATIVE(						\
+	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
 	"call __x86_indirect_thunk_%V[thunk_target]\n",		\
-	X86_FEATURE_RETPOLINE)
+	X86_FEATURE_RETPOLINE,					\
+	"lfence;\n"						\
+	ANNOTATE_RETPOLINE_SAFE					\
+	"call *%[thunk_target]\n",				\
+	X86_FEATURE_RETPOLINE_AMD)
 # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
 
 #elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
@@ -184,7 +188,8 @@
  * here, anyway.
  */
 # define CALL_NOSPEC						\
-	ALTERNATIVE(						\
+	ANNOTATE_NOSPEC_ALTERNATIVE				\
+	ALTERNATIVE_2(						\
 	ANNOTATE_RETPOLINE_SAFE					\
 	"call *%[thunk_target]\n",				\
 	"       jmp    904f;\n"					\
@@ -199,7 +204,11 @@
 	"       ret;\n"						\
 	"       .align 16\n"					\
 	"904:	call   901b;\n",				\
-	X86_FEATURE_RETPOLINE)
+	X86_FEATURE_RETPOLINE,					\
+	"lfence;\n"						\
+	ANNOTATE_RETPOLINE_SAFE					\
+	"call *%[thunk_target]\n",				\
+	X86_FEATURE_RETPOLINE_AMD)
 
 # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
 #else /* No retpoline for C / inline asm */

@@ -316,7 +316,13 @@ struct x86_hw_tss {
 	 */
 	u64			sp1;
 
+	/*
+	 * Since Linux does not use ring 2, the 'sp2' slot is unused by
+	 * hardware.  entry_SYSCALL_64 uses it as scratch space to stash
+	 * the user RSP value.
+	 */
 	u64			sp2;
+
 	u64			reserved2;
 	u64			ist[7];
 	u32			reserved3;

@@ -11,7 +11,6 @@ extern char __end_rodata_aligned[];
 
 #if defined(CONFIG_X86_64)
 extern char __end_rodata_hpage_align[];
-extern char __entry_trampoline_start[], __entry_trampoline_end[];
 #endif
 
 #endif	/* _ASM_X86_SECTIONS_H */

@@ -96,13 +96,12 @@ void common(void) {
 	OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
 
 	/* Layout info for cpu_entry_area */
-	OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
-	OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
 	OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
 	DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
 	DEFINE(MASK_entry_stack, (~(sizeof(struct entry_stack) - 1)));
 
-	/* Offset for sp0 and sp1 into the tss_struct */
+	/* Offset for fields in tss_struct */
 	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
 	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
+	OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
 }

@@ -35,12 +35,10 @@ static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
 static void __init l1tf_select_mitigation(void);
 
-/*
- * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
- * writes to SPEC_CTRL contain whatever reserved bits have been set.
- */
-u64 __ro_after_init x86_spec_ctrl_base;
+/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
+u64 x86_spec_ctrl_base;
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+static DEFINE_MUTEX(spec_ctrl_mutex);
 
 /*
  * The vendor and possibly platform specific bits which can be modified in
@@ -326,6 +324,46 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 	return cmd;
 }
 
+static bool stibp_needed(void)
+{
+	if (spectre_v2_enabled == SPECTRE_V2_NONE)
+		return false;
+
+	if (!boot_cpu_has(X86_FEATURE_STIBP))
+		return false;
+
+	return true;
+}
+
+static void update_stibp_msr(void *info)
+{
+	wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+}
+
+void arch_smt_update(void)
+{
+	u64 mask;
+
+	if (!stibp_needed())
+		return;
+
+	mutex_lock(&spec_ctrl_mutex);
+	mask = x86_spec_ctrl_base;
+	if (cpu_smt_control == CPU_SMT_ENABLED)
+		mask |= SPEC_CTRL_STIBP;
+	else
+		mask &= ~SPEC_CTRL_STIBP;
+
+	if (mask != x86_spec_ctrl_base) {
+		pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n",
+				cpu_smt_control == CPU_SMT_ENABLED ?
+				"Enabling" : "Disabling");
+		x86_spec_ctrl_base = mask;
+		on_each_cpu(update_stibp_msr, NULL, 1);
+	}
+	mutex_unlock(&spec_ctrl_mutex);
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -426,6 +464,9 @@ static void __init spectre_v2_select_mitigation(void)
 		setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
 		pr_info("Enabling Restricted Speculation for firmware calls\n");
 	}
+
+	/* Enable STIBP if appropriate */
+	arch_smt_update();
 }
 
 #undef pr_fmt
@@ -816,6 +857,8 @@ static ssize_t l1tf_show_state(char *buf)
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
 			       char *buf, unsigned int bug)
 {
+	int ret;
+
 	if (!boot_cpu_has_bug(bug))
 		return sprintf(buf, "Not affected\n");
 
@@ -833,10 +876,13 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 		return sprintf(buf, "Mitigation: __user pointer sanitization\n");
 
 	case X86_BUG_SPECTRE_V2:
-		return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+		ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
 			       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
 			       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+			       (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "",
+			       boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
 			       spectre_v2_module_string());
+		return ret;
 
 	case X86_BUG_SPEC_STORE_BYPASS:
 		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);

@@ -1534,18 +1534,7 @@ EXPORT_PER_CPU_SYMBOL(__preempt_count);
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
-	extern char _entry_trampoline[];
-	extern char entry_SYSCALL_64_trampoline[];
-
-	int cpu = smp_processor_id();
-	unsigned long SYSCALL64_entry_trampoline =
-		(unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
-		(entry_SYSCALL_64_trampoline - _entry_trampoline);
-
 	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-	if (static_cpu_has(X86_FEATURE_PTI))
-		wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
-	else
 	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
 #ifdef CONFIG_IA32_EMULATION
@@ -1557,7 +1546,8 @@ void syscall_init(void)
 	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
 	 */
 	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+		    (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
 	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
 	wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);

@@ -1028,18 +1028,10 @@ NOKPROBE_SYMBOL(kprobe_fault_handler);
 
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
-	bool is_in_entry_trampoline_section = false;
-
-#ifdef CONFIG_X86_64
-	is_in_entry_trampoline_section =
-		(addr >= (unsigned long)__entry_trampoline_start &&
-		 addr < (unsigned long)__entry_trampoline_end);
-#endif
 	return  (addr >= (unsigned long)__kprobes_text_start &&
 		 addr < (unsigned long)__kprobes_text_end) ||
 		(addr >= (unsigned long)__entry_text_start &&
-		 addr < (unsigned long)__entry_text_end) ||
-		is_in_entry_trampoline_section;
+		 addr < (unsigned long)__entry_text_end);
 }
 
 int __init arch_init_kprobes(void)

@@ -60,8 +60,6 @@
 #include <asm/unistd_32_ia32.h>
 #endif
 
-__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
-
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 {

@@ -383,6 +383,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 		 * we won't enable interupts or schedule before we invoke
 		 * general_protection, so nothing will clobber the stack
 		 * frame we just set up.
+		 *
+		 * We will enter general_protection with kernel GSBASE,
+		 * which is what the stub expects, given that the faulting
+		 * RIP will be the IRET instruction.
 		 */
 		regs->ip = (unsigned long)general_protection;
 		regs->sp = (unsigned long)&gpregs->orig_ax;

@@ -136,16 +136,6 @@ SECTIONS
 		*(.fixup)
 		*(.gnu.warning)
 
-#ifdef CONFIG_X86_64
-		. = ALIGN(PAGE_SIZE);
-		__entry_trampoline_start = .;
-		_entry_trampoline = .;
-		*(.entry_trampoline)
-		. = ALIGN(PAGE_SIZE);
-		__entry_trampoline_end = .;
-		ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
-#endif
-
 #ifdef CONFIG_RETPOLINE
 		__indirect_thunk_start = .;
 		*(.text.__x86.indirect_thunk)

@@ -15,7 +15,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
 	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-static DEFINE_PER_CPU(struct kcore_list, kcore_entry_trampoline);
 #endif
 
 struct cpu_entry_area *get_cpu_entry_area(int cpu)
@@ -83,8 +82,6 @@ static void percpu_setup_debug_store(int cpu)
 static void __init setup_cpu_entry_area(int cpu)
 {
 #ifdef CONFIG_X86_64
-	extern char _entry_trampoline[];
-
 	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
 	pgprot_t gdt_prot = PAGE_KERNEL_RO;
 	pgprot_t tss_prot = PAGE_KERNEL_RO;
@@ -146,43 +143,10 @@ static void __init setup_cpu_entry_area(int cpu)
 	cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
 			     &per_cpu(exception_stacks, cpu),
 			     sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
-
-	cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
-		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
-	/*
-	 * The cpu_entry_area alias addresses are not in the kernel binary
-	 * so they do not show up in /proc/kcore normally.  This adds entries
-	 * for them manually.
-	 */
-	kclist_add_remap(&per_cpu(kcore_entry_trampoline, cpu),
-			 _entry_trampoline,
-			 &get_cpu_entry_area(cpu)->entry_trampoline, PAGE_SIZE);
 #endif
 	percpu_setup_debug_store(cpu);
 }
 
-#ifdef CONFIG_X86_64
-int arch_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
-		     char *name)
-{
-	unsigned int cpu, ncpu = 0;
-
-	if (symnum >= num_possible_cpus())
-		return -EINVAL;
-
-	for_each_possible_cpu(cpu) {
-		if (ncpu++ >= symnum)
-			break;
-	}
-
-	*value = (unsigned long)&get_cpu_entry_area(cpu)->entry_trampoline;
-	*type = 't';
-	strlcpy(name, "__entry_SYSCALL_64_trampoline", KSYM_NAME_LEN);
-
-	return 0;
-}
-#endif
-
 static __init void setup_cpu_entry_area_ptes(void)
 {
 #ifdef CONFIG_X86_32

@@ -434,11 +434,42 @@ static void __init pti_clone_p4d(unsigned long addr)
 }
 
 /*
- * Clone the CPU_ENTRY_AREA into the user space visible page table.
+ * Clone the CPU_ENTRY_AREA and associated data into the user space visible
+ * page table.
 */
 static void __init pti_clone_user_shared(void)
 {
+	unsigned int cpu;
+
 	pti_clone_p4d(CPU_ENTRY_AREA_BASE);
+
+	for_each_possible_cpu(cpu) {
+		/*
+		 * The SYSCALL64 entry code needs to be able to find the
+		 * thread stack and needs one word of scratch space in which
+		 * to spill a register.  All of this lives in the TSS, in
+		 * the sp1 and sp2 slots.
+		 *
+		 * This is done for all possible CPUs during boot to ensure
+		 * that it's propagated to all mms.  If we were to add one of
+		 * these mappings during CPU hotplug, we would need to take
+		 * some measure to make sure that every mm that subsequently
+		 * ran on that CPU would have the relevant PGD entry in its
+		 * pagetables.  The usual vmalloc_fault() mechanism would not
+		 * work for page faults taken in entry_SYSCALL_64 before RSP
+		 * is set up.
+		 */
+
+		unsigned long va = (unsigned long)&per_cpu(cpu_tss_rw, cpu);
+		phys_addr_t pa = per_cpu_ptr_to_phys((void *)va);
+		pte_t *target_pte;
+
+		target_pte = pti_user_pagetable_walk_pte(va);
+		if (WARN_ON(!target_pte))
+			return;
+
+		*target_pte = pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL);
+	}
 }
 
 #else /* CONFIG_X86_64 */

@@ -7,6 +7,7 @@
 #include <linux/export.h>
 #include <linux/cpu.h>
 #include <linux/debugfs.h>
+#include <linux/ptrace.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -180,6 +181,19 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
 	}
 }
 
+static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id)
+{
+	/*
+	 * Check if the current (previous) task has access to the memory
+	 * of the @tsk (next) task. If access is denied, make sure to
+	 * issue a IBPB to stop user->user Spectre-v2 attacks.
+	 *
+	 * Note: __ptrace_may_access() returns 0 or -ERRNO.
+	 */
+	return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id &&
+		ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB));
+}
+
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
@@ -286,18 +300,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		 * one process from doing Spectre-v2 attacks on another.
 		 *
 		 * As an optimization, flush indirect branches only when
-		 * switching into processes that disable dumping. This
-		 * protects high value processes like gpg, without having
-		 * too high performance overhead. IBPB is *expensive*!
-		 *
-		 * This will not flush branches when switching into kernel
-		 * threads. It will also not flush if we switch to idle
-		 * thread and back to the same process. It will flush if we
-		 * switch to a different non-dumpable process.
+		 * switching into a processes that can't be ptrace by the
+		 * current one (as in such case, attacker has much more
+		 * convenient way how to tamper with the next process than
+		 * branch buffer poisoning).
 		 */
-		if (tsk && tsk->mm &&
-		    tsk->mm->context.ctx_id != last_ctx_id &&
-		    get_dumpable(tsk->mm) != SUID_DUMP_USER)
+		if (static_cpu_has(X86_FEATURE_USE_IBPB) &&
+				ibpb_needed(tsk, last_ctx_id))
 			indirect_branch_prediction_barrier();
 
 		if (IS_ENABLED(CONFIG_VMAP_STACK)) {

@@ -91,13 +91,15 @@ ENTRY(xen_iret)
 ENTRY(xen_sysret64)
 	/*
 	 * We're already on the usermode stack at this point, but
-	 * still with the kernel gs, so we can easily switch back
+	 * still with the kernel gs, so we can easily switch back.
+	 *
+	 * tss.sp2 is scratch space.
 	 */
-	movq %rsp, PER_CPU_VAR(rsp_scratch)
+	movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
 	movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	pushq $__USER_DS
-	pushq PER_CPU_VAR(rsp_scratch)
+	pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
 	pushq %r11
 	pushq $__USER_CS
 	pushq %rcx

@@ -64,12 +64,15 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_NOAUDIT	0x04
 #define PTRACE_MODE_FSCREDS	0x08
 #define PTRACE_MODE_REALCREDS	0x10
+#define PTRACE_MODE_SCHED	0x20
+#define PTRACE_MODE_IBPB	0x40
 
 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
 #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
 #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
 #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
 #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
+#define PTRACE_MODE_SPEC_IBPB (PTRACE_MODE_ATTACH_REALCREDS | PTRACE_MODE_IBPB)
 
 /**
  * ptrace_may_access - check whether the caller is permitted to access
@@ -87,6 +90,20 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
  */
 extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
 
+/**
+ * ptrace_may_access - check whether the caller is permitted to access
+ * a target task.
+ * @task: target task
+ * @mode: selects type of access and caller credentials
+ *
+ * Returns true on success, false on denial.
+ *
+ * Similar to ptrace_may_access(). Only to be called from context switch
+ * code. Does not call into audit and the regular LSM hooks due to locking
+ * constraints.
+ */
+extern bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode);
+
 static inline int ptrace_reparented(struct task_struct *child)
 {
 	return !same_thread_group(child->real_parent, child->parent);

							
								
								
									
kernel/cpu.c (11 changes)
@@ -2055,6 +2055,12 @@ static void cpuhp_online_cpu_device(unsigned int cpu)
 	kobject_uevent(&dev->kobj, KOBJ_ONLINE);
 }
 
+/*
+ * Architectures that need SMT-specific errata handling during SMT hotplug
+ * should override this.
+ */
+void __weak arch_smt_update(void) { };
+
 static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 {
 	int cpu, ret = 0;
@@ -2081,8 +2087,10 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 		 */
 		cpuhp_offline_cpu_device(cpu);
 	}
-	if (!ret)
+	if (!ret) {
 		cpu_smt_control = ctrlval;
+		arch_smt_update();
+	}
 	cpu_maps_update_done();
 	return ret;
 }
@@ -2093,6 +2101,7 @@ static int cpuhp_smt_enable(void)
 
 	cpu_maps_update_begin();
 	cpu_smt_control = CPU_SMT_ENABLED;
+	arch_smt_update();
 	for_each_present_cpu(cpu) {
 		/* Skip online CPUs and CPUs on offline nodes */
 		if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))

@@ -261,6 +261,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 
 static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
 {
+	if (mode & PTRACE_MODE_SCHED)
+		return false;
+
 	if (mode & PTRACE_MODE_NOAUDIT)
 		return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
 	else
@@ -328,9 +331,16 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	     !ptrace_has_cap(mm->user_ns, mode)))
 	    return -EPERM;
 
+	if (mode & PTRACE_MODE_SCHED)
+		return 0;
 	return security_ptrace_access_check(task, mode);
 }
 
+bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode)
+{
+	return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED);
+}
+
 bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
 	int err;

Linus Torvalds