mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	x86/speculation/mds: Clear CPU buffers on exit to user
Add a static key which controls the invocation of the CPU buffer clear mechanism on exit to user space and add the call into prepare_exit_to_usermode() and do_nmi() right before actually returning. Add documentation which kernel to user space transition this covers and explain why some corner cases are not mitigated. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Reviewed-by: Borislav Petkov <bp@suse.de> Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Reviewed-by: Jon Masters <jcm@redhat.com> Tested-by: Jon Masters <jcm@redhat.com>
This commit is contained in:
		
							parent
							
								
									6a9e529272
								
							
						
					
					
						commit
						04dcbdb805
					
				
					 6 changed files with 83 additions and 0 deletions
				
			
		| 
						 | 
					@ -97,3 +97,55 @@ According to current knowledge additional mitigations inside the kernel
 | 
				
			||||||
itself are not required because the necessary gadgets to expose the leaked
 | 
					itself are not required because the necessary gadgets to expose the leaked
 | 
				
			||||||
data cannot be controlled in a way which allows exploitation from malicious
 | 
					data cannot be controlled in a way which allows exploitation from malicious
 | 
				
			||||||
user space or VM guests.
 | 
					user space or VM guests.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Mitigation points
 | 
				
			||||||
 | 
					-----------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. Return to user space
 | 
				
			||||||
 | 
					^^^^^^^^^^^^^^^^^^^^^^^
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   When transitioning from kernel to user space the CPU buffers are flushed
 | 
				
			||||||
 | 
					   on affected CPUs when the mitigation is not disabled on the kernel
 | 
				
			||||||
 | 
					   command line. The mitigation is enabled through the static key
 | 
				
			||||||
 | 
					   mds_user_clear.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   The mitigation is invoked in prepare_exit_to_usermode() which covers
 | 
				
			||||||
 | 
					   most of the kernel to user space transitions. There are a few exceptions
 | 
				
			||||||
 | 
					   which are not invoking prepare_exit_to_usermode() on return to user
 | 
				
			||||||
 | 
					   space. These exceptions use the paranoid exit code.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   - Non Maskable Interrupt (NMI):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     Access to sensitive data such as keys or credentials in the NMI context is
 | 
				
			||||||
 | 
					     mostly theoretical: The CPU can do prefetching or execute a
 | 
				
			||||||
 | 
					     misspeculated code path and thereby fetch data which might end up
 | 
				
			||||||
 | 
					     leaking through a buffer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     But for mounting other attacks the kernel stack address of the task is
 | 
				
			||||||
 | 
					     already valuable information. So in full mitigation mode, the NMI is
 | 
				
			||||||
 | 
					     mitigated on the return from do_nmi() to provide almost complete
 | 
				
			||||||
 | 
					     coverage.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   - Double fault (#DF):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     A double fault is usually fatal, but the ESPFIX workaround, which can
 | 
				
			||||||
 | 
					     be triggered from user space through modify_ldt(2), is a recoverable
 | 
				
			||||||
 | 
					     double fault. #DF uses the paranoid exit path, so explicit mitigation
 | 
				
			||||||
 | 
					     in the double fault handler is required.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   - Machine Check Exception (#MC):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     Another corner case is a #MC which hits between the CPU buffer clear
 | 
				
			||||||
 | 
					     invocation and the actual return to user. As this still is in kernel
 | 
				
			||||||
 | 
					     space it takes the paranoid exit path which does not clear the CPU
 | 
				
			||||||
 | 
					     buffers. So the #MC handler repopulates the buffers to some
 | 
				
			||||||
 | 
					     extent. Machine checks are not reliably controllable and the window is
 | 
				
			||||||
 | 
					     extremely small so mitigation would just tick a checkbox that this
 | 
				
			||||||
 | 
					     theoretical corner case is covered. To keep the amount of special
 | 
				
			||||||
 | 
					     cases small, ignore #MC.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   - Debug Exception (#DB):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					     This takes the paranoid exit path only when the INT1 breakpoint is in
 | 
				
			||||||
 | 
					     kernel space. #DB on a user space address takes the regular exit path,
 | 
				
			||||||
 | 
					     so no extra mitigation is required.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -31,6 +31,7 @@
 | 
				
			||||||
#include <asm/vdso.h>
 | 
					#include <asm/vdso.h>
 | 
				
			||||||
#include <linux/uaccess.h>
 | 
					#include <linux/uaccess.h>
 | 
				
			||||||
#include <asm/cpufeature.h>
 | 
					#include <asm/cpufeature.h>
 | 
				
			||||||
 | 
					#include <asm/nospec-branch.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define CREATE_TRACE_POINTS
 | 
					#define CREATE_TRACE_POINTS
 | 
				
			||||||
#include <trace/events/syscalls.h>
 | 
					#include <trace/events/syscalls.h>
 | 
				
			||||||
| 
						 | 
					@ -212,6 +213,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	user_enter_irqoff();
 | 
						user_enter_irqoff();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mds_user_clear_cpu_buffers();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SYSCALL_EXIT_WORK_FLAGS				\
 | 
					#define SYSCALL_EXIT_WORK_FLAGS				\
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -318,6 +318,8 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
 | 
				
			||||||
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
 | 
					DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
 | 
				
			||||||
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 | 
					DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DECLARE_STATIC_KEY_FALSE(mds_user_clear);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <asm/segment.h>
 | 
					#include <asm/segment.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
| 
						 | 
					@ -343,6 +345,17 @@ static inline void mds_clear_cpu_buffers(void)
 | 
				
			||||||
	asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
 | 
						asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
 | 
				
			||||||
 | 
					 * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Clear CPU buffers if the corresponding static key is enabled
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					static inline void mds_user_clear_cpu_buffers(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (static_branch_likely(&mds_user_clear))
 | 
				
			||||||
 | 
							mds_clear_cpu_buffers();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* __ASSEMBLY__ */
 | 
					#endif /* __ASSEMBLY__ */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -63,6 +63,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
 | 
				
			||||||
/* Control unconditional IBPB in switch_mm() */
 | 
					/* Control unconditional IBPB in switch_mm() */
 | 
				
			||||||
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 | 
					DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Control MDS CPU buffer clear before returning to user space */
 | 
				
			||||||
 | 
					DEFINE_STATIC_KEY_FALSE(mds_user_clear);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void __init check_bugs(void)
 | 
					void __init check_bugs(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	identify_boot_cpu();
 | 
						identify_boot_cpu();
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -34,6 +34,7 @@
 | 
				
			||||||
#include <asm/x86_init.h>
 | 
					#include <asm/x86_init.h>
 | 
				
			||||||
#include <asm/reboot.h>
 | 
					#include <asm/reboot.h>
 | 
				
			||||||
#include <asm/cache.h>
 | 
					#include <asm/cache.h>
 | 
				
			||||||
 | 
					#include <asm/nospec-branch.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define CREATE_TRACE_POINTS
 | 
					#define CREATE_TRACE_POINTS
 | 
				
			||||||
#include <trace/events/nmi.h>
 | 
					#include <trace/events/nmi.h>
 | 
				
			||||||
| 
						 | 
					@ -533,6 +534,9 @@ do_nmi(struct pt_regs *regs, long error_code)
 | 
				
			||||||
		write_cr2(this_cpu_read(nmi_cr2));
 | 
							write_cr2(this_cpu_read(nmi_cr2));
 | 
				
			||||||
	if (this_cpu_dec_return(nmi_state))
 | 
						if (this_cpu_dec_return(nmi_state))
 | 
				
			||||||
		goto nmi_restart;
 | 
							goto nmi_restart;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (user_mode(regs))
 | 
				
			||||||
 | 
							mds_user_clear_cpu_buffers();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
NOKPROBE_SYMBOL(do_nmi);
 | 
					NOKPROBE_SYMBOL(do_nmi);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -58,6 +58,7 @@
 | 
				
			||||||
#include <asm/alternative.h>
 | 
					#include <asm/alternative.h>
 | 
				
			||||||
#include <asm/fpu/xstate.h>
 | 
					#include <asm/fpu/xstate.h>
 | 
				
			||||||
#include <asm/trace/mpx.h>
 | 
					#include <asm/trace/mpx.h>
 | 
				
			||||||
 | 
					#include <asm/nospec-branch.h>
 | 
				
			||||||
#include <asm/mpx.h>
 | 
					#include <asm/mpx.h>
 | 
				
			||||||
#include <asm/vm86.h>
 | 
					#include <asm/vm86.h>
 | 
				
			||||||
#include <asm/umip.h>
 | 
					#include <asm/umip.h>
 | 
				
			||||||
| 
						 | 
					@ -366,6 +367,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 | 
				
			||||||
		regs->ip = (unsigned long)general_protection;
 | 
							regs->ip = (unsigned long)general_protection;
 | 
				
			||||||
		regs->sp = (unsigned long)&gpregs->orig_ax;
 | 
							regs->sp = (unsigned long)&gpregs->orig_ax;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * This situation can be triggered by userspace via
 | 
				
			||||||
 | 
							 * modify_ldt(2) and the return does not take the regular
 | 
				
			||||||
 | 
							 * user space exit, so a CPU buffer clear is required when
 | 
				
			||||||
 | 
							 * MDS mitigation is enabled.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							mds_user_clear_cpu_buffers();
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue