forked from mirrors/linux
		
	 872bb37f68
			
		
	
	
		872bb37f68
		
	
	
	
	
		
			
			The codegen for adding architecture-specific stack alignment to the effective alloca() usage is somewhat inefficient and allows a bit to get carried beyond the desired entropy range. This isn't really a problem, but it's unexpected and the codegen is kind of bad. Quoting Mark[1], the disassembly for arm64's invoke_syscall() looks like: // offset = raw_cpu_read(kstack_offset) mov x4, sp adrp x0, kstack_offset mrs x5, tpidr_el1 add x0, x0, #:lo12:kstack_offset ldr w0, [x0, x5] // offset = KSTACK_OFFSET_MAX(offset) and x0, x0, #0x3ff // alloca(offset) add x0, x0, #0xf and x0, x0, #0x7f0 sub sp, x4, x0 ... which in C would be: offset = raw_cpu_read(kstack_offset) offset &= 0x3ff; // [0x0, 0x3ff] offset += 0xf; // [0xf, 0x40e] offset &= 0x7f0; // [0x0, ... so when *all* bits [3:0] are 0, they'll have no impact, and when *any* of bits [3:0] are 1 they'll trigger a carry into bit 4, which could ripple all the way up and spill into bit 10. Switch the masking in KSTACK_OFFSET_MAX() to explicitly clear the bottom bits to avoid the rounding by using 0b1111110000 instead of 0b1111111111: // offset = raw_cpu_read(kstack_offset) mov x4, sp adrp x0, 0 <kstack_offset> mrs x5, tpidr_el1 add x0, x0, #:lo12:kstack_offset ldr w0, [x0, x5] // offset = KSTACK_OFFSET_MAX(offset) and x0, x0, #0x3f0 // alloca(offset) sub sp, x4, x0 Suggested-by: Mark Rutland <mark.rutland@arm.com> Link: https://lore.kernel.org/lkml/ZnVfOnIuFl2kNWkT@J2N7QTR9R3/ [1] Link: https://lore.kernel.org/r/20240702211612.work.576-kees@kernel.org Signed-off-by: Kees Cook <kees@kernel.org>
		
			
				
	
	
		
			98 lines
		
	
	
	
		
			4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			98 lines
		
	
	
	
		
			4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* SPDX-License-Identifier: GPL-2.0-only */
 | |
| #ifndef _LINUX_RANDOMIZE_KSTACK_H
 | |
| #define _LINUX_RANDOMIZE_KSTACK_H
 | |
| 
 | |
| #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
 | |
| #include <linux/kernel.h>
 | |
| #include <linux/jump_label.h>
 | |
| #include <linux/percpu-defs.h>
 | |
| 
 | |
/* Per-CPU u32 read by add_random_kstack_offset() and rewritten by choose_random_kstack_offset(). */
DECLARE_PER_CPU(u32, kstack_offset);
/* Static key tested by both macros before they touch kstack_offset. */
DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, randomize_kstack_offset);
 | |
| 
 | |
| /*
 | |
|  * Do not use this anywhere else in the kernel. This is used here because
 | |
|  * it provides an arch-agnostic way to grow the stack with correct
 | |
|  * alignment. Also, since this use is being explicitly masked to a max of
 | |
|  * 10 bits, stack-clash style attacks are unlikely. For more details see
 | |
|  * "VLAs" in Documentation/process/deprecated.rst
 | |
|  *
 | |
|  * The normal __builtin_alloca() is initialized with INIT_STACK_ALL (currently
 | |
|  * only with Clang and not GCC). Initializing the unused area on each syscall
 | |
|  * entry is expensive, and generating an implicit call to memset() may also be
 | |
|  * problematic (such as in noinstr functions). Therefore, if the compiler
 | |
|  * supports it (which it should if it initializes allocas), always use the
 | |
|  * "uninitialized" variant of the builtin.
 | |
|  */
 | |
/*
 * __kstack_alloca: fall back to plain __builtin_alloca only when the
 * compiler does not report __builtin_alloca_uninitialized as available.
 */
#if !__has_builtin(__builtin_alloca_uninitialized)
#define __kstack_alloca __builtin_alloca
#else
#define __kstack_alloca __builtin_alloca_uninitialized
#endif
 | |
| 
 | |
| /*
 | |
|  * Use, at most, 6 bits of entropy (on 64-bit; 8 on 32-bit). This cap is
 | |
|  * to keep the "VLA" from being unbounded (see above). Additionally clear
 | |
|  * the bottom 4 bits (on 64-bit systems, 2 for 32-bit), since stack
 | |
|  * alignment will always be at least word size. This makes the compiler
 | |
|  * code gen better when it is applying the actual per-arch alignment to
 | |
|  * the final offset. The resulting randomness is reasonable without overly
 | |
|  * constraining usable stack space.
 | |
|  */
 | |
/*
 * KSTACK_OFFSET_MAX(x) - mask @x down to the permitted stack offset bits.
 * 64-bit keeps bits [9:4] (0x3f0); everything else keeps bits [9:2] (0x3fc).
 */
#ifdef CONFIG_64BIT
#define KSTACK_OFFSET_MAX(x)	((x) & 0x3f0)
#else
#define KSTACK_OFFSET_MAX(x)	((x) & 0x3fc)
#endif
 | |
| 
 | |
| /**
 | |
|  * add_random_kstack_offset - Increase stack utilization by previously
 | |
|  *			      chosen random offset
 | |
|  *
 | |
|  * This should be used in the syscall entry path when interrupts and
 | |
|  * preempt are disabled, and after user registers have been stored to
 | |
|  * the stack. For testing the resulting entropy, please see:
 | |
|  * tools/testing/selftests/lkdtm/stack-entropy.sh
 | |
|  */
 | |
#define add_random_kstack_offset() do {					\
	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
				&randomize_kstack_offset)) {		\
		/* Read this CPU's stored value; masking bounds the size. */ \
		u32 kso_raw = raw_cpu_read(kstack_offset);		\
		u8 *kso_pad = __kstack_alloca(KSTACK_OFFSET_MAX(kso_raw)); \
		/* The empty asm takes the pointer as an input so the */ \
		/* allocation is kept once the pointer leaves scope. */	\
		asm volatile("" :: "r"(kso_pad) : "memory");		\
	}								\
} while (0)
 | |
| 
 | |
| /**
 | |
|  * choose_random_kstack_offset - Choose the random offset for the next
 | |
|  *				 add_random_kstack_offset()
 | |
|  *
 | |
|  * This should only be used during syscall exit when interrupts and
 | |
|  * preempt are disabled. This position in the syscall flow is done to
 | |
|  * frustrate attacks from userspace attempting to learn the next offset:
 | |
|  * - Maximize the timing uncertainty visible from userspace: if the
 | |
|  *   offset is chosen at syscall entry, userspace has much more control
 | |
|  *   over the timing between choosing offsets. "How long will we be in
 | |
|  *   kernel mode?" tends to be more difficult to predict than "how long
 | |
|  *   will we be in user mode?"
 | |
|  * - Reduce the lifetime of the new offset sitting in memory during
 | |
|  *   kernel mode execution. Exposure of "thread-local" memory content
 | |
|  *   (e.g. current, percpu, etc) tends to be easier than arbitrary
 | |
|  *   location memory exposure.
 | |
|  */
 | |
#define choose_random_kstack_offset(rand) do {				\
	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
				&randomize_kstack_offset)) {		\
		/* Rotate the stored value right by 5, XOR in (rand), */ \
		/* and store the u32 result back for this CPU. */	\
		u32 kso_prev = raw_cpu_read(kstack_offset);		\
		u32 kso_next = ror32(kso_prev, 5) ^ (rand);		\
		raw_cpu_write(kstack_offset, kso_next);			\
	}								\
} while (0)
 | |
| #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
 | |
/* Randomization is configured out: both hooks expand to empty statements. */
#define choose_random_kstack_offset(rand)	do { } while (0)
#define add_random_kstack_offset()		do { } while (0)
 | |
| #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
 | |
| 
 | |
| #endif
 |