mirror of
				https://github.com/torvalds/linux.git
				synced 2025-10-31 16:48:26 +02:00 
			
		
		
		
	 f6e39794f4
			
		
	
	
		f6e39794f4
		
	
	
	
	
		
			
			The kernel can use vmalloc to allocate executable memory. The only supported way to do that is via __vmalloc_node_range() with the executable bit set in the prot argument. (vmap() resets the bit via pgprot_nx()). Once tag-based KASAN modes start tagging vmalloc allocations, executing code from such allocations will lead to the PC register getting a tag, which is not tolerated by the kernel. Only tag the allocations for normal kernel pages. [andreyknvl@google.com: pass KASAN_VMALLOC_PROT_NORMAL to kasan_unpoison_vmalloc()] Link: https://lkml.kernel.org/r/9230ca3d3e40ffca041c133a524191fd71969a8d.1646233925.git.andreyknvl@google.com [andreyknvl@google.com: support tagged vmalloc mappings] Link: https://lkml.kernel.org/r/2f6605e3a358cf64d73a05710cb3da356886ad29.1646233925.git.andreyknvl@google.com [andreyknvl@google.com: don't unintentionally disabled poisoning] Link: https://lkml.kernel.org/r/de4587d6a719232e83c760113e46ed2d4d8da61e.1646757322.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/fbfd9939a4dc375923c9a5c6b9e7ab05c26b8c6b.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov <andreyknvl@google.com> Acked-by: Marco Elver <elver@google.com> Cc: Alexander Potapenko <glider@google.com> Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Evgenii Stepanov <eugenis@google.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Collingbourne <pcc@google.com> Cc: Vincenzo Frascino <vincenzo.frascino@arm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
			
				
	
	
		
			158 lines
		
	
	
	
		
			2.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			158 lines
		
	
	
	
		
			2.9 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0
 | |
| /*
 | |
|  * Shadow Call Stack support.
 | |
|  *
 | |
|  * Copyright (C) 2019 Google LLC
 | |
|  */
 | |
| 
 | |
| #include <linux/cpuhotplug.h>
 | |
| #include <linux/kasan.h>
 | |
| #include <linux/mm.h>
 | |
| #include <linux/scs.h>
 | |
| #include <linux/vmalloc.h>
 | |
| #include <linux/vmstat.h>
 | |
| 
 | |
| static void __scs_account(void *s, int account)
 | |
| {
 | |
| 	struct page *scs_page = vmalloc_to_page(s);
 | |
| 
 | |
| 	mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB,
 | |
| 			    account * (SCS_SIZE / SZ_1K));
 | |
| }
 | |
| 
 | |
| /* Matches NR_CACHED_STACKS for VMAP_STACK */
 | |
| #define NR_CACHED_SCS 2
 | |
| static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
 | |
| 
 | |
| static void *__scs_alloc(int node)
 | |
| {
 | |
| 	int i;
 | |
| 	void *s;
 | |
| 
 | |
| 	for (i = 0; i < NR_CACHED_SCS; i++) {
 | |
| 		s = this_cpu_xchg(scs_cache[i], NULL);
 | |
| 		if (s) {
 | |
| 			s = kasan_unpoison_vmalloc(s, SCS_SIZE,
 | |
| 						   KASAN_VMALLOC_PROT_NORMAL);
 | |
| 			memset(s, 0, SCS_SIZE);
 | |
| 			goto out;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	s = __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
 | |
| 				    GFP_SCS, PAGE_KERNEL, 0, node,
 | |
| 				    __builtin_return_address(0));
 | |
| 
 | |
| out:
 | |
| 	return kasan_reset_tag(s);
 | |
| }
 | |
| 
 | |
| void *scs_alloc(int node)
 | |
| {
 | |
| 	void *s;
 | |
| 
 | |
| 	s = __scs_alloc(node);
 | |
| 	if (!s)
 | |
| 		return NULL;
 | |
| 
 | |
| 	*__scs_magic(s) = SCS_END_MAGIC;
 | |
| 
 | |
| 	/*
 | |
| 	 * Poison the allocation to catch unintentional accesses to
 | |
| 	 * the shadow stack when KASAN is enabled.
 | |
| 	 */
 | |
| 	kasan_poison_vmalloc(s, SCS_SIZE);
 | |
| 	__scs_account(s, 1);
 | |
| 	return s;
 | |
| }
 | |
| 
 | |
| void scs_free(void *s)
 | |
| {
 | |
| 	int i;
 | |
| 
 | |
| 	__scs_account(s, -1);
 | |
| 
 | |
| 	/*
 | |
| 	 * We cannot sleep as this can be called in interrupt context,
 | |
| 	 * so use this_cpu_cmpxchg to update the cache, and vfree_atomic
 | |
| 	 * to free the stack.
 | |
| 	 */
 | |
| 
 | |
| 	for (i = 0; i < NR_CACHED_SCS; i++)
 | |
| 		if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
 | |
| 			return;
 | |
| 
 | |
| 	kasan_unpoison_vmalloc(s, SCS_SIZE, KASAN_VMALLOC_PROT_NORMAL);
 | |
| 	vfree_atomic(s);
 | |
| }
 | |
| 
 | |
| static int scs_cleanup(unsigned int cpu)
 | |
| {
 | |
| 	int i;
 | |
| 	void **cache = per_cpu_ptr(scs_cache, cpu);
 | |
| 
 | |
| 	for (i = 0; i < NR_CACHED_SCS; i++) {
 | |
| 		vfree(cache[i]);
 | |
| 		cache[i] = NULL;
 | |
| 	}
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| void __init scs_init(void)
 | |
| {
 | |
| 	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
 | |
| 			  scs_cleanup);
 | |
| }
 | |
| 
 | |
| int scs_prepare(struct task_struct *tsk, int node)
 | |
| {
 | |
| 	void *s = scs_alloc(node);
 | |
| 
 | |
| 	if (!s)
 | |
| 		return -ENOMEM;
 | |
| 
 | |
| 	task_scs(tsk) = task_scs_sp(tsk) = s;
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static void scs_check_usage(struct task_struct *tsk)
 | |
| {
 | |
| 	static unsigned long highest;
 | |
| 
 | |
| 	unsigned long *p, prev, curr = highest, used = 0;
 | |
| 
 | |
| 	if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE))
 | |
| 		return;
 | |
| 
 | |
| 	for (p = task_scs(tsk); p < __scs_magic(tsk); ++p) {
 | |
| 		if (!READ_ONCE_NOCHECK(*p))
 | |
| 			break;
 | |
| 		used += sizeof(*p);
 | |
| 	}
 | |
| 
 | |
| 	while (used > curr) {
 | |
| 		prev = cmpxchg_relaxed(&highest, curr, used);
 | |
| 
 | |
| 		if (prev == curr) {
 | |
| 			pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
 | |
| 				tsk->comm, task_pid_nr(tsk), used);
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		curr = prev;
 | |
| 	}
 | |
| }
 | |
| 
 | |
/*
 * Tear down the shadow call stack of an exiting task.
 *
 * Nothing happens if @tsk has no shadow stack.  Otherwise a warning is
 * raised if the end-of-stack marker is reported as corrupted, usage
 * statistics are checked, and the stack is handed to scs_free().
 */
void scs_release(struct task_struct *tsk)
{
	void *stack = task_scs(tsk);

	if (stack) {
		WARN(task_scs_end_corrupted(tsk),
		     "corrupted shadow stack detected when freeing task\n");
		scs_check_usage(tsk);
		scs_free(stack);
	}
}
 |