	mm: Hardened usercopy
This is the start of porting PAX_USERCOPY into the mainline kernel. This
is the first set of features, controlled by CONFIG_HARDENED_USERCOPY. The
work is based on code by PaX Team and Brad Spengler, and an earlier port
from Casey Schaufler. Additional non-slab page tests are from Rik van Riel.
This patch contains the logic for validating several conditions when
performing copy_to_user() and copy_from_user() on the kernel object
being copied to/from:
- address range doesn't wrap around
- address range isn't NULL or zero-allocated (with a non-zero copy size)
- if on the slab allocator:
  - copy size must be less than or equal to the object size (when the
    check is implemented in the allocator, which appears in subsequent
    patches)
- otherwise, object must not span page allocations (excepting Reserved
  and CMA ranges)
- if on the stack:
  - object must not extend before/after the current process stack
  - object must be contained by a valid stack frame (when there is
    arch/build support for identifying stack frames)
- object must not overlap with kernel text
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>
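
The hook itself is check_object_size(), added to thread_info.h below;
architectures that select HAVE_ARCH_HARDENED_USERCOPY call it just before
their low-level user copies, per the Kconfig help text in this patch. A
minimal sketch of that wiring (not part of this patch; the
__arch_raw_copy_*() helpers are hypothetical placeholders for an
architecture's real copy primitives):

#include <linux/thread_info.h>	/* check_object_size() */

/*
 * Sketch of an architecture's copy routines with hardened usercopy
 * wired in. When CONFIG_HARDENED_USERCOPY is off, check_object_size()
 * is an empty static inline and compiles away entirely.
 */
static inline unsigned long
example_copy_from_user(void *to, const void __user *from, unsigned long n)
{
	check_object_size(to, n, false);	/* false: writing into kernel */
	return __arch_raw_copy_from_user(to, from, n);	/* hypothetical */
}

static inline unsigned long
example_copy_to_user(void __user *to, const void *from, unsigned long n)
{
	check_object_size(from, n, true);	/* true: exposing kernel data */
	return __arch_raw_copy_to_user(to, from, n);	/* hypothetical */
}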
			
			
commit f5509cc18d
parent 0f60a8efe4

5 changed files with 327 additions and 0 deletions
include/linux/slab.h
@@ -155,6 +155,18 @@ void kfree(const void *);
 void kzfree(const void *);
 size_t ksize(const void *);
 
+#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
+const char *__check_heap_object(const void *ptr, unsigned long n,
+				struct page *page);
+#else
+static inline const char *__check_heap_object(const void *ptr,
+					      unsigned long n,
+					      struct page *page)
+{
+	return NULL;
+}
+#endif
+
 /*
  * Some archs want to perform DMA into kmalloc caches and need a guaranteed
  * alignment larger than the alignment of a 64-bit integer.
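
Note that __check_heap_object() is only declared here; the allocator-side
implementations arrive in the subsequent patches the commit message
mentions. As a rough sketch of the contract, assuming SLUB-style metadata
(page->slab_cache, objects spaced s->size apart with s->object_size usable
bytes), an allocator-side check might look like:

/*
 * Illustrative only -- loosely modeled on the later SLUB patch, not
 * code added here. Returns NULL if [ptr, ptr+n) sits inside a single
 * object, or a cache name for report_usercopy() otherwise.
 */
const char *__check_heap_object(const void *ptr, unsigned long n,
				struct page *page)
{
	struct kmem_cache *s = page->slab_cache;
	/* Offset of ptr within its object; objects are s->size apart. */
	unsigned long offset = (ptr - page_address(page)) % s->size;

	/* Allow only ranges that stay within one object's usable bytes. */
	if (offset <= s->object_size && n <= s->object_size - offset)
		return NULL;

	return s->name;
}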
include/linux/thread_info.h
@@ -155,6 +155,21 @@ static inline int arch_within_stack_frames(const void * const stack,
 }
 #endif
 
+#ifdef CONFIG_HARDENED_USERCOPY
+extern void __check_object_size(const void *ptr, unsigned long n,
+					bool to_user);
+
+static inline void check_object_size(const void *ptr, unsigned long n,
+				     bool to_user)
+{
+	__check_object_size(ptr, n, to_user);
+}
+#else
+static inline void check_object_size(const void *ptr, unsigned long n,
+				     bool to_user)
+{ }
+#endif /* CONFIG_HARDENED_USERCOPY */
+
 #endif	/* __KERNEL__ */
 
 #endif /* _LINUX_THREAD_INFO_H */
mm/Makefile
@@ -21,6 +21,9 @@ KCOV_INSTRUMENT_memcontrol.o := n
 KCOV_INSTRUMENT_mmzone.o := n
 KCOV_INSTRUMENT_vmstat.o := n
 
+# Since __builtin_frame_address does work as used, disable the warning.
+CFLAGS_usercopy.o += $(call cc-disable-warning, frame-address)
+
 mmu-y			:= nommu.o
 mmu-$(CONFIG_MMU)	:= gup.o highmem.o memory.o mincore.o \
 			   mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
@@ -99,3 +102,4 @@ obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
 obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
 obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
 obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
+obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
							
								
								
									
mm/usercopy.c (new file, 268 lines)
/*
 * This implements the various checks for CONFIG_HARDENED_USERCOPY*,
 * which are designed to protect kernel memory from needless exposure
 * and overwrite under many unintended conditions. This code is based
 * on PAX_USERCOPY, which is:
 *
 * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source
 * Security Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <asm/sections.h>

enum {
	BAD_STACK = -1,
	NOT_STACK = 0,
	GOOD_FRAME,
	GOOD_STACK,
};

/*
 * Checks whether a given pointer and length are contained by the current
 * stack frame (if possible).
 *
 * Returns:
 *	NOT_STACK: not at all on the stack
 *	GOOD_FRAME: fully within a valid stack frame
 *	GOOD_STACK: fully on the stack (when can't do frame-checking)
 *	BAD_STACK: error condition (invalid stack position or bad stack frame)
 */
static noinline int check_stack_object(const void *obj, unsigned long len)
{
	const void * const stack = task_stack_page(current);
	const void * const stackend = stack + THREAD_SIZE;
	int ret;

	/* Object is not on the stack at all. */
	if (obj + len <= stack || stackend <= obj)
		return NOT_STACK;

	/*
	 * Reject: object partially overlaps the stack (passing the
	 * check above means at least one end is within the stack,
	 * so if this check fails, the other end is outside the stack).
	 */
	if (obj < stack || stackend < obj + len)
		return BAD_STACK;

	/* Check if object is safely within a valid frame. */
	ret = arch_within_stack_frames(stack, stackend, obj, len);
	if (ret)
		return ret;

	return GOOD_STACK;
}

static void report_usercopy(const void *ptr, unsigned long len,
			    bool to_user, const char *type)
{
	pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n",
		to_user ? "exposure" : "overwrite",
		to_user ? "from" : "to", ptr, type ? : "unknown", len);
	/*
	 * For greater effect, it would be nice to do do_group_exit(),
	 * but BUG() actually hooks all the lock-breaking and per-arch
	 * Oops code, so that is used here instead.
	 */
	BUG();
}

/* Returns true if any portion of [ptr,ptr+n) overlaps with [low,high). */
static bool overlaps(const void *ptr, unsigned long n, unsigned long low,
		     unsigned long high)
{
	unsigned long check_low = (uintptr_t)ptr;
	unsigned long check_high = check_low + n;

	/* Does not overlap if entirely above or entirely below. */
	if (check_low >= high || check_high < low)
		return false;

	return true;
}

/* Is this address range in the kernel text area? */
static inline const char *check_kernel_text_object(const void *ptr,
						   unsigned long n)
{
	unsigned long textlow = (unsigned long)_stext;
	unsigned long texthigh = (unsigned long)_etext;
	unsigned long textlow_linear, texthigh_linear;

	if (overlaps(ptr, n, textlow, texthigh))
		return "<kernel text>";

	/*
	 * Some architectures have virtual memory mappings with a secondary
	 * mapping of the kernel text, i.e. there is more than one virtual
	 * kernel address that points to the kernel image. This usually
	 * happens when there is a separate linear physical memory mapping,
	 * in which case __pa() is not simply the inverse of __va(). This
	 * can be detected and checked:
	 */
	textlow_linear = (unsigned long)__va(__pa(textlow));
	/* No different mapping: we're done. */
	if (textlow_linear == textlow)
		return NULL;

	/* Check the secondary mapping... */
	texthigh_linear = (unsigned long)__va(__pa(texthigh));
	if (overlaps(ptr, n, textlow_linear, texthigh_linear))
		return "<linear kernel text>";

	return NULL;
}

static inline const char *check_bogus_address(const void *ptr, unsigned long n)
{
	/* Reject if object wraps past end of memory. */
	if (ptr + n < ptr)
		return "<wrapped address>";

	/* Reject if NULL or ZERO-allocation. */
	if (ZERO_OR_NULL_PTR(ptr))
		return "<null>";

	return NULL;
}

static inline const char *check_heap_object(const void *ptr, unsigned long n,
					    bool to_user)
{
	struct page *page, *endpage;
	const void *end = ptr + n - 1;
	bool is_reserved, is_cma;

	/*
	 * Some architectures (arm64) return true for virt_addr_valid() on
	 * vmalloced addresses. Work around this by checking for vmalloc
	 * first.
	 */
	if (is_vmalloc_addr(ptr))
		return NULL;

	if (!virt_addr_valid(ptr))
		return NULL;

	page = virt_to_head_page(ptr);

	/* Check slab allocator for flags and size. */
	if (PageSlab(page))
		return __check_heap_object(ptr, n, page);

	/*
	 * Sometimes the kernel data regions are not marked Reserved (see
	 * check below). And sometimes [_sdata,_edata) does not cover
	 * rodata and/or bss, so check each range explicitly.
	 */

	/* Allow reads of kernel rodata region (if not marked as Reserved). */
	if (ptr >= (const void *)__start_rodata &&
	    end <= (const void *)__end_rodata) {
		if (!to_user)
			return "<rodata>";
		return NULL;
	}

	/* Allow kernel data region (if not marked as Reserved). */
	if (ptr >= (const void *)_sdata && end <= (const void *)_edata)
		return NULL;

	/* Allow kernel bss region (if not marked as Reserved). */
	if (ptr >= (const void *)__bss_start &&
	    end <= (const void *)__bss_stop)
		return NULL;

	/* Is the object wholly within one base page? */
	if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
		   ((unsigned long)end & (unsigned long)PAGE_MASK)))
		return NULL;

	/* Allow if start and end are inside the same compound page. */
	endpage = virt_to_head_page(end);
	if (likely(endpage == page))
		return NULL;

	/*
	 * Reject if range is entirely either Reserved (i.e. special or
	 * device memory), or CMA. Otherwise, reject since the object spans
	 * several independently allocated pages.
	 */
	is_reserved = PageReserved(page);
	is_cma = is_migrate_cma_page(page);
	if (!is_reserved && !is_cma)
		goto reject;

	for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
		page = virt_to_head_page(ptr);
		if (is_reserved && !PageReserved(page))
			goto reject;
		if (is_cma && !is_migrate_cma_page(page))
			goto reject;
	}

	return NULL;

reject:
	return "<spans multiple pages>";
}

/*
 * Validates that the given object is:
 * - not a bogus address
 * - a known-safe heap or stack object
 * - not in kernel text
 */
void __check_object_size(const void *ptr, unsigned long n, bool to_user)
{
	const char *err;

	/* Skip all tests if size is zero. */
	if (!n)
		return;

	/* Check for invalid addresses. */
	err = check_bogus_address(ptr, n);
	if (err)
		goto report;

	/* Check for bad heap object. */
	err = check_heap_object(ptr, n, to_user);
	if (err)
		goto report;

	/* Check for bad stack object. */
	switch (check_stack_object(ptr, n)) {
	case NOT_STACK:
		/* Object is not touching the current process stack. */
		break;
	case GOOD_FRAME:
	case GOOD_STACK:
		/*
		 * Object is either in the correct frame (when it
		 * is possible to check) or just generally on the
		 * process stack (when frame checking is not available).
		 */
		return;
	default:
		err = "<process stack>";
		goto report;
	}

	/* Check for object in kernel text to avoid exposure. */
	err = check_kernel_text_object(ptr, n);
	if (!err)
		return;

report:
	report_usercopy(ptr, n, to_user, err);
}
EXPORT_SYMBOL(__check_object_size);
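
To make the failure mode concrete, consider a hypothetical buggy read path
of the kind these checks target (struct and function names invented for
illustration). An oversized copy length reaches __check_object_size() via
the arch hook, check_heap_object() returns a non-NULL type string, and
report_usercopy() prints the pr_emerg() line above ("kernel memory
exposure attempt detected from ...") and BUG()s before anything is copied:

struct sensor_state {
	char buf[64];	/* 64-byte heap object, e.g. from kmalloc() */
};

/* Hypothetical driver read path with an unchecked, user-supplied len. */
static long sensor_read(struct sensor_state *s, void __user *ubuf,
			size_t len)
{
	/*
	 * With len > 64, this would expose adjacent heap memory.
	 * Under CONFIG_HARDENED_USERCOPY (plus an allocator that
	 * implements __check_heap_object(), added in follow-up
	 * patches), the oversized range is rejected inside
	 * copy_to_user() before any bytes are copied.
	 */
	if (copy_to_user(ubuf, s->buf, len))
		return -EFAULT;
	return len;
}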
security/Kconfig
@@ -118,6 +118,34 @@ config LSM_MMAP_MIN_ADDR
 	  this low address space will need the permission specific to the
 	  systems running LSM.
 
+config HAVE_HARDENED_USERCOPY_ALLOCATOR
+	bool
+	help
+	  The heap allocator implements __check_heap_object() for
+	  validating memory ranges against heap object sizes in
+	  support of CONFIG_HARDENED_USERCOPY.
+
+config HAVE_ARCH_HARDENED_USERCOPY
+	bool
+	help
+	  The architecture supports CONFIG_HARDENED_USERCOPY by
+	  calling check_object_size() just before performing the
+	  userspace copies in the low level implementation of
+	  copy_to_user() and copy_from_user().
+
+config HARDENED_USERCOPY
+	bool "Harden memory copies between kernel and userspace"
+	depends on HAVE_ARCH_HARDENED_USERCOPY
+	select BUG
+	help
+	  This option checks for obviously wrong memory regions when
+	  copying memory to/from the kernel (via copy_to_user() and
+	  copy_from_user() functions) by rejecting memory ranges that
+	  are larger than the specified heap object, span multiple
+	  separately allocated pages, are not on the process stack,
+	  or are part of the kernel text. This kills entire classes
+	  of heap overflow exploits and similar kernel memory exposures.
+
 source security/selinux/Kconfig
 source security/smack/Kconfig
 source security/tomoyo/Kconfig
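
The three-way Kconfig split keeps HARDENED_USERCOPY as the only
user-visible option, hidden until an architecture selects
HAVE_ARCH_HARDENED_USERCOPY; allocators advertise their object-size checks
separately. Hypothetical opt-in fragments (illustrative only; the real
select lines for specific architectures and slab allocators land in
follow-up patches of this series):

# arch/<arch>/Kconfig -- an architecture whose copy_*_user() calls
# check_object_size() advertises support:
config EXAMPLE_ARCH
	select HAVE_ARCH_HARDENED_USERCOPY

# allocator Kconfig -- a slab allocator implementing
# __check_heap_object() advertises support:
config EXAMPLE_SLAB
	select HAVE_HARDENED_USERCOPY_ALLOCATOR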