forked from mirrors/linux
		
	powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX
Today, STRICT_KERNEL_RWX is based on the use of regular pages to map kernel pages. On Book3s 32, it has three consequences: - Using pages instead of BATs for mapping kernel linear memory severely impacts performance. - Exec protection is not effective because no-execute cannot be set at page level (except on 603 which doesn't have hash tables) - Write protection is not effective because PP bits do not provide RO mode for kernel-only pages (except on 603 which handles it in software via PAGE_DIRTY) On the 603+, we have: - Independent IBAT and DBAT allowing limitation of exec parts. - NX bit can be set in segment registers to forbid execution on memory mapped by pages. - RO mode on DBATs even for kernel-only blocks. On the 601, there is nothing much we can do other than warn the user about it, because: - BATs are common to instructions and data. - BATs do not provide RO mode for kernel-only blocks. - segment registers don't have the NX bit. In order to use IBAT for exec protection, this patch: - Aligns _etext to BAT block sizes (128kb) - Sets NX bit in kernel segment register (Except on vmalloc area when CONFIG_MODULES is selected) - Maps kernel text with IBATs. 
In order to use DBAT for write protection, this patch: - Aligns RW DATA to BAT block sizes (4M) - Maps kernel RO area with write-prohibited DBATs - Maps remaining memory with remaining DBATs Here is what we get with this patch on an 832x when activating STRICT_KERNEL_RWX: Symbols: c0000000 T _stext c0680000 R __start_rodata c0680000 R _etext c0800000 T __init_begin c0800000 T _sinittext ~# cat /sys/kernel/debug/block_address_translation ---[ Instruction Block Address Translation ]--- 0: 0xc0000000-0xc03fffff 0x00000000 Kernel EXEC coherent 1: 0xc0400000-0xc05fffff 0x00400000 Kernel EXEC coherent 2: 0xc0600000-0xc067ffff 0x00600000 Kernel EXEC coherent 3: - 4: - 5: - 6: - 7: - ---[ Data Block Address Translation ]--- 0: 0xc0000000-0xc07fffff 0x00000000 Kernel RO coherent 1: 0xc0800000-0xc0ffffff 0x00800000 Kernel RW coherent 2: 0xc1000000-0xc1ffffff 0x01000000 Kernel RW coherent 3: 0xc2000000-0xc3ffffff 0x02000000 Kernel RW coherent 4: 0xc4000000-0xc7ffffff 0x04000000 Kernel RW coherent 5: 0xc8000000-0xcfffffff 0x08000000 Kernel RW coherent 6: 0xd0000000-0xdfffffff 0x10000000 Kernel RW coherent 7: - ~# cat /sys/kernel/debug/segment_registers ---[ User Segments ]--- 0x00000000-0x0fffffff Kern key 1 User key 1 VSID 0xa085d0 0x10000000-0x1fffffff Kern key 1 User key 1 VSID 0xa086e1 0x20000000-0x2fffffff Kern key 1 User key 1 VSID 0xa087f2 0x30000000-0x3fffffff Kern key 1 User key 1 VSID 0xa08903 0x40000000-0x4fffffff Kern key 1 User key 1 VSID 0xa08a14 0x50000000-0x5fffffff Kern key 1 User key 1 VSID 0xa08b25 0x60000000-0x6fffffff Kern key 1 User key 1 VSID 0xa08c36 0x70000000-0x7fffffff Kern key 1 User key 1 VSID 0xa08d47 0x80000000-0x8fffffff Kern key 1 User key 1 VSID 0xa08e58 0x90000000-0x9fffffff Kern key 1 User key 1 VSID 0xa08f69 0xa0000000-0xafffffff Kern key 1 User key 1 VSID 0xa0907a 0xb0000000-0xbfffffff Kern key 1 User key 1 VSID 0xa0918b ---[ Kernel Segments ]--- 0xc0000000-0xcfffffff Kern key 0 User key 1 No Exec VSID 0x000ccc 0xd0000000-0xdfffffff Kern key 
0 User key 1 No Exec VSID 0x000ddd 0xe0000000-0xefffffff Kern key 0 User key 1 No Exec VSID 0x000eee 0xf0000000-0xffffffff Kern key 0 User key 1 No Exec VSID 0x000fff Aligning _etext to 128kb allows to map up to 32Mb text with 8 IBATs: 16Mb + 8Mb + 4Mb + 2Mb + 1Mb + 512kb + 256kb + 128kb (+ 128kb) = 32Mb (A 9th IBAT is unneeded as 32Mb would need only a single 32Mb block) Aligning data to 4M allows to map up to 512Mb data with 8 DBATs: 16Mb + 8Mb + 4Mb + 4Mb + 32Mb + 64Mb + 128Mb + 256Mb = 512Mb Because some processors only have 4 BATs and because some targets need DBATs for mapping other areas, the following patch will allow to modify _etext and data alignment. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
		
							parent
							
								
									5e04ae85fb
								
							
						
					
					
						commit
						63b2bc6195
					
				
					 6 changed files with 112 additions and 10 deletions
				
			
		|  | @ -723,11 +723,13 @@ config THREAD_SHIFT | |||
| 
 | ||||
| config ETEXT_SHIFT | ||||
| 	int | ||||
| 	default 17 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 | ||||
| 	default PPC_PAGE_SHIFT | ||||
| 
 | ||||
| config DATA_SHIFT | ||||
| 	int | ||||
| 	default 24 if STRICT_KERNEL_RWX && PPC64 | ||||
| 	default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 | ||||
| 	default PPC_PAGE_SHIFT | ||||
| 
 | ||||
| config FORCE_MAX_ZONEORDER | ||||
|  |  | |||
|  | @ -174,7 +174,18 @@ static inline bool pte_user(pte_t pte) | |||
|  * of RAM.  -- Cort | ||||
|  */ | ||||
| #define VMALLOC_OFFSET (0x1000000) /* 16M */ | ||||
| 
 | ||||
| /*
 | ||||
|  * With CONFIG_STRICT_KERNEL_RWX, kernel segments are set NX. But when modules | ||||
|  * are used, NX cannot be set on VMALLOC space. So vmalloc VM space and linear | ||||
|  * memory shall not share segments. | ||||
|  */ | ||||
| #if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_MODULES) | ||||
| #define VMALLOC_START ((_ALIGN((long)high_memory, 256L << 20) + VMALLOC_OFFSET) & \ | ||||
| 		       ~(VMALLOC_OFFSET - 1)) | ||||
| #else | ||||
| #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) | ||||
| #endif | ||||
| #define VMALLOC_END	ioremap_bot | ||||
| 
 | ||||
| #ifndef __ASSEMBLY__ | ||||
|  |  | |||
|  | @ -108,10 +108,8 @@ static void __init MMU_setup(void) | |||
| 		__map_without_bats = 1; | ||||
| 		__map_without_ltlbs = 1; | ||||
| 	} | ||||
| 	if (strict_kernel_rwx_enabled()) { | ||||
| 		__map_without_bats = 1; | ||||
| 	if (strict_kernel_rwx_enabled()) | ||||
| 		__map_without_ltlbs = 1; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  |  | |||
|  | @ -165,3 +165,11 @@ unsigned long p_block_mapped(phys_addr_t pa); | |||
| static inline phys_addr_t v_block_mapped(unsigned long va) { return 0; } | ||||
| static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; } | ||||
| #endif | ||||
| 
 | ||||
| #if defined(CONFIG_PPC_BOOK3S_32) | ||||
| void mmu_mark_initmem_nx(void); | ||||
| void mmu_mark_rodata_ro(void); | ||||
| #else | ||||
| static inline void mmu_mark_initmem_nx(void) { } | ||||
| static inline void mmu_mark_rodata_ro(void) { } | ||||
| #endif | ||||
|  |  | |||
|  | @ -353,6 +353,9 @@ void mark_initmem_nx(void) | |||
| 	unsigned long numpages = PFN_UP((unsigned long)_einittext) - | ||||
| 				 PFN_DOWN((unsigned long)_sinittext); | ||||
| 
 | ||||
| 	if (v_block_mapped((unsigned long)_stext) + 1) | ||||
| 		mmu_mark_initmem_nx(); | ||||
| 	else | ||||
| 		change_page_attr(page, numpages, PAGE_KERNEL); | ||||
| } | ||||
| 
 | ||||
|  | @ -362,6 +365,11 @@ void mark_rodata_ro(void) | |||
| 	struct page *page; | ||||
| 	unsigned long numpages; | ||||
| 
 | ||||
| 	if (v_block_mapped((unsigned long)_sinittext)) { | ||||
| 		mmu_mark_rodata_ro(); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	page = virt_to_page(_stext); | ||||
| 	numpages = PFN_UP((unsigned long)_etext) - | ||||
| 		   PFN_DOWN((unsigned long)_stext); | ||||
|  |  | |||
|  | @ -32,6 +32,7 @@ | |||
| #include <asm/mmu.h> | ||||
| #include <asm/machdep.h> | ||||
| #include <asm/code-patching.h> | ||||
| #include <asm/sections.h> | ||||
| 
 | ||||
| #include "mmu_decl.h" | ||||
| 
 | ||||
|  | @ -138,15 +139,10 @@ static void clearibat(int index) | |||
| 	bat[0].batl = 0; | ||||
| } | ||||
| 
 | ||||
| unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) | ||||
| static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long top) | ||||
| { | ||||
| 	int idx; | ||||
| 
 | ||||
| 	if (__map_without_bats) { | ||||
| 		printk(KERN_DEBUG "RAM mapped without BATs\n"); | ||||
| 		return base; | ||||
| 	} | ||||
| 
 | ||||
| 	while ((idx = find_free_bat()) != -1 && base != top) { | ||||
| 		unsigned int size = block_size(base, top); | ||||
| 
 | ||||
|  | @ -159,6 +155,85 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) | |||
| 	return base; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Map the linear RAM range [base, top) with BATs. | ||||
|  * | ||||
|  * Returns the address up to which RAM has been mapped: 'base' itself when | ||||
|  * BATs are disabled through __map_without_bats, otherwise whatever | ||||
|  * __mmu_mapin_ram() reports. | ||||
|  * | ||||
|  * When strict kernel RWX is enabled and the range straddles __init_begin, | ||||
|  * the mapping is done in two passes split at that border, so that no single | ||||
|  * block covers both sides of it. | ||||
|  */ | ||||
| unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) | ||||
| { | ||||
| 	unsigned long done; | ||||
| 	unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; | ||||
| 
 | ||||
| 	if (__map_without_bats) { | ||||
| 		pr_debug("RAM mapped without BATs\n"); | ||||
| 		return base; | ||||
| 	} | ||||
| 
 | ||||
| 	if (!strict_kernel_rwx_enabled() || base >= border || top <= border) | ||||
| 		return __mmu_mapin_ram(base, top); | ||||
| 
 | ||||
| 	/* | ||||
| 	 * __mmu_mapin_ram() returns the address it stopped at (its advanced | ||||
| 	 * 'base'), not a length: compare it with the border address itself. | ||||
| 	 */ | ||||
| 	done = __mmu_mapin_ram(base, border); | ||||
| 	if (done != border) | ||||
| 		return done; | ||||
| 
 | ||||
| 	/* The second pass already yields the final mapped address. */ | ||||
| 	return __mmu_mapin_ram(border, top); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Restrict instruction fetch to kernel text once init is over. | ||||
|  * | ||||
|  * Nothing is done on the 601. Otherwise: | ||||
|  *  - IBATs are programmed to cover [_stext, _etext), the last block being at | ||||
|  *    least 128k and doubled (with a warning under strict kernel RWX) when | ||||
|  *    it would not reach _etext; | ||||
|  *  - the remaining IBATs are cleared and update_bats() is called; | ||||
|  *  - bit 0x10000000 (the NX bit per the commit description) is set in the | ||||
|  *    segment registers from TASK_SIZE upwards, stopping at the segment | ||||
|  *    holding VMALLOC_START when CONFIG_MODULES is enabled. | ||||
|  */ | ||||
| void mmu_mark_initmem_nx(void) | ||||
| { | ||||
| 	int nr_bats = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; | ||||
| 	unsigned long text_pa = (unsigned long)_stext - PAGE_OFFSET; | ||||
| 	unsigned long text_top = (unsigned long)_etext - PAGE_OFFSET; | ||||
| 	unsigned long len; | ||||
| 	int n = 0; | ||||
| 	int seg; | ||||
| 
 | ||||
| 	if (cpu_has_feature(CPU_FTR_601)) | ||||
| 		return; | ||||
| 
 | ||||
| 	/* Use all but the last IBAT while more than 128k of text remains. */ | ||||
| 	while (n < nr_bats - 1 && text_pa < text_top && | ||||
| 	       text_top - text_pa > (128 << 10)) { | ||||
| 		len = block_size(text_pa, text_top); | ||||
| 		setibat(n, PAGE_OFFSET + text_pa, text_pa, len, PAGE_KERNEL_TEXT); | ||||
| 		n++; | ||||
| 		text_pa += len; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Cover whatever is left with one final block. */ | ||||
| 	if (text_pa < text_top) { | ||||
| 		len = block_size(text_pa, text_top); | ||||
| 		len = max(len, 128UL << 10); | ||||
| 		if ((text_top - text_pa) > len) { | ||||
| 			if (strict_kernel_rwx_enabled()) | ||||
| 				pr_warn("Kernel _etext not properly aligned\n"); | ||||
| 			len <<= 1; | ||||
| 		} | ||||
| 		setibat(n, PAGE_OFFSET + text_pa, text_pa, len, PAGE_KERNEL_TEXT); | ||||
| 		n++; | ||||
| 		text_pa += len; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Invalidate the IBATs that were not needed. */ | ||||
| 	while (n < nr_bats) | ||||
| 		clearibat(n++); | ||||
| 
 | ||||
| 	update_bats(); | ||||
| 
 | ||||
| 	seg = TASK_SIZE >> 28; | ||||
| 	while (seg < 16) { | ||||
| 		/* Do not set NX on VM space for modules */ | ||||
| 		if (IS_ENABLED(CONFIG_MODULES) && | ||||
| 		    (VMALLOC_START & 0xf0000000) == seg << 28) | ||||
| 			break; | ||||
| 		mtsrin(mfsrin(seg << 28) | 0x10000000, seg << 28); | ||||
| 		seg++; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Change the protection bits of every BAT whose recorded start address | ||||
|  * (bat_addrs[].start) lies below __init_begin: BPP_RW is cleared and BPP_RX | ||||
|  * is set in the batl word of entry [1] of the pair, then update_bats() is | ||||
|  * called. Nothing is done on the 601. | ||||
|  * | ||||
|  * NOTE(review): entry [0] is the one clearibat() resets, so entry [1] is | ||||
|  * presumably the DBAT - confirm against the BATS[] definition. | ||||
|  */ | ||||
| void mmu_mark_rodata_ro(void) | ||||
| { | ||||
| 	unsigned long ro_limit = (unsigned long)__init_begin; | ||||
| 	int nr_bats = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; | ||||
| 	int n; | ||||
| 
 | ||||
| 	if (cpu_has_feature(CPU_FTR_601)) | ||||
| 		return; | ||||
| 
 | ||||
| 	for (n = 0; n < nr_bats; n++) { | ||||
| 		struct ppc_bat *entry; | ||||
| 
 | ||||
| 		/* Blocks starting at or above __init_begin are left untouched. */ | ||||
| 		if (bat_addrs[n].start >= ro_limit) | ||||
| 			continue; | ||||
| 
 | ||||
| 		entry = &BATS[n][1]; | ||||
| 		entry->batl = (entry->batl & ~BPP_RW) | BPP_RX; | ||||
| 	} | ||||
| 
 | ||||
| 	update_bats(); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Set up one of the I/D BAT (block address translation) register pairs. | ||||
|  * The parameters are not checked; in particular size must be a power | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 Christophe Leroy
						Christophe Leroy