mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	x86: Enable 5-level paging support via CONFIG_X86_5LEVEL=y
Most things are in place and we can enable support for 5-level paging. The patch makes XEN_PV and XEN_PVH dependent on !X86_5LEVEL. Both are not ready to work with 5-level paging. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Reviewed-by: Juergen Gross <jgross@suse.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-arch@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20170716225954.74185-9-kirill.shutemov@linux.intel.com [ Minor readability edits. ] Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
		
							parent
							
								
									ee00f4a32a
								
							
						
					
					
						commit
						77ef56e4f0
					
				
					 3 changed files with 88 additions and 0 deletions
				
			
		
							
								
								
									
										64
									
								
								Documentation/x86/x86_64/5level-paging.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								Documentation/x86/x86_64/5level-paging.txt
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,64 @@
 | 
				
			||||||
 | 
					== Overview ==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Original x86-64 was limited by 4-level paging to 256 TiB of virtual address
 | 
				
			||||||
 | 
					space and 64 TiB of physical address space. We are already bumping into
 | 
				
			||||||
 | 
					this limit: some vendors offer servers with 64 TiB of memory today.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					To overcome the limitation upcoming hardware will introduce support for
 | 
				
			||||||
 | 
					5-level paging. It is a straightforward extension of the current page
 | 
				
			||||||
 | 
					table structure adding one more layer of translation.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					It bumps the limits to 128 PiB of virtual address space and 4 PiB of
 | 
				
			||||||
 | 
					physical address space. This "ought to be enough for anybody" ©.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					QEMU 2.9 and later support 5-level paging.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Virtual memory layout for 5-level paging is described in
 | 
				
			||||||
 | 
					Documentation/x86/x86_64/mm.txt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					== Enabling 5-level paging ==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CONFIG_X86_5LEVEL=y enables the feature.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					So far, a kernel compiled with the option enabled will be able to boot
 | 
				
			||||||
 | 
					only on machines that support the feature -- look for the 'la57' flag in
 | 
				
			||||||
 | 
					/proc/cpuinfo.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The plan is to implement boot-time switching between 4- and 5-level paging
 | 
				
			||||||
 | 
					in the future.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					== User-space and large virtual address space ==
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					On x86, 5-level paging enables 56-bit userspace virtual address space.
 | 
				
			||||||
 | 
					Not all user space is ready to handle wide addresses. It's known that
 | 
				
			||||||
 | 
					at least some JIT compilers use higher bits in pointers to encode their
 | 
				
			||||||
 | 
					information. It collides with valid pointers with 5-level paging and
 | 
				
			||||||
 | 
					leads to crashes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					To mitigate this, we are not going to allocate virtual address space
 | 
				
			||||||
 | 
					above 47-bit by default.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					But userspace can ask for allocation from full address space by
 | 
				
			||||||
 | 
					specifying hint address (with or without MAP_FIXED) above 47-bits.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If the hint address is set above 47-bit, but MAP_FIXED is not specified, we try
 | 
				
			||||||
 | 
					to look for unmapped area by specified address. If it's already
 | 
				
			||||||
 | 
					occupied, we look for unmapped area in *full* address space, rather than
 | 
				
			||||||
 | 
					from 47-bit window.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					A high hint address would only affect the allocation in question, but not
 | 
				
			||||||
 | 
					any future mmap()s.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Specifying high hint address on older kernel or on machine without 5-level
 | 
				
			||||||
 | 
					paging support is safe. The hint will be ignored and kernel will fall back
 | 
				
			||||||
 | 
					to allocation from 47-bit address space.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This approach helps to easily make application's memory allocator aware
 | 
				
			||||||
 | 
					of the large address space without manually tracking allocated virtual
 | 
				
			||||||
 | 
					address space.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					One important case we need to handle here is interaction with MPX.
 | 
				
			||||||
 | 
					MPX (without MAWA extension) cannot handle addresses above 47-bit, so we
 | 
				
			||||||
 | 
					need to make sure that MPX cannot be enabled if we already have a VMA above
 | 
				
			||||||
 | 
					the boundary and forbid creating such VMAs once MPX is enabled.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -326,6 +326,7 @@ config FIX_EARLYCON_MEM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
config PGTABLE_LEVELS
 | 
					config PGTABLE_LEVELS
 | 
				
			||||||
	int
 | 
						int
 | 
				
			||||||
 | 
						default 5 if X86_5LEVEL
 | 
				
			||||||
	default 4 if X86_64
 | 
						default 4 if X86_64
 | 
				
			||||||
	default 3 if X86_PAE
 | 
						default 3 if X86_PAE
 | 
				
			||||||
	default 2
 | 
						default 2
 | 
				
			||||||
| 
						 | 
					@ -1398,6 +1399,24 @@ config X86_PAE
 | 
				
			||||||
	  has the cost of more pagetable lookup overhead, and also
 | 
						  has the cost of more pagetable lookup overhead, and also
 | 
				
			||||||
	  consumes more pagetable space per process.
 | 
						  consumes more pagetable space per process.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					config X86_5LEVEL
 | 
				
			||||||
 | 
						bool "Enable 5-level page tables support"
 | 
				
			||||||
 | 
						depends on X86_64
 | 
				
			||||||
 | 
						---help---
 | 
				
			||||||
 | 
						  5-level paging enables access to larger address space:
 | 
				
			||||||
 | 
						  up to 128 PiB of virtual address space and 4 PiB of
 | 
				
			||||||
 | 
						  physical address space.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  It will be supported by future Intel CPUs.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  Note: a kernel with this option enabled can only be booted
 | 
				
			||||||
 | 
						  on machines that support the feature.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  See Documentation/x86/x86_64/5level-paging.txt for more
 | 
				
			||||||
 | 
						  information.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  Say N if unsure.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
config ARCH_PHYS_ADDR_T_64BIT
 | 
					config ARCH_PHYS_ADDR_T_64BIT
 | 
				
			||||||
	def_bool y
 | 
						def_bool y
 | 
				
			||||||
	depends on X86_64 || X86_PAE
 | 
						depends on X86_64 || X86_PAE
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -17,6 +17,9 @@ config XEN_PV
 | 
				
			||||||
	bool "Xen PV guest support"
 | 
						bool "Xen PV guest support"
 | 
				
			||||||
	default y
 | 
						default y
 | 
				
			||||||
	depends on XEN
 | 
						depends on XEN
 | 
				
			||||||
 | 
						# XEN_PV is not ready to work with 5-level paging.
 | 
				
			||||||
 | 
						# Changes to hypervisor are also required.
 | 
				
			||||||
 | 
						depends on !X86_5LEVEL
 | 
				
			||||||
	select XEN_HAVE_PVMMU
 | 
						select XEN_HAVE_PVMMU
 | 
				
			||||||
	select XEN_HAVE_VPMU
 | 
						select XEN_HAVE_VPMU
 | 
				
			||||||
	help
 | 
						help
 | 
				
			||||||
| 
						 | 
					@ -75,4 +78,6 @@ config XEN_DEBUG_FS
 | 
				
			||||||
config XEN_PVH
 | 
					config XEN_PVH
 | 
				
			||||||
	bool "Support for running as a PVH guest"
 | 
						bool "Support for running as a PVH guest"
 | 
				
			||||||
	depends on XEN && XEN_PVHVM && ACPI
 | 
						depends on XEN && XEN_PVHVM && ACPI
 | 
				
			||||||
 | 
						# Pre-built page tables are not ready to handle 5-level paging.
 | 
				
			||||||
 | 
						depends on !X86_5LEVEL
 | 
				
			||||||
	def_bool n
 | 
						def_bool n
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue