mirror of https://github.com/torvalds/linux.git (synced 2025-11-04 10:40:15 +02:00)
	x86/xen: remove 32-bit Xen PV guest support
Xen requires 64-bit machines today, and since Xen 4.14 it can be built without
32-bit PV guest support. There is no need to carry the burden of 32-bit PV
guest support in the kernel any longer, as new guests can be either HVM or
PVH, or they can use a 64-bit kernel.

Remove the 32-bit Xen PV support from the kernel.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
This commit is contained in:

parent d7b461caa6
commit a13f2ef168

18 changed files with 50 additions and 840 deletions
arch/x86/entry/entry_32.S
@@ -449,8 +449,6 @@
 .macro SWITCH_TO_KERNEL_STACK
 
-	ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV
-
 	BUG_IF_WRONG_CR3
 
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
@@ -599,8 +597,6 @@
  */
 .macro SWITCH_TO_ENTRY_STACK
 
-	ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV
-
 	/* Bytes to copy */
 	movl	$PTREGS_SIZE, %ecx
 
@@ -872,17 +868,6 @@ SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
  * will ignore all of the single-step traps generated in this range.
  */
 
-#ifdef CONFIG_XEN_PV
-/*
- * Xen doesn't set %esp to be precisely what the normal SYSENTER
- * entry point expects, so fix it up before using the normal path.
- */
-SYM_CODE_START(xen_sysenter_target)
-	addl	$5*4, %esp			/* remove xen-provided frame */
-	jmp	.Lsysenter_past_esp
-SYM_CODE_END(xen_sysenter_target)
-#endif
-
 /*
  * 32-bit SYSENTER entry.
  *
@@ -965,9 +950,8 @@ SYM_FUNC_START(entry_SYSENTER_32)
 
 	movl	%esp, %eax
 	call	do_SYSENTER_32
-	/* XEN PV guests always use IRET path */
-	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
-		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+	testl	%eax, %eax
+	jz	.Lsyscall_32_done
 
 	STACKLEAK_ERASE
 
@@ -1165,95 +1149,6 @@ SYM_FUNC_END(entry_INT80_32)
 #endif
 .endm
 
-#ifdef CONFIG_PARAVIRT
-SYM_CODE_START(native_iret)
-	iret
-	_ASM_EXTABLE(native_iret, asm_iret_error)
-SYM_CODE_END(native_iret)
-#endif
-
-#ifdef CONFIG_XEN_PV
-/*
- * See comment in entry_64.S for further explanation
- *
- * Note: This is not an actual IDT entry point. It's a XEN specific entry
- * point and therefore named to match the 64-bit trampoline counterpart.
- */
-SYM_FUNC_START(xen_asm_exc_xen_hypervisor_callback)
-	/*
-	 * Check to see if we got the event in the critical
-	 * region in xen_iret_direct, after we've reenabled
-	 * events and checked for pending events.  This simulates
-	 * iret instruction's behaviour where it delivers a
-	 * pending interrupt when enabling interrupts:
-	 */
-	cmpl	$xen_iret_start_crit, (%esp)
-	jb	1f
-	cmpl	$xen_iret_end_crit, (%esp)
-	jae	1f
-	call	xen_iret_crit_fixup
-1:
-	pushl	$-1				/* orig_ax = -1 => not a system call */
-	SAVE_ALL
-	ENCODE_FRAME_POINTER
-
-	mov	%esp, %eax
-	call	xen_pv_evtchn_do_upcall
-	jmp	handle_exception_return
-SYM_FUNC_END(xen_asm_exc_xen_hypervisor_callback)
-
-/*
- * Hypervisor uses this for application faults while it executes.
- * We get here for two reasons:
- *  1. Fault while reloading DS, ES, FS or GS
- *  2. Fault while executing IRET
- * Category 1 we fix up by reattempting the load, and zeroing the segment
- * register if the load fails.
- * Category 2 we fix up by jumping to do_iret_error. We cannot use the
- * normal Linux return path in this case because if we use the IRET hypercall
- * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
- * We distinguish between categories by maintaining a status value in EAX.
- */
-SYM_FUNC_START(xen_failsafe_callback)
-	pushl	%eax
-	movl	$1, %eax
-1:	mov	4(%esp), %ds
-2:	mov	8(%esp), %es
-3:	mov	12(%esp), %fs
-4:	mov	16(%esp), %gs
-	/* EAX == 0 => Category 1 (Bad segment)
-	   EAX != 0 => Category 2 (Bad IRET) */
-	testl	%eax, %eax
-	popl	%eax
-	lea	16(%esp), %esp
-	jz	5f
-	jmp	asm_iret_error
-5:	pushl	$-1				/* orig_ax = -1 => not a system call */
-	SAVE_ALL
-	ENCODE_FRAME_POINTER
-	jmp	handle_exception_return
-
-.section .fixup, "ax"
-6:	xorl	%eax, %eax
-	movl	%eax, 4(%esp)
-	jmp	1b
-7:	xorl	%eax, %eax
-	movl	%eax, 8(%esp)
-	jmp	2b
-8:	xorl	%eax, %eax
-	movl	%eax, 12(%esp)
-	jmp	3b
-9:	xorl	%eax, %eax
-	movl	%eax, 16(%esp)
-	jmp	4b
-.previous
-	_ASM_EXTABLE(1b, 6b)
-	_ASM_EXTABLE(2b, 7b)
-	_ASM_EXTABLE(3b, 8b)
-	_ASM_EXTABLE(4b, 9b)
-SYM_FUNC_END(xen_failsafe_callback)
-#endif /* CONFIG_XEN_PV */
-
 SYM_CODE_START_LOCAL_NOALIGN(handle_exception)
 	/* the function address is in %gs's slot on the stack */
 	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
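Note: both ALTERNATIVE removals in this file follow the same pattern: boot-time patching that, on Xen PV, turned the code into an unconditional jump. A minimal before/after sketch, taken from the entry_SYSENTER_32 hunk above (after this commit X86_FEATURE_XENPV can never be set on a 32-bit kernel, so the plain sequence suffices):

	/* before: patched at boot; Xen PV guests always took the IRET path */
	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV

	/* after: plain code, no runtime patching needed */
	testl	%eax, %eax
	jz	.Lsyscall_32_done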
arch/x86/entry/vdso/vdso32/note.S
@@ -16,33 +16,3 @@ ELFNOTE_START(Linux, 0, "a")
 ELFNOTE_END
 
 BUILD_SALT
-
-#ifdef CONFIG_XEN
-/*
- * Add a special note telling glibc's dynamic linker a fake hardware
- * flavor that it will use to choose the search path for libraries in the
- * same way it uses real hardware capabilities like "mmx".
- * We supply "nosegneg" as the fake capability, to indicate that we
- * do not like negative offsets in instructions using segment overrides,
- * since we implement those inefficiently.  This makes it possible to
- * install libraries optimized to avoid those access patterns in someplace
- * like /lib/i686/tls/nosegneg.  Note that an /etc/ld.so.conf.d/file
- * corresponding to the bits here is needed to make ldconfig work right.
- * It should contain:
- *	hwcap 1 nosegneg
- * to match the mapping of bit to name that we give here.
- *
- * At runtime, the fake hardware feature will be considered to be present
- * if its bit is set in the mask word.  So, we start with the mask 0, and
- * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
- */
-
-#include "../../xen/vdso.h"	/* Defines VDSO_NOTE_NONEGSEG_BIT.  */
-
-ELFNOTE_START(GNU, 2, "a")
-	.long 1			/* ncaps */
-VDSO32_NOTE_MASK:		/* Symbol used by arch/x86/xen/setup.c */
-	.long 0			/* mask */
-	.byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg"	/* bit, name */
-ELFNOTE_END
-#endif
arch/x86/include/asm/proto.h
@@ -25,7 +25,7 @@ void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
 void entry_INT80_compat(void);
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+#ifdef CONFIG_XEN_PV
 void xen_entry_INT80_compat(void);
 #endif
 #endif
arch/x86/include/asm/segment.h
@@ -301,7 +301,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
 extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
 extern void early_ignore_irq(void);
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+#ifdef CONFIG_XEN_PV
 extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
 #endif
 
arch/x86/kernel/head_32.S
@@ -134,38 +134,7 @@ SYM_CODE_START(startup_32)
 	movl %eax,pa(initial_page_table+0xffc)
 #endif
 
-#ifdef CONFIG_PARAVIRT
-	/* This is can only trip for a broken bootloader... */
-	cmpw $0x207, pa(boot_params + BP_version)
-	jb .Ldefault_entry
-
-	/* Paravirt-compatible boot parameters.  Look to see what architecture
-		we're booting under. */
-	movl pa(boot_params + BP_hardware_subarch), %eax
-	cmpl $num_subarch_entries, %eax
-	jae .Lbad_subarch
-
-	movl pa(subarch_entries)(,%eax,4), %eax
-	subl $__PAGE_OFFSET, %eax
-	jmp *%eax
-
-.Lbad_subarch:
-SYM_INNER_LABEL_ALIGN(xen_entry, SYM_L_WEAK)
-	/* Unknown implementation; there's really
-	   nothing we can do at this point. */
-	ud2a
-
-	__INITDATA
-
-subarch_entries:
-	.long .Ldefault_entry		/* normal x86/PC */
-	.long xen_entry			/* Xen hypervisor */
-	.long .Ldefault_entry		/* Moorestown MID */
-num_subarch_entries = (. - subarch_entries) / 4
-.previous
-#else
 	jmp .Ldefault_entry
-#endif /* CONFIG_PARAVIRT */
 SYM_CODE_END(startup_32)
 
 #ifdef CONFIG_HOTPLUG_CPU
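Note: this block was the only 32-bit consumer of boot_params.hardware_subarch. As a hypothetical C rendering of the deleted dispatch (the helper names mirror the assembly labels and are not real kernel symbols):

	/* Sketch only: what the deleted assembly did, in C. */
	static void startup_dispatch(const struct boot_params *bp)
	{
		/* Boot protocol < 2.07 cannot provide hardware_subarch. */
		if (bp->hdr.version < 0x207)
			return default_entry();

		/* Out-of-range value: the ud2a path, nothing we can do. */
		if (bp->hdr.hardware_subarch >= num_subarch_entries)
			return bad_subarch();

		/* Dispatch through subarch_entries, e.g. to xen_entry. */
		return subarch_entries[bp->hdr.hardware_subarch]();
	}

With XEN_PV about to depend on X86_64 (see the Kconfig hunk below), a 32-bit kernel can no longer run as a PV guest, so every 32-bit boot now falls straight through to .Ldefault_entry.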
arch/x86/xen/Kconfig
@@ -19,6 +19,7 @@ config XEN_PV
 	bool "Xen PV guest support"
 	default y
 	depends on XEN
+	depends on X86_64
 	select PARAVIRT_XXL
 	select XEN_HAVE_PVMMU
 	select XEN_HAVE_VPMU
@@ -50,7 +51,7 @@ config XEN_PVHVM_SMP
 
 config XEN_512GB
 	bool "Limit Xen pv-domain memory to 512GB"
-	depends on XEN_PV && X86_64
+	depends on XEN_PV
 	default y
 	help
 	  Limit paravirtualized user domains to 512GB of RAM.
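Note: with this hunk, XEN_PV itself carries the 64-bit requirement. The resulting entry, reconstructed from the context lines of the hunk (the real file may contain further lines outside this hunk's context):

config XEN_PV
	bool "Xen PV guest support"
	default y
	depends on XEN
	depends on X86_64
	select PARAVIRT_XXL
	select XEN_HAVE_PVMMU
	select XEN_HAVE_VPMU

Because XEN_PV now implies X86_64, the explicit "&& X86_64" in XEN_512GB becomes redundant and is dropped.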
arch/x86/xen/apic.c
@@ -58,10 +58,6 @@ static u32 xen_apic_read(u32 reg)
 
 	if (reg == APIC_LVR)
 		return 0x14;
-#ifdef CONFIG_X86_32
-	if (reg == APIC_LDR)
-		return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
-#endif
 	if (reg != APIC_ID)
 		return 0;
 
@@ -127,14 +123,6 @@ static int xen_phys_pkg_id(int initial_apic_id, int index_msb)
 	return initial_apic_id >> index_msb;
 }
 
-#ifdef CONFIG_X86_32
-static int xen_x86_32_early_logical_apicid(int cpu)
-{
-	/* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */
-	return 1 << cpu;
-}
-#endif
-
 static void xen_noop(void)
 {
 }
@@ -197,11 +185,6 @@ static struct apic xen_pv_apic = {
 	.icr_write 			= xen_apic_icr_write,
 	.wait_icr_idle 			= xen_noop,
 	.safe_wait_icr_idle 		= xen_safe_apic_wait_icr_idle,
-
-#ifdef CONFIG_X86_32
-	/* generic_processor_info and setup_local_APIC. */
-	.x86_32_early_logical_apicid	= xen_x86_32_early_logical_apicid,
-#endif
 };
 
 static void __init xen_apic_check(void)
arch/x86/xen/enlighten_pv.c
@@ -119,14 +119,6 @@ static void __init xen_banner(void)
 	printk(KERN_INFO "Xen version: %d.%d%s%s\n",
 	       version >> 16, version & 0xffff, extra.extraversion,
 	       xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
-
-#ifdef CONFIG_X86_32
-	pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"
-		"Support for running as 32-bit PV-guest under Xen will soon be removed\n"
-		"from the Linux kernel!\n"
-		"Please use either a 64-bit kernel or switch to HVM or PVH mode!\n"
-		"WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n");
-#endif
 }
 
 static void __init xen_pv_init_platform(void)
@@ -538,30 +530,12 @@ static void load_TLS_descriptor(struct thread_struct *t,
 static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 {
 	/*
-	 * XXX sleazy hack: If we're being called in a lazy-cpu zone
-	 * and lazy gs handling is enabled, it means we're in a
-	 * context switch, and %gs has just been saved.  This means we
-	 * can zero it out to prevent faults on exit from the
-	 * hypervisor if the next process has no %gs.  Either way, it
-	 * has been saved, and the new value will get loaded properly.
-	 * This will go away as soon as Xen has been modified to not
-	 * save/restore %gs for normal hypercalls.
-	 *
-	 * On x86_64, this hack is not used for %gs, because gs points
-	 * to KERNEL_GS_BASE (and uses it for PDA references), so we
-	 * must not zero %gs on x86_64
-	 *
-	 * For x86_64, we need to zero %fs, otherwise we may get an
+	 * In lazy mode we need to zero %fs, otherwise we may get an
 	 * exception between the new %fs descriptor being loaded and
 	 * %fs being effectively cleared at __switch_to().
 	 */
-	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-#ifdef CONFIG_X86_32
-		lazy_load_gs(0);
-#else
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
 		loadsegment(fs, 0);
-#endif
-	}
 
 	xen_mc_batch();
 
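Note: the surviving lazy-mode handling, assembled from the added and context lines of the xen_load_tls() hunk above:

	/*
	 * In lazy mode we need to zero %fs, otherwise we may get an
	 * exception between the new %fs descriptor being loaded and
	 * %fs being effectively cleared at __switch_to().
	 */
	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
		loadsegment(fs, 0);

The 32-bit branch, which zeroed %gs via lazy_load_gs(0), goes away together with the long historical comment that justified it.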
@@ -572,13 +546,11 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
-#ifdef CONFIG_X86_64
 static void xen_load_gs_index(unsigned int idx)
 {
 	if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
 		BUG();
 }
-#endif
 
 static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 				const void *ptr)
@@ -597,7 +569,6 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 	preempt_enable();
 }
 
-#ifdef CONFIG_X86_64
 void noist_exc_debug(struct pt_regs *regs);
 
 DEFINE_IDTENTRY_RAW(xenpv_exc_nmi)
@@ -697,7 +668,6 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist)
 
 	return true;
 }
-#endif
 
 static int cvt_gate_to_trap(int vector, const gate_desc *val,
 			    struct trap_info *info)
@@ -710,10 +680,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 	info->vector = vector;
 
 	addr = gate_offset(val);
-#ifdef CONFIG_X86_64
 	if (!get_trap_addr((void **)&addr, val->bits.ist))
 		return 0;
-#endif	/* CONFIG_X86_64 */
 	info->address = addr;
 
 	info->cs = gate_segment(val);
@@ -958,15 +926,12 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
 static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 {
 	int ret;
-#ifdef CONFIG_X86_64
 	unsigned int which;
 	u64 base;
-#endif
 
 	ret = 0;
 
 	switch (msr) {
-#ifdef CONFIG_X86_64
 	case MSR_FS_BASE:		which = SEGBASE_FS; goto set;
 	case MSR_KERNEL_GS_BASE:	which = SEGBASE_GS_USER; goto set;
 	case MSR_GS_BASE:		which = SEGBASE_GS_KERNEL; goto set;
@@ -976,7 +941,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 		if (HYPERVISOR_set_segment_base(which, base) != 0)
 			ret = -EIO;
 		break;
-#endif
 
 	case MSR_STAR:
 	case MSR_CSTAR:
@@ -1058,9 +1022,7 @@ void __init xen_setup_vcpu_info_placement(void)
 static const struct pv_info xen_info __initconst = {
 	.shared_kernel_pmd = 0,
 
-#ifdef CONFIG_X86_64
 	.extra_user_64bit_cs = FLAT_USER_CS64,
-#endif
 	.name = "Xen",
 };
 
@@ -1086,18 +1048,14 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.read_pmc = xen_read_pmc,
 
 	.iret = xen_iret,
-#ifdef CONFIG_X86_64
 	.usergs_sysret64 = xen_sysret64,
-#endif
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
 	.load_gdt = xen_load_gdt,
 	.load_idt = xen_load_idt,
 	.load_tls = xen_load_tls,
-#ifdef CONFIG_X86_64
 	.load_gs_index = xen_load_gs_index,
-#endif
 
 	.alloc_ldt = xen_alloc_ldt,
 	.free_ldt = xen_free_ldt,
@@ -1364,15 +1322,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
 	/* keep using Xen gdt for now; no urgent need to change it */
 
-#ifdef CONFIG_X86_32
-	pv_info.kernel_rpl = 1;
-	if (xen_feature(XENFEAT_supervisor_mode_kernel))
-		pv_info.kernel_rpl = 0;
-#else
 	pv_info.kernel_rpl = 0;
-#endif
-	/* set the limit of our address space */
-	xen_reserve_top();
 
 	/*
 	 * We used to do this in xen_arch_setup, but that is too late
@@ -1384,12 +1334,6 @@
 	if (rc != 0)
 		xen_raw_printk("physdev_op failed %d\n", rc);
 
-#ifdef CONFIG_X86_32
-	/* set up basic CPUID stuff */
-	cpu_detect(&new_cpu_data);
-	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
-	new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
-#endif
-
 	if (xen_start_info->mod_start) {
 	    if (xen_start_info->flags & SIF_MOD_START_PFN)
@@ -1458,12 +1402,8 @@
 	xen_efi_init(&boot_params);
 
 	/* Start the world */
-#ifdef CONFIG_X86_32
-	i386_start_kernel();
-#else
 	cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
 	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
-#endif
 }
 
 static int xen_cpu_up_prepare_pv(unsigned int cpu)
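Note: the tail of xen_start_kernel() collapses to the 64-bit path. From the context and surviving lines of the last hunk above:

	xen_efi_init(&boot_params);

	/* Start the world */
	cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
}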
arch/x86/xen/mmu_pv.c
@@ -86,19 +86,8 @@
 #include "mmu.h"
 #include "debugfs.h"
 
-#ifdef CONFIG_X86_32
-/*
- * Identity map, in addition to plain kernel map.  This needs to be
- * large enough to allocate page table pages to allocate the rest.
- * Each page can map 2MB.
- */
-#define LEVEL1_IDENT_ENTRIES	(PTRS_PER_PTE * 4)
-static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
-#endif
-#ifdef CONFIG_X86_64
 /* l3 pud for userspace vsyscall mapping */
 static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
-#endif /* CONFIG_X86_64 */
 
 /*
  * Protects atomic reservation decrease/increase against concurrent increases.
@@ -280,10 +269,7 @@ static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
 	if (!xen_batched_set_pte(ptep, pteval)) {
 		/*
 		 * Could call native_set_pte() here and trap and
-		 * emulate the PTE write but with 32-bit guests this
-		 * needs two traps (one for each of the two 32-bit
-		 * words in the PTE) so do one hypercall directly
-		 * instead.
+		 * emulate the PTE write, but a hypercall is much cheaper.
 		 */
 		struct mmu_update u;
 
@@ -439,26 +425,6 @@ static void xen_set_pud(pud_t *ptr, pud_t val)
 	xen_set_pud_hyper(ptr, val);
 }
 
-#ifdef CONFIG_X86_PAE
-static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
-{
-	trace_xen_mmu_set_pte_atomic(ptep, pte);
-	__xen_set_pte(ptep, pte);
-}
-
-static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
-	trace_xen_mmu_pte_clear(mm, addr, ptep);
-	__xen_set_pte(ptep, native_make_pte(0));
-}
-
-static void xen_pmd_clear(pmd_t *pmdp)
-{
-	trace_xen_mmu_pmd_clear(pmdp);
-	set_pmd(pmdp, __pmd(0));
-}
-#endif	/* CONFIG_X86_PAE */
-
 __visible pmd_t xen_make_pmd(pmdval_t pmd)
 {
 	pmd = pte_pfn_to_mfn(pmd);
@@ -466,7 +432,6 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
 
-#ifdef CONFIG_X86_64
 __visible pudval_t xen_pud_val(pud_t pud)
 {
 	return pte_mfn_to_pfn(pud.pud);
@@ -571,7 +536,6 @@ __visible p4d_t xen_make_p4d(p4dval_t p4d)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
 #endif  /* CONFIG_PGTABLE_LEVELS >= 5 */
-#endif	/* CONFIG_X86_64 */
 
 static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
 		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
@@ -636,11 +600,8 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
  * will be STACK_TOP_MAX, but at boot we need to pin up to
  * FIXADDR_TOP.
  *
- * For 32-bit the important bit is that we don't pin beyond there,
- * because then we start getting into Xen's ptes.
- *
- * For 64-bit, we must skip the Xen hole in the middle of the address
- * space, just after the big x86-64 virtual hole.
+ * We must skip the Xen hole in the middle of the address space, just after
+ * the big x86-64 virtual hole.
  */
 static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 			  int (*func)(struct mm_struct *mm, struct page *,
@@ -654,14 +615,12 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 	limit--;
 	BUG_ON(limit >= FIXADDR_TOP);
 
-#ifdef CONFIG_X86_64
 	/*
 	 * 64-bit has a great big hole in the middle of the address
 	 * space, which contains the Xen mappings.
 	 */
 	hole_low = pgd_index(GUARD_HOLE_BASE_ADDR);
 	hole_high = pgd_index(GUARD_HOLE_END_ADDR);
-#endif
 
 	nr = pgd_index(limit) + 1;
 	for (i = 0; i < nr; i++) {
@@ -787,6 +746,8 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
    read-only, and can be pinned. */
 static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 {
+	pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
 	trace_xen_mmu_pgd_pin(mm, pgd);
 
 	xen_mc_batch();
@@ -800,26 +761,14 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 		xen_mc_batch();
 	}
 
-#ifdef CONFIG_X86_64
-	{
-		pgd_t *user_pgd = xen_get_user_pgd(pgd);
-
-		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
-
-		if (user_pgd) {
-			xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
-			xen_do_pin(MMUEXT_PIN_L4_TABLE,
-				   PFN_DOWN(__pa(user_pgd)));
-		}
-	}
-#else /* CONFIG_X86_32 */
-#ifdef CONFIG_X86_PAE
-	/* Need to make sure unshared kernel PMD is pinnable */
-	xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
-		     PT_PMD);
-#endif
-	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
-#endif /* CONFIG_X86_64 */
+	xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
+
+	if (user_pgd) {
+		xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
+		xen_do_pin(MMUEXT_PIN_L4_TABLE,
+			   PFN_DOWN(__pa(user_pgd)));
+	}
+
 	xen_mc_issue(0);
 }
 
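Note: with both hunks applied, the pin path is 64-bit only and loses one indentation level. A sketch of the resulting function, with the page-table walk (unchanged by this commit) elided:

static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
{
	pgd_t *user_pgd = xen_get_user_pgd(pgd);

	trace_xen_mmu_pgd_pin(mm, pgd);

	xen_mc_batch();

	/* ... pgd walk elided, unchanged by this commit ... */

	xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));

	if (user_pgd) {
		xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
		xen_do_pin(MMUEXT_PIN_L4_TABLE,
			   PFN_DOWN(__pa(user_pgd)));
	}

	xen_mc_issue(0);
}

Only the L4 pin remains; the 32-bit L3 pin and the PAE unshared-PMD special case are gone.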
@@ -870,9 +819,7 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
 static void __init xen_after_bootmem(void)
 {
 	static_branch_enable(&xen_struct_pages_ready);
-#ifdef CONFIG_X86_64
 	SetPagePinned(virt_to_page(level3_user_vsyscall));
-#endif
 	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
 }
 
@@ -919,29 +866,19 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page,
 /* Release a pagetables pages back as normal RW */
 static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
 {
+	pgd_t *user_pgd = xen_get_user_pgd(pgd);
+
 	trace_xen_mmu_pgd_unpin(mm, pgd);
 
 	xen_mc_batch();
 
 	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
-#ifdef CONFIG_X86_64
-	{
-		pgd_t *user_pgd = xen_get_user_pgd(pgd);
-
-		if (user_pgd) {
-			xen_do_pin(MMUEXT_UNPIN_TABLE,
-				   PFN_DOWN(__pa(user_pgd)));
-			xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
-		}
-	}
-#endif
-
-#ifdef CONFIG_X86_PAE
-	/* Need to make sure unshared kernel PMD is unpinned */
-	xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
-		       PT_PMD);
-#endif
+	if (user_pgd) {
+		xen_do_pin(MMUEXT_UNPIN_TABLE,
+			   PFN_DOWN(__pa(user_pgd)));
+		xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
+	}
 
 	__xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
 
@@ -1089,7 +1026,6 @@ static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
 		BUG();
 }
 
-#ifdef CONFIG_X86_64
 static void __init xen_cleanhighmap(unsigned long vaddr,
 				    unsigned long vaddr_end)
 {
@@ -1273,17 +1209,15 @@ static void __init xen_pagetable_cleanhighmap(void)
 	xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2));
 	xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
 }
-#endif
 
 static void __init xen_pagetable_p2m_setup(void)
 {
 	xen_vmalloc_p2m_tree();
 
-#ifdef CONFIG_X86_64
 	xen_pagetable_p2m_free();
 
 	xen_pagetable_cleanhighmap();
-#endif
+
 	/* And revector! Bye bye old array */
 	xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
 }
@@ -1420,6 +1354,8 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
 }
 static void xen_write_cr3(unsigned long cr3)
 {
+	pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
+
 	BUG_ON(preemptible());
 
 	xen_mc_batch();  /* disables interrupts */
@@ -1430,20 +1366,14 @@ static void xen_write_cr3(unsigned long cr3)
 
 	__xen_write_cr3(true, cr3);
 
-#ifdef CONFIG_X86_64
-	{
-		pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
-		if (user_pgd)
-			__xen_write_cr3(false, __pa(user_pgd));
-		else
-			__xen_write_cr3(false, 0);
-	}
-#endif
+	if (user_pgd)
+		__xen_write_cr3(false, __pa(user_pgd));
+	else
+		__xen_write_cr3(false, 0);
 
 	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
 }
 
-#ifdef CONFIG_X86_64
 /*
  * At the start of the day - when Xen launches a guest, it has already
 * built pagetables for the guest. We diligently look over them
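Note: same pattern for the CR3 write; the user pgd lookup is hoisted to the top of the function and the #ifdef block flattens out. Resulting shape, with unchanged lines elided:

static void xen_write_cr3(unsigned long cr3)
{
	pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));

	BUG_ON(preemptible());

	xen_mc_batch();  /* disables interrupts */

	/* ... unchanged lines elided ... */

	__xen_write_cr3(true, cr3);

	if (user_pgd)
		__xen_write_cr3(false, __pa(user_pgd));
	else
		__xen_write_cr3(false, 0);

	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
}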
@@ -1478,49 +1408,39 @@ static void __init xen_write_cr3_init(unsigned long cr3)
 
 	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
 }
-#endif
 
 static int xen_pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd = mm->pgd;
-	int ret = 0;
+	struct page *page = virt_to_page(pgd);
+	pgd_t *user_pgd;
+	int ret = -ENOMEM;
 
 	BUG_ON(PagePinned(virt_to_page(pgd)));
+	BUG_ON(page->private != 0);
 
-#ifdef CONFIG_X86_64
-	{
-		struct page *page = virt_to_page(pgd);
-		pgd_t *user_pgd;
-
-		BUG_ON(page->private != 0);
-
-		ret = -ENOMEM;
-
-		user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-		page->private = (unsigned long)user_pgd;
+	user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	page->private = (unsigned long)user_pgd;
 
-		if (user_pgd != NULL) {
+	if (user_pgd != NULL) {
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
-			user_pgd[pgd_index(VSYSCALL_ADDR)] =
-				__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+		user_pgd[pgd_index(VSYSCALL_ADDR)] =
+			__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
 #endif
-			ret = 0;
-		}
-
-		BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
-	}
-#endif
+		ret = 0;
+	}
+
+	BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
 
 	return ret;
 }
 
 static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-#ifdef CONFIG_X86_64
 	pgd_t *user_pgd = xen_get_user_pgd(pgd);
 
 	if (user_pgd)
 		free_page((unsigned long)user_pgd);
-#endif
 }
 
 /*
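Note: xen_pgd_alloc() is now unconditionally the 64-bit variant; assembled from the added and context lines of the hunk above:

static int xen_pgd_alloc(struct mm_struct *mm)
{
	pgd_t *pgd = mm->pgd;
	struct page *page = virt_to_page(pgd);
	pgd_t *user_pgd;
	int ret = -ENOMEM;

	BUG_ON(PagePinned(virt_to_page(pgd)));
	BUG_ON(page->private != 0);

	user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
	page->private = (unsigned long)user_pgd;

	if (user_pgd != NULL) {
#ifdef CONFIG_X86_VSYSCALL_EMULATION
		user_pgd[pgd_index(VSYSCALL_ADDR)] =
			__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
#endif
		ret = 0;
	}

	BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));

	return ret;
}

Note that the error value moves into the initializer (ret = -ENOMEM) and is cleared to 0 only once the user pgd page has been allocated.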
@@ -1539,7 +1459,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
  */
 __visible pte_t xen_make_pte_init(pteval_t pte)
 {
-#ifdef CONFIG_X86_64
 	unsigned long pfn;
 
 	/*
@@ -1553,7 +1472,7 @@ __visible pte_t xen_make_pte_init(pteval_t pte)
 	    pfn >= xen_start_info->first_p2m_pfn &&
 	    pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
 		pte &= ~_PAGE_RW;
-#endif
+
 	pte = pte_pfn_to_mfn(pte);
 	return native_make_pte(pte);
 }
@@ -1561,13 +1480,6 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
 
 static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
 {
-#ifdef CONFIG_X86_32
-	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
-	if (pte_mfn(pte) != INVALID_P2M_ENTRY
-	    && pte_val_ma(*ptep) & _PAGE_PRESENT)
-		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
-			       pte_val_ma(pte));
-#endif
 	__xen_set_pte(ptep, pte);
 }
 
@@ -1702,7 +1614,6 @@ static void xen_release_pmd(unsigned long pfn)
 	xen_release_ptpage(pfn, PT_PMD);
 }
 
-#ifdef CONFIG_X86_64
 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PUD);
@@ -1712,20 +1623,6 @@ static void xen_release_pud(unsigned long pfn)
 {
 	xen_release_ptpage(pfn, PT_PUD);
 }
-#endif
-
-void __init xen_reserve_top(void)
-{
-#ifdef CONFIG_X86_32
-	unsigned long top = HYPERVISOR_VIRT_START;
-	struct xen_platform_parameters pp;
-
-	if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
-		top = pp.virt_start;
-
-	reserve_top_address(-top);
-#endif	/* CONFIG_X86_32 */
-}
 
 /*
  * Like __va(), but returns address in the kernel mapping (which is
@@ -1733,11 +1630,7 @@ void __init xen_reserve_top(void)
  */
 static void * __init __ka(phys_addr_t paddr)
 {
-#ifdef CONFIG_X86_64
 	return (void *)(paddr + __START_KERNEL_map);
-#else
-	return __va(paddr);
-#endif
 }
 
 /* Convert a machine address to physical address */
@@ -1771,56 +1664,7 @@ static void __init set_page_prot(void *addr, pgprot_t prot)
 {
 	return set_page_prot_flags(addr, prot, UVMF_NONE);
 }
-#ifdef CONFIG_X86_32
-static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
-{
-	unsigned pmdidx, pteidx;
-	unsigned ident_pte;
-	unsigned long pfn;
-
-	level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES,
-				      PAGE_SIZE);
-
-	ident_pte = 0;
-	pfn = 0;
-	for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
-		pte_t *pte_page;
-
-		/* Reuse or allocate a page of ptes */
-		if (pmd_present(pmd[pmdidx]))
-			pte_page = m2v(pmd[pmdidx].pmd);
-		else {
-			/* Check for free pte pages */
-			if (ident_pte == LEVEL1_IDENT_ENTRIES)
-				break;
-
-			pte_page = &level1_ident_pgt[ident_pte];
-			ident_pte += PTRS_PER_PTE;
-
-			pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
-		}
-
-		/* Install mappings */
-		for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
-			pte_t pte;
-
-			if (pfn > max_pfn_mapped)
-				max_pfn_mapped = pfn;
-
-			if (!pte_none(pte_page[pteidx]))
-				continue;
-
-			pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
-			pte_page[pteidx] = pte;
-		}
-	}
-
-	for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
-		set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
-
-	set_page_prot(pmd, PAGE_KERNEL_RO);
-}
-#endif
+
 void __init xen_setup_machphys_mapping(void)
 {
 	struct xen_machphys_mapping mapping;
@@ -1831,13 +1675,8 @@ void __init xen_setup_machphys_mapping(void)
 	} else {
 		machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
 	}
-#ifdef CONFIG_X86_32
-	WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
-		< machine_to_phys_mapping);
-#endif
 }
 
-#ifdef CONFIG_X86_64
 static void __init convert_pfn_mfn(void *v)
 {
 	pte_t *pte = v;
@@ -2168,105 +2007,6 @@ void __init xen_relocate_p2m(void)
 	xen_start_info->nr_p2m_frames = n_frames;
 }
 
-#else	/* !CONFIG_X86_64 */
-static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
-static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
-RESERVE_BRK(fixup_kernel_pmd, PAGE_SIZE);
-RESERVE_BRK(fixup_kernel_pte, PAGE_SIZE);
-
-static void __init xen_write_cr3_init(unsigned long cr3)
-{
-	unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
-
-	BUG_ON(read_cr3_pa() != __pa(initial_page_table));
-	BUG_ON(cr3 != __pa(swapper_pg_dir));
-
-	/*
-	 * We are switching to swapper_pg_dir for the first time (from
-	 * initial_page_table) and therefore need to mark that page
-	 * read-only and then pin it.
-	 *
-	 * Xen disallows sharing of kernel PMDs for PAE
-	 * guests. Therefore we must copy the kernel PMD from
-	 * initial_page_table into a new kernel PMD to be used in
-	 * swapper_pg_dir.
-	 */
-	swapper_kernel_pmd =
-		extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
-	copy_page(swapper_kernel_pmd, initial_kernel_pmd);
-	swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
-		__pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
-	set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
-
-	set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
-	xen_write_cr3(cr3);
-	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
-
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
-			  PFN_DOWN(__pa(initial_page_table)));
-	set_page_prot(initial_page_table, PAGE_KERNEL);
-	set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
-
-	pv_ops.mmu.write_cr3 = &xen_write_cr3;
-}
-
-/*
- * For 32 bit domains xen_start_info->pt_base is the pgd address which might be
- * not the first page table in the page table pool.
- * Iterate through the initial page tables to find the real page table base.
- */
-static phys_addr_t __init xen_find_pt_base(pmd_t *pmd)
-{
-	phys_addr_t pt_base, paddr;
-	unsigned pmdidx;
-
-	pt_base = min(__pa(xen_start_info->pt_base), __pa(pmd));
-
-	for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++)
-		if (pmd_present(pmd[pmdidx]) && !pmd_large(pmd[pmdidx])) {
-			paddr = m2p(pmd[pmdidx].pmd);
-			pt_base = min(pt_base, paddr);
-		}
-
-	return pt_base;
-}
-
-void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
-{
-	pmd_t *kernel_pmd;
-
-	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
-
-	xen_pt_base = xen_find_pt_base(kernel_pmd);
-	xen_pt_size = xen_start_info->nr_pt_frames * PAGE_SIZE;
-
-	initial_kernel_pmd =
-		extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
-
-	max_pfn_mapped = PFN_DOWN(xen_pt_base + xen_pt_size + 512 * 1024);
-
-	copy_page(initial_kernel_pmd, kernel_pmd);
-
-	xen_map_identity_early(initial_kernel_pmd, max_pfn);
-
-	copy_page(initial_page_table, pgd);
-	initial_page_table[KERNEL_PGD_BOUNDARY] =
-		__pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
-
-	set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
-	set_page_prot(initial_page_table, PAGE_KERNEL_RO);
-	set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
-
-	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
-
-	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
-			  PFN_DOWN(__pa(initial_page_table)));
-	xen_write_cr3(__pa(initial_page_table));
-
-	memblock_reserve(xen_pt_base, xen_pt_size);
-}
-#endif	/* CONFIG_X86_64 */
-
 void __init xen_reserve_special_pages(void)
 {
 	phys_addr_t paddr;
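Worth making explicit from the removed xen_write_cr3_init(): Xen only pins a pagetable the guest has already mapped read-only, and the previous table can be made writable again only after the switch and unpin. A standalone sketch of that ordering, with print-stubs standing in for the removed mmu_pv.c helpers (nothing below is kernel API):

```c
#include <stdio.h>

/* Stubs that just log; they stand in for the removed helpers. */
static void set_prot_ro(const char *pt) { printf("map %s read-only\n", pt); }
static void set_prot_rw(const char *pt) { printf("map %s read-write\n", pt); }
static void write_cr3(const char *pt)   { printf("load cr3 = %s\n", pt); }
static void pin_l3(const char *pt)      { printf("MMUEXT_PIN_L3_TABLE %s\n", pt); }
static void unpin(const char *pt)       { printf("MMUEXT_UNPIN_TABLE %s\n", pt); }

int main(void)
{
	/* private PAE kernel PMD copy, marked RO before Xen accepts a pin */
	set_prot_ro("swapper_kernel_pmd");
	set_prot_ro("swapper_pg_dir");
	write_cr3("swapper_pg_dir");
	pin_l3("swapper_pg_dir");

	/* only after the switch may the old table be unpinned and
	 * mapped writable again for reuse */
	unpin("initial_page_table");
	set_prot_rw("initial_page_table");
	set_prot_rw("initial_kernel_pmd");
	return 0;
}
```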
@@ -2300,12 +2040,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
 	switch (idx) {
 	case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-#ifdef CONFIG_X86_32
-	case FIX_WP_TEST:
-# ifdef CONFIG_HIGHMEM
-	case FIX_KMAP_BEGIN ... FIX_KMAP_END:
-# endif
-#elif defined(CONFIG_X86_VSYSCALL_EMULATION)
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 	case VSYSCALL_PAGE:
 #endif
 		/* All local page mappings */
@@ -2357,9 +2092,7 @@ static void __init xen_post_allocator_init(void)
 	pv_ops.mmu.set_pte = xen_set_pte;
 	pv_ops.mmu.set_pmd = xen_set_pmd;
 	pv_ops.mmu.set_pud = xen_set_pud;
-#ifdef CONFIG_X86_64
 	pv_ops.mmu.set_p4d = xen_set_p4d;
-#endif
 
 	/* This will work as long as patching hasn't happened yet
 	   (which it hasn't) */
@@ -2367,15 +2100,11 @@ static void __init xen_post_allocator_init(void)
 	pv_ops.mmu.alloc_pmd = xen_alloc_pmd;
 	pv_ops.mmu.release_pte = xen_release_pte;
 	pv_ops.mmu.release_pmd = xen_release_pmd;
-#ifdef CONFIG_X86_64
 	pv_ops.mmu.alloc_pud = xen_alloc_pud;
 	pv_ops.mmu.release_pud = xen_release_pud;
-#endif
 	pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte);
 
-#ifdef CONFIG_X86_64
 	pv_ops.mmu.write_cr3 = &xen_write_cr3;
-#endif
 }
 
 static void xen_leave_lazy_mmu(void)
@@ -2420,17 +2149,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
 	.make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
 
-#ifdef CONFIG_X86_PAE
-	.set_pte_atomic = xen_set_pte_atomic,
-	.pte_clear = xen_pte_clear,
-	.pmd_clear = xen_pmd_clear,
-#endif	/* CONFIG_X86_PAE */
 	.set_pud = xen_set_pud_hyper,
 
 	.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
 	.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
 
-#ifdef CONFIG_X86_64
 	.pud_val = PV_CALLEE_SAVE(xen_pud_val),
 	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
 	.set_p4d = xen_set_p4d_hyper,
@@ -2442,7 +2165,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
 	.make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
 #endif
-#endif	/* CONFIG_X86_64 */
 
 	.activate_mm = xen_activate_mm,
 	.dup_mmap = xen_dup_mmap,
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -379,12 +379,8 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m)
 
 		if (type == P2M_TYPE_PFN || i < chunk) {
 			/* Use initial p2m page contents. */
-#ifdef CONFIG_X86_64
 			mfns = alloc_p2m_page();
 			copy_page(mfns, xen_p2m_addr + pfn);
-#else
-			mfns = xen_p2m_addr + pfn;
-#endif
 			ptep = populate_extra_pte((unsigned long)(p2m + pfn));
 			set_pte(ptep,
 				pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
@@ -467,7 +463,7 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine);
 * Allocate new pmd(s). It is checked whether the old pmd is still in place.
 * If not, nothing is changed. This is okay as the only reason for allocating
 * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual
- * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
+ * pmd.
 */
 static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
 {
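The alloc_p2m_pmd() comment above refers to the p2m's lazy-population trick: whole unpopulated ranges share a single read-only "missing" (or "identity") page, and a private page is allocated only on the first write into such a range. A generic, self-contained sketch of that pattern — plain arrays here, whereas the kernel's real layout is pagetable-backed:

```c
#include <stdlib.h>
#include <string.h>

#define ENTRIES 512
#define INVALID (~0UL)

static unsigned long p2m_missing[ENTRIES];	/* shared filler page */
static unsigned long *p2m_mid[ENTRIES];		/* one slot per pfn chunk */

/* Point every chunk at the shared page; sparse space costs one page. */
static void p2m_init(void)
{
	for (int i = 0; i < ENTRIES; i++) {
		p2m_missing[i] = INVALID;
		p2m_mid[i] = p2m_missing;
	}
}

/* Swap in a private copy of the filler before the first real write. */
static void p2m_set(unsigned long pfn, unsigned long mfn)
{
	unsigned long idx = pfn / ENTRIES, off = pfn % ENTRIES;

	if (p2m_mid[idx] == p2m_missing) {
		unsigned long *page = malloc(sizeof(p2m_missing));
		memcpy(page, p2m_missing, sizeof(p2m_missing));
		p2m_mid[idx] = page;
	}
	p2m_mid[idx][off] = mfn;
}

int main(void)
{
	p2m_init();
	p2m_set(1000, 42);	/* allocates exactly one private page */
	return 0;
}
```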
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -32,7 +32,6 @@
 #include <xen/features.h>
 #include <xen/hvc-console.h>
 #include "xen-ops.h"
-#include "vdso.h"
 #include "mmu.h"
 
 #define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
@@ -545,13 +544,10 @@ static unsigned long __init xen_get_pages_limit(void)
 {
 	unsigned long limit;
 
-#ifdef CONFIG_X86_32
-	limit = GB(64) / PAGE_SIZE;
-#else
 	limit = MAXMEM / PAGE_SIZE;
 	if (!xen_initial_domain() && xen_512gb_limit)
 		limit = GB(512) / PAGE_SIZE;
-#endif
+
 	return limit;
 }
 
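With the GB() macro shown above and 4 KiB pages, the dropped 32-bit ceiling and the surviving domU default are easy to sanity-check: GB(64)/PAGE_SIZE is 2^24 page frames and GB(512)/PAGE_SIZE is 2^27. A standalone check:

```c
#include <stdint.h>
#include <stdio.h>

/* Same macro as in setup.c; a 4 KiB PAGE_SIZE is assumed here. */
#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
#define PAGE_SIZE 4096ULL

int main(void)
{
	printf("old X86_32 limit:  %llu pages\n",
	       (unsigned long long)(GB(64) / PAGE_SIZE));	/* 16777216 */
	printf("512 GiB domU limit: %llu pages\n",
	       (unsigned long long)(GB(512) / PAGE_SIZE));	/* 134217728 */
	return 0;
}
```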
@@ -722,17 +718,8 @@ static void __init xen_reserve_xen_mfnlist(void)
 	if (!xen_is_e820_reserved(start, size))
 		return;
 
-#ifdef CONFIG_X86_32
-	/*
-	 * Relocating the p2m on 32 bit system to an arbitrary virtual address
-	 * is not supported, so just give up.
-	 */
-	xen_raw_console_write("Xen hypervisor allocated p2m list conflicts with E820 map\n");
-	BUG();
-#else
 	xen_relocate_p2m();
 	memblock_free(start, size);
-#endif
 }
 
 /**
@@ -921,20 +908,6 @@ char * __init xen_memory_setup(void)
 	return "Xen";
 }
 
-/*
- * Set the bit indicating "nosegneg" library variants should be used.
- * We only need to bother in pure 32-bit mode; compat 32-bit processes
- * can have un-truncated segments, so wrapping around is allowed.
- */
-static void __init fiddle_vdso(void)
-{
-#ifdef CONFIG_X86_32
-	u32 *mask = vdso_image_32.data +
-		vdso_image_32.sym_VDSO32_NOTE_MASK;
-	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
-#endif
-}
-
 static int register_callback(unsigned type, const void *func)
 {
 	struct callback_register callback = {
@@ -951,11 +924,7 @@ void xen_enable_sysenter(void)
 	int ret;
 	unsigned sysenter_feature;
 
-#ifdef CONFIG_X86_32
-	sysenter_feature = X86_FEATURE_SEP;
-#else
 	sysenter_feature = X86_FEATURE_SYSENTER32;
-#endif
 
 	if (!boot_cpu_has(sysenter_feature))
 		return;
@@ -967,7 +936,6 @@ void xen_enable_sysenter(void)
 
 void xen_enable_syscall(void)
 {
-#ifdef CONFIG_X86_64
 	int ret;
 
 	ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
@@ -983,7 +951,6 @@ void xen_enable_syscall(void)
 		if (ret != 0)
 			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
 	}
-#endif /* CONFIG_X86_64 */
 }
 
 static void __init xen_pvmmu_arch_setup(void)
@@ -1024,7 +991,6 @@ void __init xen_arch_setup(void)
 	disable_cpuidle();
 	disable_cpufreq();
 	WARN_ON(xen_set_default_idle());
-	fiddle_vdso();
 #ifdef CONFIG_NUMA
 	numa_off = 1;
 #endif
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -212,15 +212,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
 		 * sure the old memory can be recycled. */
 		make_lowmem_page_readwrite(xen_initial_gdt);
 
-#ifdef CONFIG_X86_32
-	/*
-	 * Xen starts us with XEN_FLAT_RING1_DS, but linux code
-	 * expects __USER_DS
-	 */
-	loadsegment(ds, __USER_DS);
-	loadsegment(es, __USER_DS);
-#endif
-
 	xen_filter_cpu_maps();
 	xen_setup_vcpu_info_placement();
 
@@ -301,10 +292,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 
 	gdt = get_cpu_gdt_rw(cpu);
 
-#ifdef CONFIG_X86_32
-	ctxt->user_regs.fs = __KERNEL_PERCPU;
-	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
-#endif
 	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
 	/*
@@ -342,12 +329,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	ctxt->kernel_ss = __KERNEL_DS;
 	ctxt->kernel_sp = task_top_of_stack(idle);
 
-#ifdef CONFIG_X86_32
-	ctxt->event_callback_cs     = __KERNEL_CS;
-	ctxt->failsafe_callback_cs  = __KERNEL_CS;
-#else
 	ctxt->gs_base_kernel = per_cpu_offset(cpu);
-#endif
 	ctxt->event_callback_eip    =
 		(unsigned long)xen_asm_exc_xen_hypervisor_callback;
 	ctxt->failsafe_callback_eip =
--- a/arch/x86/xen/vdso.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/* Bit used for the pseudo-hwcap for non-negative segments.  We use
-   bit 1 to avoid bugs in some versions of glibc when bit 0 is
-   used; the choice is otherwise arbitrary. */
-#define VDSO_NOTE_NONEGSEG_BIT	1
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -76,11 +76,7 @@ SYM_FUNC_END(xen_save_fl_direct)
  */
 SYM_FUNC_START(xen_restore_fl_direct)
 	FRAME_BEGIN
-#ifdef CONFIG_X86_64
 	testw $X86_EFLAGS_IF, %di
-#else
-	testb $X86_EFLAGS_IF>>8, %ah
-#endif
 	setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 	/*
 	 * Preempt here doesn't matter because that will deal with any
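Both removed forms of this test poll the same flag: X86_EFLAGS_IF is bit 9 of EFLAGS, so the 32-bit variant shifted the mask down a byte and tested %ah where the surviving code tests the full 16-bit %di. A quick standalone check that the two encodings agree:

```c
#include <assert.h>
#include <stdint.h>

#define X86_EFLAGS_IF (1u << 9)	/* interrupt-enable flag, bit 9 */

int main(void)
{
	for (uint32_t eflags = 0; eflags < (1u << 12); eflags++) {
		/* surviving form:  testw $X86_EFLAGS_IF, %di */
		int wide = ((eflags & 0xffff) & X86_EFLAGS_IF) != 0;
		/* removed form:    testb $X86_EFLAGS_IF>>8, %ah */
		int high = (((eflags >> 8) & 0xff) & (X86_EFLAGS_IF >> 8)) != 0;
		assert(wide == high);
	}
	return 0;
}
```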
@@ -104,15 +100,6 @@ SYM_FUNC_END(xen_restore_fl_direct)
  */
 SYM_FUNC_START(check_events)
 	FRAME_BEGIN
-#ifdef CONFIG_X86_32
-	push %eax
-	push %ecx
-	push %edx
-	call xen_force_evtchn_callback
-	pop %edx
-	pop %ecx
-	pop %eax
-#else
 	push %rax
 	push %rcx
 	push %rdx
@@ -132,7 +119,6 @@ SYM_FUNC_START(check_events)
 	pop %rdx
 	pop %rcx
 	pop %rax
-#endif
 	FRAME_END
 	ret
SYM_FUNC_END(check_events)
--- a/arch/x86/xen/xen-asm_32.S
+++ /dev/null
@@ -1,185 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Asm versions of Xen pv-ops, suitable for direct use.
- *
- * We only bother with direct forms (ie, vcpu in pda) of the
- * operations here; the indirect forms are better handled in C.
- */
-
-#include <asm/thread_info.h>
-#include <asm/processor-flags.h>
-#include <asm/segment.h>
-#include <asm/asm.h>
-
-#include <xen/interface/xen.h>
-
-#include <linux/linkage.h>
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI  0x80000000
-
-/*
- * This is run where a normal iret would be run, with the same stack setup:
- *	8: eflags
- *	4: cs
- *	esp-> 0: eip
- *
- * This attempts to make sure that any pending events are dealt with
- * on return to usermode, but there is a small window in which an
- * event can happen just before entering usermode.  If the nested
- * interrupt ends up setting one of the TIF_WORK_MASK pending work
- * flags, they will not be tested again before returning to
- * usermode. This means that a process can end up with pending work,
- * which will be unprocessed until the process enters and leaves the
- * kernel again, which could be an unbounded amount of time.  This
- * means that a pending signal or reschedule event could be
- * indefinitely delayed.
- *
- * The fix is to notice a nested interrupt in the critical window, and
- * if one occurs, then fold the nested interrupt into the current
- * interrupt stack frame, and re-process it iteratively rather than
- * recursively.  This means that it will exit via the normal path, and
- * all pending work will be dealt with appropriately.
- *
- * Because the nested interrupt handler needs to deal with the current
- * stack state in whatever form its in, we keep things simple by only
- * using a single register which is pushed/popped on the stack.
- */
-
-.macro POP_FS
-1:
-	popw %fs
-.pushsection .fixup, "ax"
-2:	movw $0, (%esp)
-	jmp 1b
-.popsection
-	_ASM_EXTABLE(1b,2b)
-.endm
-
-SYM_CODE_START(xen_iret)
-	/* test eflags for special cases */
-	testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
-	jnz hyper_iret
-
-	push %eax
-	ESP_OFFSET=4	# bytes pushed onto stack
-
-	/* Store vcpu_info pointer for easy access */
-#ifdef CONFIG_SMP
-	pushw %fs
-	movl $(__KERNEL_PERCPU), %eax
-	movl %eax, %fs
-	movl %fs:xen_vcpu, %eax
-	POP_FS
-#else
-	movl %ss:xen_vcpu, %eax
-#endif
-
-	/* check IF state we're restoring */
-	testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
-
-	/*
-	 * Maybe enable events.  Once this happens we could get a
-	 * recursive event, so the critical region starts immediately
-	 * afterwards.  However, if that happens we don't end up
-	 * resuming the code, so we don't have to be worried about
-	 * being preempted to another CPU.
-	 */
-	setz %ss:XEN_vcpu_info_mask(%eax)
-xen_iret_start_crit:
-
-	/* check for unmasked and pending */
-	cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
-
-	/*
-	 * If there's something pending, mask events again so we can
-	 * jump back into exc_xen_hypervisor_callback. Otherwise do not
-	 * touch XEN_vcpu_info_mask.
-	 */
-	jne 1f
-	movb $1, %ss:XEN_vcpu_info_mask(%eax)
-
-1:	popl %eax
-
-	/*
-	 * From this point on the registers are restored and the stack
-	 * updated, so we don't need to worry about it if we're
-	 * preempted
-	 */
-iret_restore_end:
-
-	/*
-	 * Jump to hypervisor_callback after fixing up the stack.
-	 * Events are masked, so jumping out of the critical region is
-	 * OK.
-	 */
-	je xen_asm_exc_xen_hypervisor_callback
-
-1:	iret
-xen_iret_end_crit:
-	_ASM_EXTABLE(1b, asm_iret_error)
-
-hyper_iret:
-	/* put this out of line since its very rarely used */
-	jmp hypercall_page + __HYPERVISOR_iret * 32
-SYM_CODE_END(xen_iret)
-
-	.globl xen_iret_start_crit, xen_iret_end_crit
-
-/*
- * This is called by xen_asm_exc_xen_hypervisor_callback in entry_32.S when it sees
- * that the EIP at the time of interrupt was between
- * xen_iret_start_crit and xen_iret_end_crit.
- *
- * The stack format at this point is:
- *	----------------
- *	 ss		: (ss/esp may be present if we came from usermode)
- *	 esp		:
- *	 eflags		}  outer exception info
- *	 cs		}
- *	 eip		}
- *	----------------
- *	 eax		:  outer eax if it hasn't been restored
- *	----------------
- *	 eflags		}
- *	 cs		}  nested exception info
- *	 eip		}
- *	 return address	: (into xen_asm_exc_xen_hypervisor_callback)
- *
- * In order to deliver the nested exception properly, we need to discard the
- * nested exception frame such that when we handle the exception, we do it
- * in the context of the outer exception rather than starting a new one.
- *
- * The only caveat is that if the outer eax hasn't been restored yet (i.e.
- * it's still on stack), we need to restore its value here.
- */
-.pushsection .noinstr.text, "ax"
-SYM_CODE_START(xen_iret_crit_fixup)
-	/*
-	 * Paranoia: Make sure we're really coming from kernel space.
-	 * One could imagine a case where userspace jumps into the
-	 * critical range address, but just before the CPU delivers a
-	 * PF, it decides to deliver an interrupt instead.  Unlikely?
-	 * Definitely.  Easy to avoid?  Yes.
-	 */
-	testb $2, 2*4(%esp)		/* nested CS */
-	jnz 2f
-
-	/*
-	 * If eip is before iret_restore_end then stack
-	 * hasn't been restored yet.
-	 */
-	cmpl $iret_restore_end, 1*4(%esp)
-	jae 1f
-
-	movl 4*4(%esp), %eax		/* load outer EAX */
-	ret $4*4			/* discard nested EIP, CS, and EFLAGS as
-					 * well as the just restored EAX */
-
-1:
-	ret $3*4			/* discard nested EIP, CS, and EFLAGS */
-
-2:
-	ret
-SYM_CODE_END(xen_iret_crit_fixup)
-.popsection
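The deleted xen_iret_crit_fixup is compact 32-bit assembly; its control flow boils down to a three-way classification of the nested frame. A C pseudocode sketch of only that decision (names follow the removed file; this is an illustration, not a drop-in replacement):

```c
#include <assert.h>

/* How much of the nested exception frame to discard. */
enum fixup_action {
	NOTHING,	/* interrupted userspace: frame is fine as-is */
	RELOAD_EAX,	/* outer %eax still on stack: reload it, drop 4 words */
	DROP_FRAME,	/* registers restored: drop EIP/CS/EFLAGS (3 words) */
};

static enum fixup_action classify(unsigned int nested_cs,
				  unsigned long nested_eip,
				  unsigned long iret_restore_end)
{
	if (nested_cs & 2)		/* testb $2, CS: came from ring > 0 */
		return NOTHING;
	if (nested_eip < iret_restore_end)	/* cmpl $iret_restore_end */
		return RELOAD_EAX;
	return DROP_FRAME;
}

int main(void)
{
	assert(classify(0x7b, 0x1000, 0x2000) == NOTHING);	/* user CS */
	assert(classify(0x60, 0x1000, 0x2000) == RELOAD_EAX);	/* before end */
	assert(classify(0x60, 0x3000, 0x2000) == DROP_FRAME);
	return 0;
}
```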
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -35,13 +35,8 @@ SYM_CODE_START(startup_xen)
 	rep __ASM_SIZE(stos)
 
 	mov %_ASM_SI, xen_start_info
-#ifdef CONFIG_X86_64
 	mov initial_stack(%rip), %rsp
-#else
-	mov initial_stack, %esp
-#endif
 
-#ifdef CONFIG_X86_64
 	/* Set up %gs.
 	 *
 	 * The base of %gs always points to fixed_percpu_data.  If the
@@ -53,7 +48,6 @@ SYM_CODE_START(startup_xen)
 	movq	$INIT_PER_CPU_VAR(fixed_percpu_data),%rax
 	cdq
 	wrmsr
-#endif
 
 	call xen_start_kernel
 SYM_CODE_END(startup_xen)
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -33,7 +33,6 @@ void xen_setup_mfn_list_list(void);
 void xen_build_mfn_list_list(void);
 void xen_setup_machphys_mapping(void);
 void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
-void xen_reserve_top(void);
 void __init xen_reserve_special_pages(void);
 void __init xen_pt_check_e820(void);
 
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -52,9 +52,7 @@ config XEN_BALLOON_MEMORY_HOTPLUG
 
 config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
 	int "Hotplugged memory limit (in GiB) for a PV guest"
-	default 512 if X86_64
-	default 4 if X86_32
-	range 0 64 if X86_32
+	default 512
 	depends on XEN_HAVE_PVMMU
 	depends on XEN_BALLOON_MEMORY_HOTPLUG
 	help