forked from mirrors/linux
		
	Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (80 commits)
  KVM: Use CPU_DYING for disabling virtualization
  KVM: Tune hotplug/suspend IPIs
  KVM: Keep track of which cpus have virtualization enabled
  SMP: Allow smp_call_function_single() to current cpu
  i386: Allow smp_call_function_single() to current cpu
  x86_64: Allow smp_call_function_single() to current cpu
  HOTPLUG: Adapt thermal throttle to CPU_DYING
  HOTPLUG: Adapt cpuset hotplug callback to CPU_DYING
  HOTPLUG: Add CPU_DYING notifier
  KVM: Clean up #includes
  KVM: Remove kvmfs in favor of the anonymous inodes source
  KVM: SVM: Reliably detect if SVM was disabled by BIOS
  KVM: VMX: Remove unnecessary code in vmx_tlb_flush()
  KVM: MMU: Fix Wrong tlb flush order
  KVM: VMX: Reinitialize the real-mode tss when entering real mode
  KVM: Avoid useless memory write when possible
  KVM: Fix x86 emulator writeback
  KVM: Add support for in-kernel pio handlers
  KVM: VMX: Fix interrupt checking on lightweight exit
  KVM: Adds support for in-kernel mmio handlers
  ...
This commit is contained in:
		
						commit
						49c13b51a1
					
				
					 18 changed files with 1203 additions and 768 deletions
				
			
		| 
						 | 
					@ -134,19 +134,21 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
 | 
				
			||||||
	int err;
 | 
						int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sys_dev = get_cpu_sysdev(cpu);
 | 
						sys_dev = get_cpu_sysdev(cpu);
 | 
				
			||||||
	mutex_lock(&therm_cpu_lock);
 | 
					 | 
				
			||||||
	switch (action) {
 | 
						switch (action) {
 | 
				
			||||||
	case CPU_ONLINE:
 | 
						case CPU_ONLINE:
 | 
				
			||||||
	case CPU_ONLINE_FROZEN:
 | 
						case CPU_ONLINE_FROZEN:
 | 
				
			||||||
 | 
							mutex_lock(&therm_cpu_lock);
 | 
				
			||||||
		err = thermal_throttle_add_dev(sys_dev);
 | 
							err = thermal_throttle_add_dev(sys_dev);
 | 
				
			||||||
 | 
							mutex_unlock(&therm_cpu_lock);
 | 
				
			||||||
		WARN_ON(err);
 | 
							WARN_ON(err);
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
	case CPU_DEAD:
 | 
						case CPU_DEAD:
 | 
				
			||||||
	case CPU_DEAD_FROZEN:
 | 
						case CPU_DEAD_FROZEN:
 | 
				
			||||||
 | 
							mutex_lock(&therm_cpu_lock);
 | 
				
			||||||
		thermal_throttle_remove_dev(sys_dev);
 | 
							thermal_throttle_remove_dev(sys_dev);
 | 
				
			||||||
 | 
							mutex_unlock(&therm_cpu_lock);
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	mutex_unlock(&therm_cpu_lock);
 | 
					 | 
				
			||||||
	return NOTIFY_OK;
 | 
						return NOTIFY_OK;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -47,7 +47,7 @@ int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
 | 
				
			||||||
EXPORT_SYMBOL(smp_call_function);
 | 
					EXPORT_SYMBOL(smp_call_function);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * smp_call_function_single - Run a function on another CPU
 | 
					 * smp_call_function_single - Run a function on a specific CPU
 | 
				
			||||||
 * @cpu: The target CPU.  Cannot be the calling CPU.
 | 
					 * @cpu: The target CPU.  May be the calling CPU, in which case @func is run locally with interrupts disabled.
 | 
				
			||||||
 * @func: The function to run. This must be fast and non-blocking.
 | 
					 * @func: The function to run. This must be fast and non-blocking.
 | 
				
			||||||
 * @info: An arbitrary pointer to pass to the function.
 | 
					 * @info: An arbitrary pointer to pass to the function.
 | 
				
			||||||
| 
						 | 
					@ -66,9 +66,11 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 | 
				
			||||||
	int ret;
 | 
						int ret;
 | 
				
			||||||
	int me = get_cpu();
 | 
						int me = get_cpu();
 | 
				
			||||||
	if (cpu == me) {
 | 
						if (cpu == me) {
 | 
				
			||||||
		WARN_ON(1);
 | 
							local_irq_disable();
 | 
				
			||||||
 | 
							func(info);
 | 
				
			||||||
 | 
							local_irq_enable();
 | 
				
			||||||
		put_cpu();
 | 
							put_cpu();
 | 
				
			||||||
		return -EBUSY;
 | 
							return 0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
 | 
						ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -357,7 +357,7 @@ __smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * smp_call_function_single - Run a function on another CPU
 | 
					 * smp_call_function_single - Run a function on a specific CPU
 | 
				
			||||||
 * @func: The function to run. This must be fast and non-blocking.
 | 
					 * @func: The function to run. This must be fast and non-blocking.
 | 
				
			||||||
 * @info: An arbitrary pointer to pass to the function.
 | 
					 * @info: An arbitrary pointer to pass to the function.
 | 
				
			||||||
 * @nonatomic: Currently unused.
 | 
					 * @nonatomic: Currently unused.
 | 
				
			||||||
| 
						 | 
					@ -374,14 +374,18 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	/* prevent preemption and reschedule on another processor */
 | 
						/* prevent preemption and reschedule on another processor */
 | 
				
			||||||
	int me = get_cpu();
 | 
						int me = get_cpu();
 | 
				
			||||||
	if (cpu == me) {
 | 
					 | 
				
			||||||
		put_cpu();
 | 
					 | 
				
			||||||
		return 0;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Can deadlock when called with interrupts disabled */
 | 
						/* Can deadlock when called with interrupts disabled */
 | 
				
			||||||
	WARN_ON(irqs_disabled());
 | 
						WARN_ON(irqs_disabled());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (cpu == me) {
 | 
				
			||||||
 | 
							local_irq_disable();
 | 
				
			||||||
 | 
							func(info);
 | 
				
			||||||
 | 
							local_irq_enable();
 | 
				
			||||||
 | 
							put_cpu();
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	spin_lock_bh(&call_lock);
 | 
						spin_lock_bh(&call_lock);
 | 
				
			||||||
	__smp_call_function_single(cpu, func, info, nonatomic, wait);
 | 
						__smp_call_function_single(cpu, func, info, nonatomic, wait);
 | 
				
			||||||
	spin_unlock_bh(&call_lock);
 | 
						spin_unlock_bh(&call_lock);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,12 +1,17 @@
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# KVM configuration
 | 
					# KVM configuration
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
menu "Virtualization"
 | 
					menuconfig VIRTUALIZATION
 | 
				
			||||||
 | 
						bool "Virtualization"
 | 
				
			||||||
	depends on X86
 | 
						depends on X86
 | 
				
			||||||
 | 
						default y
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if VIRTUALIZATION
 | 
				
			||||||
 | 
					
 | 
				
			||||||
config KVM
 | 
					config KVM
 | 
				
			||||||
	tristate "Kernel-based Virtual Machine (KVM) support"
 | 
						tristate "Kernel-based Virtual Machine (KVM) support"
 | 
				
			||||||
	depends on X86 && EXPERIMENTAL
 | 
						depends on X86 && EXPERIMENTAL
 | 
				
			||||||
 | 
						depends on X86_CMPXCHG64 || 64BIT
 | 
				
			||||||
	---help---
 | 
						---help---
 | 
				
			||||||
	  Support hosting fully virtualized guest machines using hardware
 | 
						  Support hosting fully virtualized guest machines using hardware
 | 
				
			||||||
	  virtualization extensions.  You will need a fairly recent
 | 
						  virtualization extensions.  You will need a fairly recent
 | 
				
			||||||
| 
						 | 
					@ -35,4 +40,4 @@ config KVM_AMD
 | 
				
			||||||
	  Provides support for KVM on AMD processors equipped with the AMD-V
 | 
						  Provides support for KVM on AMD processors equipped with the AMD-V
 | 
				
			||||||
	  (SVM) extensions.
 | 
						  (SVM) extensions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
endmenu
 | 
					endif # VIRTUALIZATION
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,6 +10,8 @@
 | 
				
			||||||
#include <linux/list.h>
 | 
					#include <linux/list.h>
 | 
				
			||||||
#include <linux/mutex.h>
 | 
					#include <linux/mutex.h>
 | 
				
			||||||
#include <linux/spinlock.h>
 | 
					#include <linux/spinlock.h>
 | 
				
			||||||
 | 
					#include <linux/signal.h>
 | 
				
			||||||
 | 
					#include <linux/sched.h>
 | 
				
			||||||
#include <linux/mm.h>
 | 
					#include <linux/mm.h>
 | 
				
			||||||
#include <asm/signal.h>
 | 
					#include <asm/signal.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -18,6 +20,7 @@
 | 
				
			||||||
#include <linux/kvm_para.h>
 | 
					#include <linux/kvm_para.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define CR0_PE_MASK (1ULL << 0)
 | 
					#define CR0_PE_MASK (1ULL << 0)
 | 
				
			||||||
 | 
					#define CR0_MP_MASK (1ULL << 1)
 | 
				
			||||||
#define CR0_TS_MASK (1ULL << 3)
 | 
					#define CR0_TS_MASK (1ULL << 3)
 | 
				
			||||||
#define CR0_NE_MASK (1ULL << 5)
 | 
					#define CR0_NE_MASK (1ULL << 5)
 | 
				
			||||||
#define CR0_WP_MASK (1ULL << 16)
 | 
					#define CR0_WP_MASK (1ULL << 16)
 | 
				
			||||||
| 
						 | 
					@ -42,7 +45,8 @@
 | 
				
			||||||
	(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \
 | 
						(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \
 | 
				
			||||||
	 | CR0_NW_MASK | CR0_CD_MASK)
 | 
						 | CR0_NW_MASK | CR0_CD_MASK)
 | 
				
			||||||
#define KVM_VM_CR0_ALWAYS_ON \
 | 
					#define KVM_VM_CR0_ALWAYS_ON \
 | 
				
			||||||
	(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK)
 | 
						(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK | CR0_TS_MASK \
 | 
				
			||||||
 | 
						 | CR0_MP_MASK)
 | 
				
			||||||
#define KVM_GUEST_CR4_MASK \
 | 
					#define KVM_GUEST_CR4_MASK \
 | 
				
			||||||
	(CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK)
 | 
						(CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK)
 | 
				
			||||||
#define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK)
 | 
					#define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK)
 | 
				
			||||||
| 
						 | 
					@ -51,10 +55,10 @@
 | 
				
			||||||
#define INVALID_PAGE (~(hpa_t)0)
 | 
					#define INVALID_PAGE (~(hpa_t)0)
 | 
				
			||||||
#define UNMAPPED_GVA (~(gpa_t)0)
 | 
					#define UNMAPPED_GVA (~(gpa_t)0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define KVM_MAX_VCPUS 1
 | 
					#define KVM_MAX_VCPUS 4
 | 
				
			||||||
#define KVM_ALIAS_SLOTS 4
 | 
					#define KVM_ALIAS_SLOTS 4
 | 
				
			||||||
#define KVM_MEMORY_SLOTS 4
 | 
					#define KVM_MEMORY_SLOTS 4
 | 
				
			||||||
#define KVM_NUM_MMU_PAGES 256
 | 
					#define KVM_NUM_MMU_PAGES 1024
 | 
				
			||||||
#define KVM_MIN_FREE_MMU_PAGES 5
 | 
					#define KVM_MIN_FREE_MMU_PAGES 5
 | 
				
			||||||
#define KVM_REFILL_PAGES 25
 | 
					#define KVM_REFILL_PAGES 25
 | 
				
			||||||
#define KVM_MAX_CPUID_ENTRIES 40
 | 
					#define KVM_MAX_CPUID_ENTRIES 40
 | 
				
			||||||
| 
						 | 
					@ -79,6 +83,11 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define KVM_PIO_PAGE_OFFSET 1
 | 
					#define KVM_PIO_PAGE_OFFSET 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * vcpu->requests bit members
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#define KVM_TLB_FLUSH 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Address types:
 | 
					 * Address types:
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
| 
						 | 
					@ -137,7 +146,7 @@ struct kvm_mmu_page {
 | 
				
			||||||
	gfn_t gfn;
 | 
						gfn_t gfn;
 | 
				
			||||||
	union kvm_mmu_page_role role;
 | 
						union kvm_mmu_page_role role;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	hpa_t page_hpa;
 | 
						u64 *spt;
 | 
				
			||||||
	unsigned long slot_bitmap; /* One bit set per slot which has memory
 | 
						unsigned long slot_bitmap; /* One bit set per slot which has memory
 | 
				
			||||||
				    * in this shadow page.
 | 
									    * in this shadow page.
 | 
				
			||||||
				    */
 | 
									    */
 | 
				
			||||||
| 
						 | 
					@ -232,6 +241,7 @@ struct kvm_pio_request {
 | 
				
			||||||
	struct page *guest_pages[2];
 | 
						struct page *guest_pages[2];
 | 
				
			||||||
	unsigned guest_page_offset;
 | 
						unsigned guest_page_offset;
 | 
				
			||||||
	int in;
 | 
						int in;
 | 
				
			||||||
 | 
						int port;
 | 
				
			||||||
	int size;
 | 
						int size;
 | 
				
			||||||
	int string;
 | 
						int string;
 | 
				
			||||||
	int down;
 | 
						int down;
 | 
				
			||||||
| 
						 | 
					@ -252,8 +262,70 @@ struct kvm_stat {
 | 
				
			||||||
	u32 halt_exits;
 | 
						u32 halt_exits;
 | 
				
			||||||
	u32 request_irq_exits;
 | 
						u32 request_irq_exits;
 | 
				
			||||||
	u32 irq_exits;
 | 
						u32 irq_exits;
 | 
				
			||||||
 | 
						u32 light_exits;
 | 
				
			||||||
 | 
						u32 efer_reload;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct kvm_io_device {
 | 
				
			||||||
 | 
						void (*read)(struct kvm_io_device *this,
 | 
				
			||||||
 | 
							     gpa_t addr,
 | 
				
			||||||
 | 
							     int len,
 | 
				
			||||||
 | 
							     void *val);
 | 
				
			||||||
 | 
						void (*write)(struct kvm_io_device *this,
 | 
				
			||||||
 | 
							      gpa_t addr,
 | 
				
			||||||
 | 
							      int len,
 | 
				
			||||||
 | 
							      const void *val);
 | 
				
			||||||
 | 
						int (*in_range)(struct kvm_io_device *this, gpa_t addr);
 | 
				
			||||||
 | 
						void (*destructor)(struct kvm_io_device *this);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						void             *private;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void kvm_iodevice_read(struct kvm_io_device *dev,
 | 
				
			||||||
 | 
									     gpa_t addr,
 | 
				
			||||||
 | 
									     int len,
 | 
				
			||||||
 | 
									     void *val)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						dev->read(dev, addr, len, val);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void kvm_iodevice_write(struct kvm_io_device *dev,
 | 
				
			||||||
 | 
									      gpa_t addr,
 | 
				
			||||||
 | 
									      int len,
 | 
				
			||||||
 | 
									      const void *val)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						dev->write(dev, addr, len, val);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return dev->in_range(dev, addr);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (dev->destructor)
 | 
				
			||||||
 | 
							dev->destructor(dev);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * It would be nice to use something smarter than a linear search, TBD...
 | 
				
			||||||
 | 
					 * Thankfully we don't expect many devices to register (famous last words :),
 | 
				
			||||||
 | 
					 * so until then it will suffice.  At least it's abstracted so we can change
 | 
				
			||||||
 | 
					 * in one place.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					struct kvm_io_bus {
 | 
				
			||||||
 | 
						int                   dev_count;
 | 
				
			||||||
 | 
					#define NR_IOBUS_DEVS 6
 | 
				
			||||||
 | 
						struct kvm_io_device *devs[NR_IOBUS_DEVS];
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void kvm_io_bus_init(struct kvm_io_bus *bus);
 | 
				
			||||||
 | 
					void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 | 
				
			||||||
 | 
					struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
 | 
				
			||||||
 | 
					void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 | 
				
			||||||
 | 
								     struct kvm_io_device *dev);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct kvm_vcpu {
 | 
					struct kvm_vcpu {
 | 
				
			||||||
	struct kvm *kvm;
 | 
						struct kvm *kvm;
 | 
				
			||||||
	union {
 | 
						union {
 | 
				
			||||||
| 
						 | 
					@ -266,6 +338,8 @@ struct kvm_vcpu {
 | 
				
			||||||
	u64 host_tsc;
 | 
						u64 host_tsc;
 | 
				
			||||||
	struct kvm_run *run;
 | 
						struct kvm_run *run;
 | 
				
			||||||
	int interrupt_window_open;
 | 
						int interrupt_window_open;
 | 
				
			||||||
 | 
						int guest_mode;
 | 
				
			||||||
 | 
						unsigned long requests;
 | 
				
			||||||
	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
 | 
						unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
 | 
				
			||||||
#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
 | 
					#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
 | 
				
			||||||
	unsigned long irq_pending[NR_IRQ_WORDS];
 | 
						unsigned long irq_pending[NR_IRQ_WORDS];
 | 
				
			||||||
| 
						 | 
					@ -285,15 +359,20 @@ struct kvm_vcpu {
 | 
				
			||||||
	u64 apic_base;
 | 
						u64 apic_base;
 | 
				
			||||||
	u64 ia32_misc_enable_msr;
 | 
						u64 ia32_misc_enable_msr;
 | 
				
			||||||
	int nmsrs;
 | 
						int nmsrs;
 | 
				
			||||||
 | 
						int save_nmsrs;
 | 
				
			||||||
 | 
						int msr_offset_efer;
 | 
				
			||||||
 | 
					#ifdef CONFIG_X86_64
 | 
				
			||||||
 | 
						int msr_offset_kernel_gs_base;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
	struct vmx_msr_entry *guest_msrs;
 | 
						struct vmx_msr_entry *guest_msrs;
 | 
				
			||||||
	struct vmx_msr_entry *host_msrs;
 | 
						struct vmx_msr_entry *host_msrs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct list_head free_pages;
 | 
					 | 
				
			||||||
	struct kvm_mmu_page page_header_buf[KVM_NUM_MMU_PAGES];
 | 
					 | 
				
			||||||
	struct kvm_mmu mmu;
 | 
						struct kvm_mmu mmu;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	struct kvm_mmu_memory_cache mmu_pte_chain_cache;
 | 
						struct kvm_mmu_memory_cache mmu_pte_chain_cache;
 | 
				
			||||||
	struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
 | 
						struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
 | 
				
			||||||
 | 
						struct kvm_mmu_memory_cache mmu_page_cache;
 | 
				
			||||||
 | 
						struct kvm_mmu_memory_cache mmu_page_header_cache;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	gfn_t last_pt_write_gfn;
 | 
						gfn_t last_pt_write_gfn;
 | 
				
			||||||
	int   last_pt_write_count;
 | 
						int   last_pt_write_count;
 | 
				
			||||||
| 
						 | 
					@ -305,6 +384,11 @@ struct kvm_vcpu {
 | 
				
			||||||
	char *guest_fx_image;
 | 
						char *guest_fx_image;
 | 
				
			||||||
	int fpu_active;
 | 
						int fpu_active;
 | 
				
			||||||
	int guest_fpu_loaded;
 | 
						int guest_fpu_loaded;
 | 
				
			||||||
 | 
						struct vmx_host_state {
 | 
				
			||||||
 | 
							int loaded;
 | 
				
			||||||
 | 
							u16 fs_sel, gs_sel, ldt_sel;
 | 
				
			||||||
 | 
							int fs_gs_ldt_reload_needed;
 | 
				
			||||||
 | 
						} vmx_host_state;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	int mmio_needed;
 | 
						int mmio_needed;
 | 
				
			||||||
	int mmio_read_completed;
 | 
						int mmio_read_completed;
 | 
				
			||||||
| 
						 | 
					@ -331,6 +415,7 @@ struct kvm_vcpu {
 | 
				
			||||||
			u32 ar;
 | 
								u32 ar;
 | 
				
			||||||
		} tr, es, ds, fs, gs;
 | 
							} tr, es, ds, fs, gs;
 | 
				
			||||||
	} rmode;
 | 
						} rmode;
 | 
				
			||||||
 | 
						int halt_request; /* real mode on Intel only */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	int cpuid_nent;
 | 
						int cpuid_nent;
 | 
				
			||||||
	struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
 | 
						struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
 | 
				
			||||||
| 
						 | 
					@ -362,12 +447,15 @@ struct kvm {
 | 
				
			||||||
	struct list_head active_mmu_pages;
 | 
						struct list_head active_mmu_pages;
 | 
				
			||||||
	int n_free_mmu_pages;
 | 
						int n_free_mmu_pages;
 | 
				
			||||||
	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 | 
						struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 | 
				
			||||||
 | 
						int nvcpus;
 | 
				
			||||||
	struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
 | 
						struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
 | 
				
			||||||
	int memory_config_version;
 | 
						int memory_config_version;
 | 
				
			||||||
	int busy;
 | 
						int busy;
 | 
				
			||||||
	unsigned long rmap_overflow;
 | 
						unsigned long rmap_overflow;
 | 
				
			||||||
	struct list_head vm_list;
 | 
						struct list_head vm_list;
 | 
				
			||||||
	struct file *filp;
 | 
						struct file *filp;
 | 
				
			||||||
 | 
						struct kvm_io_bus mmio_bus;
 | 
				
			||||||
 | 
						struct kvm_io_bus pio_bus;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct descriptor_table {
 | 
					struct descriptor_table {
 | 
				
			||||||
| 
						 | 
					@ -488,6 +576,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 | 
				
			||||||
		  int size, unsigned long count, int string, int down,
 | 
							  int size, unsigned long count, int string, int down,
 | 
				
			||||||
		  gva_t address, int rep, unsigned port);
 | 
							  gva_t address, int rep, unsigned port);
 | 
				
			||||||
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 | 
					void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 | 
				
			||||||
 | 
					int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 | 
				
			||||||
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
 | 
					int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
 | 
				
			||||||
int emulate_clts(struct kvm_vcpu *vcpu);
 | 
					int emulate_clts(struct kvm_vcpu *vcpu);
 | 
				
			||||||
int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr,
 | 
					int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr,
 | 
				
			||||||
| 
						 | 
					@ -511,6 +600,7 @@ void save_msrs(struct vmx_msr_entry *e, int n);
 | 
				
			||||||
void kvm_resched(struct kvm_vcpu *vcpu);
 | 
					void kvm_resched(struct kvm_vcpu *vcpu);
 | 
				
			||||||
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 | 
					void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 | 
				
			||||||
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 | 
					void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 | 
				
			||||||
 | 
					void kvm_flush_remote_tlbs(struct kvm *kvm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int kvm_read_guest(struct kvm_vcpu *vcpu,
 | 
					int kvm_read_guest(struct kvm_vcpu *vcpu,
 | 
				
			||||||
	       gva_t addr,
 | 
						       gva_t addr,
 | 
				
			||||||
| 
						 | 
					@ -524,10 +614,12 @@ int kvm_write_guest(struct kvm_vcpu *vcpu,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
unsigned long segment_base(u16 selector);
 | 
					unsigned long segment_base(u16 selector);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
 | 
					void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 | 
				
			||||||
void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
 | 
							       const u8 *old, const u8 *new, int bytes);
 | 
				
			||||||
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 | 
					int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 | 
				
			||||||
void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 | 
					void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 | 
				
			||||||
 | 
					int kvm_mmu_load(struct kvm_vcpu *vcpu);
 | 
				
			||||||
 | 
					void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);
 | 
					int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -539,6 +631,14 @@ static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 | 
				
			||||||
	return vcpu->mmu.page_fault(vcpu, gva, error_code);
 | 
						return vcpu->mmu.page_fault(vcpu, gva, error_code);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (likely(vcpu->mmu.root_hpa != INVALID_PAGE))
 | 
				
			||||||
 | 
							return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return kvm_mmu_load(vcpu);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int is_long_mode(struct kvm_vcpu *vcpu)
 | 
					static inline int is_long_mode(struct kvm_vcpu *vcpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
#ifdef CONFIG_X86_64
 | 
					#ifdef CONFIG_X86_64
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -16,34 +16,33 @@
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "kvm.h"
 | 
					#include "kvm.h"
 | 
				
			||||||
 | 
					#include "x86_emulate.h"
 | 
				
			||||||
 | 
					#include "segment_descriptor.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/kvm.h>
 | 
					#include <linux/kvm.h>
 | 
				
			||||||
#include <linux/module.h>
 | 
					#include <linux/module.h>
 | 
				
			||||||
#include <linux/errno.h>
 | 
					#include <linux/errno.h>
 | 
				
			||||||
#include <linux/magic.h>
 | 
					 | 
				
			||||||
#include <asm/processor.h>
 | 
					 | 
				
			||||||
#include <linux/percpu.h>
 | 
					#include <linux/percpu.h>
 | 
				
			||||||
#include <linux/gfp.h>
 | 
					#include <linux/gfp.h>
 | 
				
			||||||
#include <asm/msr.h>
 | 
					 | 
				
			||||||
#include <linux/mm.h>
 | 
					#include <linux/mm.h>
 | 
				
			||||||
#include <linux/miscdevice.h>
 | 
					#include <linux/miscdevice.h>
 | 
				
			||||||
#include <linux/vmalloc.h>
 | 
					#include <linux/vmalloc.h>
 | 
				
			||||||
#include <asm/uaccess.h>
 | 
					 | 
				
			||||||
#include <linux/reboot.h>
 | 
					#include <linux/reboot.h>
 | 
				
			||||||
#include <asm/io.h>
 | 
					 | 
				
			||||||
#include <linux/debugfs.h>
 | 
					#include <linux/debugfs.h>
 | 
				
			||||||
#include <linux/highmem.h>
 | 
					#include <linux/highmem.h>
 | 
				
			||||||
#include <linux/file.h>
 | 
					#include <linux/file.h>
 | 
				
			||||||
#include <asm/desc.h>
 | 
					 | 
				
			||||||
#include <linux/sysdev.h>
 | 
					#include <linux/sysdev.h>
 | 
				
			||||||
#include <linux/cpu.h>
 | 
					#include <linux/cpu.h>
 | 
				
			||||||
#include <linux/file.h>
 | 
					 | 
				
			||||||
#include <linux/fs.h>
 | 
					 | 
				
			||||||
#include <linux/mount.h>
 | 
					 | 
				
			||||||
#include <linux/sched.h>
 | 
					#include <linux/sched.h>
 | 
				
			||||||
 | 
					#include <linux/cpumask.h>
 | 
				
			||||||
 | 
					#include <linux/smp.h>
 | 
				
			||||||
 | 
					#include <linux/anon_inodes.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "x86_emulate.h"
 | 
					#include <asm/processor.h>
 | 
				
			||||||
#include "segment_descriptor.h"
 | 
					#include <asm/msr.h>
 | 
				
			||||||
 | 
					#include <asm/io.h>
 | 
				
			||||||
 | 
					#include <asm/uaccess.h>
 | 
				
			||||||
 | 
					#include <asm/desc.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
MODULE_AUTHOR("Qumranet");
 | 
					MODULE_AUTHOR("Qumranet");
 | 
				
			||||||
MODULE_LICENSE("GPL");
 | 
					MODULE_LICENSE("GPL");
 | 
				
			||||||
| 
						 | 
					@ -51,8 +50,12 @@ MODULE_LICENSE("GPL");
 | 
				
			||||||
static DEFINE_SPINLOCK(kvm_lock);
 | 
					static DEFINE_SPINLOCK(kvm_lock);
 | 
				
			||||||
static LIST_HEAD(vm_list);
 | 
					static LIST_HEAD(vm_list);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static cpumask_t cpus_hardware_enabled;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct kvm_arch_ops *kvm_arch_ops;
 | 
					struct kvm_arch_ops *kvm_arch_ops;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void hardware_disable(void *ignored);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
 | 
					#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct kvm_stats_debugfs_item {
 | 
					static struct kvm_stats_debugfs_item {
 | 
				
			||||||
| 
						 | 
					@ -72,13 +75,13 @@ static struct kvm_stats_debugfs_item {
 | 
				
			||||||
	{ "halt_exits", STAT_OFFSET(halt_exits) },
 | 
						{ "halt_exits", STAT_OFFSET(halt_exits) },
 | 
				
			||||||
	{ "request_irq", STAT_OFFSET(request_irq_exits) },
 | 
						{ "request_irq", STAT_OFFSET(request_irq_exits) },
 | 
				
			||||||
	{ "irq_exits", STAT_OFFSET(irq_exits) },
 | 
						{ "irq_exits", STAT_OFFSET(irq_exits) },
 | 
				
			||||||
 | 
						{ "light_exits", STAT_OFFSET(light_exits) },
 | 
				
			||||||
 | 
						{ "efer_reload", STAT_OFFSET(efer_reload) },
 | 
				
			||||||
	{ NULL }
 | 
						{ NULL }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct dentry *debugfs_dir;
 | 
					static struct dentry *debugfs_dir;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct vfsmount *kvmfs_mnt;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define MAX_IO_MSRS 256
 | 
					#define MAX_IO_MSRS 256
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL
 | 
					#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL
 | 
				
			||||||
| 
						 | 
					@ -100,55 +103,6 @@ struct segment_descriptor_64 {
 | 
				
			||||||
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 | 
					static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 | 
				
			||||||
			   unsigned long arg);
 | 
								   unsigned long arg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct inode *kvmfs_inode(struct file_operations *fops)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	int error = -ENOMEM;
 | 
					 | 
				
			||||||
	struct inode *inode = new_inode(kvmfs_mnt->mnt_sb);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (!inode)
 | 
					 | 
				
			||||||
		goto eexit_1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	inode->i_fop = fops;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Mark the inode dirty from the very beginning,
 | 
					 | 
				
			||||||
	 * that way it will never be moved to the dirty
 | 
					 | 
				
			||||||
	 * list because mark_inode_dirty() will think
 | 
					 | 
				
			||||||
	 * that it already _is_ on the dirty list.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	inode->i_state = I_DIRTY;
 | 
					 | 
				
			||||||
	inode->i_mode = S_IRUSR | S_IWUSR;
 | 
					 | 
				
			||||||
	inode->i_uid = current->fsuid;
 | 
					 | 
				
			||||||
	inode->i_gid = current->fsgid;
 | 
					 | 
				
			||||||
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 | 
					 | 
				
			||||||
	return inode;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
eexit_1:
 | 
					 | 
				
			||||||
	return ERR_PTR(error);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static struct file *kvmfs_file(struct inode *inode, void *private_data)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	struct file *file = get_empty_filp();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (!file)
 | 
					 | 
				
			||||||
		return ERR_PTR(-ENFILE);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	file->f_path.mnt = mntget(kvmfs_mnt);
 | 
					 | 
				
			||||||
	file->f_path.dentry = d_alloc_anon(inode);
 | 
					 | 
				
			||||||
	if (!file->f_path.dentry)
 | 
					 | 
				
			||||||
		return ERR_PTR(-ENOMEM);
 | 
					 | 
				
			||||||
	file->f_mapping = inode->i_mapping;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	file->f_pos = 0;
 | 
					 | 
				
			||||||
	file->f_flags = O_RDWR;
 | 
					 | 
				
			||||||
	file->f_op = inode->i_fop;
 | 
					 | 
				
			||||||
	file->f_mode = FMODE_READ | FMODE_WRITE;
 | 
					 | 
				
			||||||
	file->f_version = 0;
 | 
					 | 
				
			||||||
	file->private_data = private_data;
 | 
					 | 
				
			||||||
	return file;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
unsigned long segment_base(u16 selector)
 | 
					unsigned long segment_base(u16 selector)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct descriptor_table gdt;
 | 
						struct descriptor_table gdt;
 | 
				
			||||||
| 
						 | 
					@ -307,6 +261,48 @@ static void vcpu_put(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	mutex_unlock(&vcpu->mutex);
 | 
						mutex_unlock(&vcpu->mutex);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void ack_flush(void *_completed)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						atomic_t *completed = _completed;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						atomic_inc(completed);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void kvm_flush_remote_tlbs(struct kvm *kvm)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int i, cpu, needed;
 | 
				
			||||||
 | 
						cpumask_t cpus;
 | 
				
			||||||
 | 
						struct kvm_vcpu *vcpu;
 | 
				
			||||||
 | 
						atomic_t completed;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						atomic_set(&completed, 0);
 | 
				
			||||||
 | 
						cpus_clear(cpus);
 | 
				
			||||||
 | 
						needed = 0;
 | 
				
			||||||
 | 
						for (i = 0; i < kvm->nvcpus; ++i) {
 | 
				
			||||||
 | 
							vcpu = &kvm->vcpus[i];
 | 
				
			||||||
 | 
							if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
							cpu = vcpu->cpu;
 | 
				
			||||||
 | 
							if (cpu != -1 && cpu != raw_smp_processor_id())
 | 
				
			||||||
 | 
								if (!cpu_isset(cpu, cpus)) {
 | 
				
			||||||
 | 
									cpu_set(cpu, cpus);
 | 
				
			||||||
 | 
									++needed;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * We really want smp_call_function_mask() here.  But that's not
 | 
				
			||||||
 | 
						 * available, so ipi all cpus in parallel and wait for them
 | 
				
			||||||
 | 
						 * to complete.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						for (cpu = first_cpu(cpus); cpu != NR_CPUS; cpu = next_cpu(cpu, cpus))
 | 
				
			||||||
 | 
							smp_call_function_single(cpu, ack_flush, &completed, 1, 0);
 | 
				
			||||||
 | 
						while (atomic_read(&completed) != needed) {
 | 
				
			||||||
 | 
							cpu_relax();
 | 
				
			||||||
 | 
							barrier();
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct kvm *kvm_create_vm(void)
 | 
					static struct kvm *kvm_create_vm(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
 | 
						struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
 | 
				
			||||||
| 
						 | 
					@ -315,8 +311,13 @@ static struct kvm *kvm_create_vm(void)
 | 
				
			||||||
	if (!kvm)
 | 
						if (!kvm)
 | 
				
			||||||
		return ERR_PTR(-ENOMEM);
 | 
							return ERR_PTR(-ENOMEM);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						kvm_io_bus_init(&kvm->pio_bus);
 | 
				
			||||||
	spin_lock_init(&kvm->lock);
 | 
						spin_lock_init(&kvm->lock);
 | 
				
			||||||
	INIT_LIST_HEAD(&kvm->active_mmu_pages);
 | 
						INIT_LIST_HEAD(&kvm->active_mmu_pages);
 | 
				
			||||||
 | 
						spin_lock(&kvm_lock);
 | 
				
			||||||
 | 
						list_add(&kvm->vm_list, &vm_list);
 | 
				
			||||||
 | 
						spin_unlock(&kvm_lock);
 | 
				
			||||||
 | 
						kvm_io_bus_init(&kvm->mmio_bus);
 | 
				
			||||||
	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 | 
						for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 | 
				
			||||||
		struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 | 
							struct kvm_vcpu *vcpu = &kvm->vcpus[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -324,10 +325,6 @@ static struct kvm *kvm_create_vm(void)
 | 
				
			||||||
		vcpu->cpu = -1;
 | 
							vcpu->cpu = -1;
 | 
				
			||||||
		vcpu->kvm = kvm;
 | 
							vcpu->kvm = kvm;
 | 
				
			||||||
		vcpu->mmu.root_hpa = INVALID_PAGE;
 | 
							vcpu->mmu.root_hpa = INVALID_PAGE;
 | 
				
			||||||
		INIT_LIST_HEAD(&vcpu->free_pages);
 | 
					 | 
				
			||||||
		spin_lock(&kvm_lock);
 | 
					 | 
				
			||||||
		list_add(&kvm->vm_list, &vm_list);
 | 
					 | 
				
			||||||
		spin_unlock(&kvm_lock);
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return kvm;
 | 
						return kvm;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -380,6 +377,16 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (!vcpu->vmcs)
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						vcpu_load(vcpu);
 | 
				
			||||||
 | 
						kvm_mmu_unload(vcpu);
 | 
				
			||||||
 | 
						vcpu_put(vcpu);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
 | 
					static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	if (!vcpu->vmcs)
 | 
						if (!vcpu->vmcs)
 | 
				
			||||||
| 
						 | 
					@ -400,6 +407,11 @@ static void kvm_free_vcpus(struct kvm *kvm)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned int i;
 | 
						unsigned int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Unpin any mmu pages first.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						for (i = 0; i < KVM_MAX_VCPUS; ++i)
 | 
				
			||||||
 | 
							kvm_unload_vcpu_mmu(&kvm->vcpus[i]);
 | 
				
			||||||
	for (i = 0; i < KVM_MAX_VCPUS; ++i)
 | 
						for (i = 0; i < KVM_MAX_VCPUS; ++i)
 | 
				
			||||||
		kvm_free_vcpu(&kvm->vcpus[i]);
 | 
							kvm_free_vcpu(&kvm->vcpus[i]);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -414,6 +426,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
 | 
				
			||||||
	spin_lock(&kvm_lock);
 | 
						spin_lock(&kvm_lock);
 | 
				
			||||||
	list_del(&kvm->vm_list);
 | 
						list_del(&kvm->vm_list);
 | 
				
			||||||
	spin_unlock(&kvm_lock);
 | 
						spin_unlock(&kvm_lock);
 | 
				
			||||||
 | 
						kvm_io_bus_destroy(&kvm->pio_bus);
 | 
				
			||||||
 | 
						kvm_io_bus_destroy(&kvm->mmio_bus);
 | 
				
			||||||
	kvm_free_vcpus(kvm);
 | 
						kvm_free_vcpus(kvm);
 | 
				
			||||||
	kvm_free_physmem(kvm);
 | 
						kvm_free_physmem(kvm);
 | 
				
			||||||
	kfree(kvm);
 | 
						kfree(kvm);
 | 
				
			||||||
| 
						 | 
					@ -969,7 +983,7 @@ EXPORT_SYMBOL_GPL(gfn_to_page);
 | 
				
			||||||
void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 | 
					void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int i;
 | 
						int i;
 | 
				
			||||||
	struct kvm_memory_slot *memslot = NULL;
 | 
						struct kvm_memory_slot *memslot;
 | 
				
			||||||
	unsigned long rel_gfn;
 | 
						unsigned long rel_gfn;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (i = 0; i < kvm->nmemslots; ++i) {
 | 
						for (i = 0; i < kvm->nmemslots; ++i) {
 | 
				
			||||||
| 
						 | 
					@ -978,7 +992,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 | 
				
			||||||
		if (gfn >= memslot->base_gfn
 | 
							if (gfn >= memslot->base_gfn
 | 
				
			||||||
		    && gfn < memslot->base_gfn + memslot->npages) {
 | 
							    && gfn < memslot->base_gfn + memslot->npages) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if (!memslot || !memslot->dirty_bitmap)
 | 
								if (!memslot->dirty_bitmap)
 | 
				
			||||||
				return;
 | 
									return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			rel_gfn = gfn - memslot->base_gfn;
 | 
								rel_gfn = gfn - memslot->base_gfn;
 | 
				
			||||||
| 
						 | 
					@ -1037,12 +1051,31 @@ static int emulator_write_std(unsigned long addr,
 | 
				
			||||||
	return X86EMUL_UNHANDLEABLE;
 | 
						return X86EMUL_UNHANDLEABLE;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
 | 
				
			||||||
 | 
											gpa_t addr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Note that its important to have this wrapper function because
 | 
				
			||||||
 | 
						 * in the very near future we will be checking for MMIOs against
 | 
				
			||||||
 | 
						 * the LAPIC as well as the general MMIO bus
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
 | 
				
			||||||
 | 
										       gpa_t addr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int emulator_read_emulated(unsigned long addr,
 | 
					static int emulator_read_emulated(unsigned long addr,
 | 
				
			||||||
				  void *val,
 | 
									  void *val,
 | 
				
			||||||
				  unsigned int bytes,
 | 
									  unsigned int bytes,
 | 
				
			||||||
				  struct x86_emulate_ctxt *ctxt)
 | 
									  struct x86_emulate_ctxt *ctxt)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct kvm_vcpu *vcpu = ctxt->vcpu;
 | 
						struct kvm_vcpu      *vcpu = ctxt->vcpu;
 | 
				
			||||||
 | 
						struct kvm_io_device *mmio_dev;
 | 
				
			||||||
 | 
						gpa_t                 gpa;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (vcpu->mmio_read_completed) {
 | 
						if (vcpu->mmio_read_completed) {
 | 
				
			||||||
		memcpy(val, vcpu->mmio_data, bytes);
 | 
							memcpy(val, vcpu->mmio_data, bytes);
 | 
				
			||||||
| 
						 | 
					@ -1051,18 +1084,26 @@ static int emulator_read_emulated(unsigned long addr,
 | 
				
			||||||
	} else if (emulator_read_std(addr, val, bytes, ctxt)
 | 
						} else if (emulator_read_std(addr, val, bytes, ctxt)
 | 
				
			||||||
		   == X86EMUL_CONTINUE)
 | 
							   == X86EMUL_CONTINUE)
 | 
				
			||||||
		return X86EMUL_CONTINUE;
 | 
							return X86EMUL_CONTINUE;
 | 
				
			||||||
	else {
 | 
					 | 
				
			||||||
		gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (gpa == UNMAPPED_GVA)
 | 
						gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 | 
				
			||||||
			return X86EMUL_PROPAGATE_FAULT;
 | 
						if (gpa == UNMAPPED_GVA)
 | 
				
			||||||
		vcpu->mmio_needed = 1;
 | 
							return X86EMUL_PROPAGATE_FAULT;
 | 
				
			||||||
		vcpu->mmio_phys_addr = gpa;
 | 
					 | 
				
			||||||
		vcpu->mmio_size = bytes;
 | 
					 | 
				
			||||||
		vcpu->mmio_is_write = 0;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
		return X86EMUL_UNHANDLEABLE;
 | 
						/*
 | 
				
			||||||
 | 
						 * Is this MMIO handled locally?
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
 | 
				
			||||||
 | 
						if (mmio_dev) {
 | 
				
			||||||
 | 
							kvm_iodevice_read(mmio_dev, gpa, bytes, val);
 | 
				
			||||||
 | 
							return X86EMUL_CONTINUE;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						vcpu->mmio_needed = 1;
 | 
				
			||||||
 | 
						vcpu->mmio_phys_addr = gpa;
 | 
				
			||||||
 | 
						vcpu->mmio_size = bytes;
 | 
				
			||||||
 | 
						vcpu->mmio_is_write = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return X86EMUL_UNHANDLEABLE;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 | 
					static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 | 
				
			||||||
| 
						 | 
					@ -1070,18 +1111,20 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct page *page;
 | 
						struct page *page;
 | 
				
			||||||
	void *virt;
 | 
						void *virt;
 | 
				
			||||||
 | 
						unsigned offset = offset_in_page(gpa);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
 | 
						if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
 | 
						page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
 | 
				
			||||||
	if (!page)
 | 
						if (!page)
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
	kvm_mmu_pre_write(vcpu, gpa, bytes);
 | 
					 | 
				
			||||||
	mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
 | 
						mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
 | 
				
			||||||
	virt = kmap_atomic(page, KM_USER0);
 | 
						virt = kmap_atomic(page, KM_USER0);
 | 
				
			||||||
	memcpy(virt + offset_in_page(gpa), val, bytes);
 | 
						if (memcmp(virt + offset_in_page(gpa), val, bytes)) {
 | 
				
			||||||
 | 
							kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes);
 | 
				
			||||||
 | 
							memcpy(virt + offset_in_page(gpa), val, bytes);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	kunmap_atomic(virt, KM_USER0);
 | 
						kunmap_atomic(virt, KM_USER0);
 | 
				
			||||||
	kvm_mmu_post_write(vcpu, gpa, bytes);
 | 
					 | 
				
			||||||
	return 1;
 | 
						return 1;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1090,8 +1133,9 @@ static int emulator_write_emulated(unsigned long addr,
 | 
				
			||||||
				   unsigned int bytes,
 | 
									   unsigned int bytes,
 | 
				
			||||||
				   struct x86_emulate_ctxt *ctxt)
 | 
									   struct x86_emulate_ctxt *ctxt)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct kvm_vcpu *vcpu = ctxt->vcpu;
 | 
						struct kvm_vcpu      *vcpu = ctxt->vcpu;
 | 
				
			||||||
	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 | 
						struct kvm_io_device *mmio_dev;
 | 
				
			||||||
 | 
						gpa_t                 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (gpa == UNMAPPED_GVA) {
 | 
						if (gpa == UNMAPPED_GVA) {
 | 
				
			||||||
		kvm_arch_ops->inject_page_fault(vcpu, addr, 2);
 | 
							kvm_arch_ops->inject_page_fault(vcpu, addr, 2);
 | 
				
			||||||
| 
						 | 
					@ -1101,6 +1145,15 @@ static int emulator_write_emulated(unsigned long addr,
 | 
				
			||||||
	if (emulator_write_phys(vcpu, gpa, val, bytes))
 | 
						if (emulator_write_phys(vcpu, gpa, val, bytes))
 | 
				
			||||||
		return X86EMUL_CONTINUE;
 | 
							return X86EMUL_CONTINUE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Is this MMIO handled locally?
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
 | 
				
			||||||
 | 
						if (mmio_dev) {
 | 
				
			||||||
 | 
							kvm_iodevice_write(mmio_dev, gpa, bytes, val);
 | 
				
			||||||
 | 
							return X86EMUL_CONTINUE;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	vcpu->mmio_needed = 1;
 | 
						vcpu->mmio_needed = 1;
 | 
				
			||||||
	vcpu->mmio_phys_addr = gpa;
 | 
						vcpu->mmio_phys_addr = gpa;
 | 
				
			||||||
	vcpu->mmio_size = bytes;
 | 
						vcpu->mmio_size = bytes;
 | 
				
			||||||
| 
						 | 
					@ -1269,6 +1322,17 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL_GPL(emulate_instruction);
 | 
					EXPORT_SYMBOL_GPL(emulate_instruction);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (vcpu->irq_summary)
 | 
				
			||||||
 | 
							return 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						vcpu->run->exit_reason = KVM_EXIT_HLT;
 | 
				
			||||||
 | 
						++vcpu->stat.halt_exits;
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
 | 
					int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
 | 
						unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
 | 
				
			||||||
| 
						 | 
					@ -1469,6 +1533,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 | 
				
			||||||
	case MSR_IA32_MC0_MISC+16:
 | 
						case MSR_IA32_MC0_MISC+16:
 | 
				
			||||||
	case MSR_IA32_UCODE_REV:
 | 
						case MSR_IA32_UCODE_REV:
 | 
				
			||||||
	case MSR_IA32_PERF_STATUS:
 | 
						case MSR_IA32_PERF_STATUS:
 | 
				
			||||||
 | 
						case MSR_IA32_EBL_CR_POWERON:
 | 
				
			||||||
		/* MTRR registers */
 | 
							/* MTRR registers */
 | 
				
			||||||
	case 0xfe:
 | 
						case 0xfe:
 | 
				
			||||||
	case 0x200 ... 0x2ff:
 | 
						case 0x200 ... 0x2ff:
 | 
				
			||||||
| 
						 | 
					@ -1727,6 +1792,20 @@ static int complete_pio(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void kernel_pio(struct kvm_io_device *pio_dev, struct kvm_vcpu *vcpu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						/* TODO: String I/O for in kernel device */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (vcpu->pio.in)
 | 
				
			||||||
 | 
							kvm_iodevice_read(pio_dev, vcpu->pio.port,
 | 
				
			||||||
 | 
									  vcpu->pio.size,
 | 
				
			||||||
 | 
									  vcpu->pio_data);
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							kvm_iodevice_write(pio_dev, vcpu->pio.port,
 | 
				
			||||||
 | 
									   vcpu->pio.size,
 | 
				
			||||||
 | 
									   vcpu->pio_data);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 | 
					int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 | 
				
			||||||
		  int size, unsigned long count, int string, int down,
 | 
							  int size, unsigned long count, int string, int down,
 | 
				
			||||||
		  gva_t address, int rep, unsigned port)
 | 
							  gva_t address, int rep, unsigned port)
 | 
				
			||||||
| 
						 | 
					@ -1735,6 +1814,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 | 
				
			||||||
	int i;
 | 
						int i;
 | 
				
			||||||
	int nr_pages = 1;
 | 
						int nr_pages = 1;
 | 
				
			||||||
	struct page *page;
 | 
						struct page *page;
 | 
				
			||||||
 | 
						struct kvm_io_device *pio_dev;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	vcpu->run->exit_reason = KVM_EXIT_IO;
 | 
						vcpu->run->exit_reason = KVM_EXIT_IO;
 | 
				
			||||||
	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
 | 
						vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
 | 
				
			||||||
| 
						 | 
					@ -1746,17 +1826,27 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 | 
				
			||||||
	vcpu->pio.cur_count = count;
 | 
						vcpu->pio.cur_count = count;
 | 
				
			||||||
	vcpu->pio.size = size;
 | 
						vcpu->pio.size = size;
 | 
				
			||||||
	vcpu->pio.in = in;
 | 
						vcpu->pio.in = in;
 | 
				
			||||||
 | 
						vcpu->pio.port = port;
 | 
				
			||||||
	vcpu->pio.string = string;
 | 
						vcpu->pio.string = string;
 | 
				
			||||||
	vcpu->pio.down = down;
 | 
						vcpu->pio.down = down;
 | 
				
			||||||
	vcpu->pio.guest_page_offset = offset_in_page(address);
 | 
						vcpu->pio.guest_page_offset = offset_in_page(address);
 | 
				
			||||||
	vcpu->pio.rep = rep;
 | 
						vcpu->pio.rep = rep;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						pio_dev = vcpu_find_pio_dev(vcpu, port);
 | 
				
			||||||
	if (!string) {
 | 
						if (!string) {
 | 
				
			||||||
		kvm_arch_ops->cache_regs(vcpu);
 | 
							kvm_arch_ops->cache_regs(vcpu);
 | 
				
			||||||
		memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
 | 
							memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
 | 
				
			||||||
		kvm_arch_ops->decache_regs(vcpu);
 | 
							kvm_arch_ops->decache_regs(vcpu);
 | 
				
			||||||
 | 
							if (pio_dev) {
 | 
				
			||||||
 | 
								kernel_pio(pio_dev, vcpu);
 | 
				
			||||||
 | 
								complete_pio(vcpu);
 | 
				
			||||||
 | 
								return 1;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						/* TODO: String I/O for in kernel device */
 | 
				
			||||||
 | 
						if (pio_dev)
 | 
				
			||||||
 | 
							printk(KERN_ERR "kvm_setup_pio: no string io support\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!count) {
 | 
						if (!count) {
 | 
				
			||||||
		kvm_arch_ops->skip_emulated_instruction(vcpu);
 | 
							kvm_arch_ops->skip_emulated_instruction(vcpu);
 | 
				
			||||||
| 
						 | 
					@ -2273,34 +2363,12 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	struct inode *inode;
 | 
						struct inode *inode;
 | 
				
			||||||
	struct file *file;
 | 
						struct file *file;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						r = anon_inode_getfd(&fd, &inode, &file,
 | 
				
			||||||
 | 
								     "kvm-vcpu", &kvm_vcpu_fops, vcpu);
 | 
				
			||||||
 | 
						if (r)
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
	atomic_inc(&vcpu->kvm->filp->f_count);
 | 
						atomic_inc(&vcpu->kvm->filp->f_count);
 | 
				
			||||||
	inode = kvmfs_inode(&kvm_vcpu_fops);
 | 
					 | 
				
			||||||
	if (IS_ERR(inode)) {
 | 
					 | 
				
			||||||
		r = PTR_ERR(inode);
 | 
					 | 
				
			||||||
		goto out1;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	file = kvmfs_file(inode, vcpu);
 | 
					 | 
				
			||||||
	if (IS_ERR(file)) {
 | 
					 | 
				
			||||||
		r = PTR_ERR(file);
 | 
					 | 
				
			||||||
		goto out2;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	r = get_unused_fd();
 | 
					 | 
				
			||||||
	if (r < 0)
 | 
					 | 
				
			||||||
		goto out3;
 | 
					 | 
				
			||||||
	fd = r;
 | 
					 | 
				
			||||||
	fd_install(fd, file);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return fd;
 | 
						return fd;
 | 
				
			||||||
 | 
					 | 
				
			||||||
out3:
 | 
					 | 
				
			||||||
	fput(file);
 | 
					 | 
				
			||||||
out2:
 | 
					 | 
				
			||||||
	iput(inode);
 | 
					 | 
				
			||||||
out1:
 | 
					 | 
				
			||||||
	fput(vcpu->kvm->filp);
 | 
					 | 
				
			||||||
	return r;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					@ -2363,6 +2431,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 | 
				
			||||||
	if (r < 0)
 | 
						if (r < 0)
 | 
				
			||||||
		goto out_free_vcpus;
 | 
							goto out_free_vcpus;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock(&kvm_lock);
 | 
				
			||||||
 | 
						if (n >= kvm->nvcpus)
 | 
				
			||||||
 | 
							kvm->nvcpus = n + 1;
 | 
				
			||||||
 | 
						spin_unlock(&kvm_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return r;
 | 
						return r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out_free_vcpus:
 | 
					out_free_vcpus:
 | 
				
			||||||
| 
						 | 
					@ -2376,6 +2449,27 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 | 
				
			||||||
	return r;
 | 
						return r;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						u64 efer;
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
						struct kvm_cpuid_entry *e, *entry;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						rdmsrl(MSR_EFER, efer);
 | 
				
			||||||
 | 
						entry = NULL;
 | 
				
			||||||
 | 
						for (i = 0; i < vcpu->cpuid_nent; ++i) {
 | 
				
			||||||
 | 
							e = &vcpu->cpuid_entries[i];
 | 
				
			||||||
 | 
							if (e->function == 0x80000001) {
 | 
				
			||||||
 | 
								entry = e;
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) {
 | 
				
			||||||
 | 
							entry->edx &= ~(1 << 20);
 | 
				
			||||||
 | 
							printk(KERN_INFO ": guest NX capability removed\n");
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 | 
					static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 | 
				
			||||||
				    struct kvm_cpuid *cpuid,
 | 
									    struct kvm_cpuid *cpuid,
 | 
				
			||||||
				    struct kvm_cpuid_entry __user *entries)
 | 
									    struct kvm_cpuid_entry __user *entries)
 | 
				
			||||||
| 
						 | 
					@ -2390,6 +2484,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 | 
				
			||||||
			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
 | 
								   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
	vcpu->cpuid_nent = cpuid->nent;
 | 
						vcpu->cpuid_nent = cpuid->nent;
 | 
				
			||||||
 | 
						cpuid_fix_nx_cap(vcpu);
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
| 
						 | 
					@ -2738,41 +2833,18 @@ static int kvm_dev_ioctl_create_vm(void)
 | 
				
			||||||
	struct file *file;
 | 
						struct file *file;
 | 
				
			||||||
	struct kvm *kvm;
 | 
						struct kvm *kvm;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	inode = kvmfs_inode(&kvm_vm_fops);
 | 
					 | 
				
			||||||
	if (IS_ERR(inode)) {
 | 
					 | 
				
			||||||
		r = PTR_ERR(inode);
 | 
					 | 
				
			||||||
		goto out1;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	kvm = kvm_create_vm();
 | 
						kvm = kvm_create_vm();
 | 
				
			||||||
	if (IS_ERR(kvm)) {
 | 
						if (IS_ERR(kvm))
 | 
				
			||||||
		r = PTR_ERR(kvm);
 | 
							return PTR_ERR(kvm);
 | 
				
			||||||
		goto out2;
 | 
						r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
 | 
				
			||||||
 | 
						if (r) {
 | 
				
			||||||
 | 
							kvm_destroy_vm(kvm);
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	file = kvmfs_file(inode, kvm);
 | 
					 | 
				
			||||||
	if (IS_ERR(file)) {
 | 
					 | 
				
			||||||
		r = PTR_ERR(file);
 | 
					 | 
				
			||||||
		goto out3;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	kvm->filp = file;
 | 
						kvm->filp = file;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	r = get_unused_fd();
 | 
					 | 
				
			||||||
	if (r < 0)
 | 
					 | 
				
			||||||
		goto out4;
 | 
					 | 
				
			||||||
	fd = r;
 | 
					 | 
				
			||||||
	fd_install(fd, file);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return fd;
 | 
						return fd;
 | 
				
			||||||
 | 
					 | 
				
			||||||
out4:
 | 
					 | 
				
			||||||
	fput(file);
 | 
					 | 
				
			||||||
out3:
 | 
					 | 
				
			||||||
	kvm_destroy_vm(kvm);
 | 
					 | 
				
			||||||
out2:
 | 
					 | 
				
			||||||
	iput(inode);
 | 
					 | 
				
			||||||
out1:
 | 
					 | 
				
			||||||
	return r;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static long kvm_dev_ioctl(struct file *filp,
 | 
					static long kvm_dev_ioctl(struct file *filp,
 | 
				
			||||||
| 
						 | 
					@ -2862,7 +2934,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
 | 
				
			||||||
		 * in vmx root mode.
 | 
							 * in vmx root mode.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		printk(KERN_INFO "kvm: exiting hardware virtualization\n");
 | 
							printk(KERN_INFO "kvm: exiting hardware virtualization\n");
 | 
				
			||||||
		on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
 | 
							on_each_cpu(hardware_disable, NULL, 0, 1);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return NOTIFY_OK;
 | 
						return NOTIFY_OK;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -2905,33 +2977,88 @@ static void decache_vcpus_on_cpu(int cpu)
 | 
				
			||||||
	spin_unlock(&kvm_lock);
 | 
						spin_unlock(&kvm_lock);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void hardware_enable(void *junk)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int cpu = raw_smp_processor_id();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (cpu_isset(cpu, cpus_hardware_enabled))
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						cpu_set(cpu, cpus_hardware_enabled);
 | 
				
			||||||
 | 
						kvm_arch_ops->hardware_enable(NULL);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void hardware_disable(void *junk)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int cpu = raw_smp_processor_id();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!cpu_isset(cpu, cpus_hardware_enabled))
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						cpu_clear(cpu, cpus_hardware_enabled);
 | 
				
			||||||
 | 
						decache_vcpus_on_cpu(cpu);
 | 
				
			||||||
 | 
						kvm_arch_ops->hardware_disable(NULL);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 | 
					static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 | 
				
			||||||
			   void *v)
 | 
								   void *v)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int cpu = (long)v;
 | 
						int cpu = (long)v;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	switch (val) {
 | 
						switch (val) {
 | 
				
			||||||
	case CPU_DOWN_PREPARE:
 | 
						case CPU_DYING:
 | 
				
			||||||
	case CPU_DOWN_PREPARE_FROZEN:
 | 
						case CPU_DYING_FROZEN:
 | 
				
			||||||
	case CPU_UP_CANCELED:
 | 
						case CPU_UP_CANCELED:
 | 
				
			||||||
	case CPU_UP_CANCELED_FROZEN:
 | 
						case CPU_UP_CANCELED_FROZEN:
 | 
				
			||||||
		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
 | 
							printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
 | 
				
			||||||
		       cpu);
 | 
							       cpu);
 | 
				
			||||||
		decache_vcpus_on_cpu(cpu);
 | 
							smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
 | 
				
			||||||
		smp_call_function_single(cpu, kvm_arch_ops->hardware_disable,
 | 
					 | 
				
			||||||
					 NULL, 0, 1);
 | 
					 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
	case CPU_ONLINE:
 | 
						case CPU_ONLINE:
 | 
				
			||||||
	case CPU_ONLINE_FROZEN:
 | 
						case CPU_ONLINE_FROZEN:
 | 
				
			||||||
		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
 | 
							printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
 | 
				
			||||||
		       cpu);
 | 
							       cpu);
 | 
				
			||||||
		smp_call_function_single(cpu, kvm_arch_ops->hardware_enable,
 | 
							smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
 | 
				
			||||||
					 NULL, 0, 1);
 | 
					 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return NOTIFY_OK;
 | 
						return NOTIFY_OK;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void kvm_io_bus_init(struct kvm_io_bus *bus)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						memset(bus, 0, sizeof(*bus));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; i < bus->dev_count; i++) {
 | 
				
			||||||
 | 
							struct kvm_io_device *pos = bus->devs[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							kvm_iodevice_destructor(pos);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for (i = 0; i < bus->dev_count; i++) {
 | 
				
			||||||
 | 
							struct kvm_io_device *pos = bus->devs[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (pos->in_range(pos, addr))
 | 
				
			||||||
 | 
								return pos;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return NULL;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						bus->devs[bus->dev_count++] = dev;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct notifier_block kvm_cpu_notifier = {
 | 
					static struct notifier_block kvm_cpu_notifier = {
 | 
				
			||||||
	.notifier_call = kvm_cpu_hotplug,
 | 
						.notifier_call = kvm_cpu_hotplug,
 | 
				
			||||||
	.priority = 20, /* must be > scheduler priority */
 | 
						.priority = 20, /* must be > scheduler priority */
 | 
				
			||||||
| 
						 | 
					@ -2983,14 +3110,13 @@ static void kvm_exit_debug(void)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int kvm_suspend(struct sys_device *dev, pm_message_t state)
 | 
					static int kvm_suspend(struct sys_device *dev, pm_message_t state)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	decache_vcpus_on_cpu(raw_smp_processor_id());
 | 
						hardware_disable(NULL);
 | 
				
			||||||
	on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
 | 
					 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int kvm_resume(struct sys_device *dev)
 | 
					static int kvm_resume(struct sys_device *dev)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1);
 | 
						hardware_enable(NULL);
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3007,18 +3133,6 @@ static struct sys_device kvm_sysdev = {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
hpa_t bad_page_address;
 | 
					hpa_t bad_page_address;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int kvmfs_get_sb(struct file_system_type *fs_type, int flags,
 | 
					 | 
				
			||||||
			const char *dev_name, void *data, struct vfsmount *mnt)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	return get_sb_pseudo(fs_type, "kvm:", NULL, KVMFS_SUPER_MAGIC, mnt);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static struct file_system_type kvm_fs_type = {
 | 
					 | 
				
			||||||
	.name		= "kvmfs",
 | 
					 | 
				
			||||||
	.get_sb		= kvmfs_get_sb,
 | 
					 | 
				
			||||||
	.kill_sb	= kill_anon_super,
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
 | 
					int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int r;
 | 
						int r;
 | 
				
			||||||
| 
						 | 
					@ -3043,7 +3157,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
 | 
				
			||||||
	if (r < 0)
 | 
						if (r < 0)
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1);
 | 
						on_each_cpu(hardware_enable, NULL, 0, 1);
 | 
				
			||||||
	r = register_cpu_notifier(&kvm_cpu_notifier);
 | 
						r = register_cpu_notifier(&kvm_cpu_notifier);
 | 
				
			||||||
	if (r)
 | 
						if (r)
 | 
				
			||||||
		goto out_free_1;
 | 
							goto out_free_1;
 | 
				
			||||||
| 
						 | 
					@ -3075,7 +3189,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
 | 
				
			||||||
	unregister_reboot_notifier(&kvm_reboot_notifier);
 | 
						unregister_reboot_notifier(&kvm_reboot_notifier);
 | 
				
			||||||
	unregister_cpu_notifier(&kvm_cpu_notifier);
 | 
						unregister_cpu_notifier(&kvm_cpu_notifier);
 | 
				
			||||||
out_free_1:
 | 
					out_free_1:
 | 
				
			||||||
	on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
 | 
						on_each_cpu(hardware_disable, NULL, 0, 1);
 | 
				
			||||||
	kvm_arch_ops->hardware_unsetup();
 | 
						kvm_arch_ops->hardware_unsetup();
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
	kvm_arch_ops = NULL;
 | 
						kvm_arch_ops = NULL;
 | 
				
			||||||
| 
						 | 
					@ -3089,7 +3203,7 @@ void kvm_exit_arch(void)
 | 
				
			||||||
	sysdev_class_unregister(&kvm_sysdev_class);
 | 
						sysdev_class_unregister(&kvm_sysdev_class);
 | 
				
			||||||
	unregister_reboot_notifier(&kvm_reboot_notifier);
 | 
						unregister_reboot_notifier(&kvm_reboot_notifier);
 | 
				
			||||||
	unregister_cpu_notifier(&kvm_cpu_notifier);
 | 
						unregister_cpu_notifier(&kvm_cpu_notifier);
 | 
				
			||||||
	on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
 | 
						on_each_cpu(hardware_disable, NULL, 0, 1);
 | 
				
			||||||
	kvm_arch_ops->hardware_unsetup();
 | 
						kvm_arch_ops->hardware_unsetup();
 | 
				
			||||||
	kvm_arch_ops = NULL;
 | 
						kvm_arch_ops = NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -3103,14 +3217,6 @@ static __init int kvm_init(void)
 | 
				
			||||||
	if (r)
 | 
						if (r)
 | 
				
			||||||
		goto out4;
 | 
							goto out4;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	r = register_filesystem(&kvm_fs_type);
 | 
					 | 
				
			||||||
	if (r)
 | 
					 | 
				
			||||||
		goto out3;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	kvmfs_mnt = kern_mount(&kvm_fs_type);
 | 
					 | 
				
			||||||
	r = PTR_ERR(kvmfs_mnt);
 | 
					 | 
				
			||||||
	if (IS_ERR(kvmfs_mnt))
 | 
					 | 
				
			||||||
		goto out2;
 | 
					 | 
				
			||||||
	kvm_init_debug();
 | 
						kvm_init_debug();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	kvm_init_msr_list();
 | 
						kvm_init_msr_list();
 | 
				
			||||||
| 
						 | 
					@ -3127,10 +3233,6 @@ static __init int kvm_init(void)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
	kvm_exit_debug();
 | 
						kvm_exit_debug();
 | 
				
			||||||
	mntput(kvmfs_mnt);
 | 
					 | 
				
			||||||
out2:
 | 
					 | 
				
			||||||
	unregister_filesystem(&kvm_fs_type);
 | 
					 | 
				
			||||||
out3:
 | 
					 | 
				
			||||||
	kvm_mmu_module_exit();
 | 
						kvm_mmu_module_exit();
 | 
				
			||||||
out4:
 | 
					out4:
 | 
				
			||||||
	return r;
 | 
						return r;
 | 
				
			||||||
| 
						 | 
					@ -3140,8 +3242,6 @@ static __exit void kvm_exit(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	kvm_exit_debug();
 | 
						kvm_exit_debug();
 | 
				
			||||||
	__free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
 | 
						__free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
 | 
				
			||||||
	mntput(kvmfs_mnt);
 | 
					 | 
				
			||||||
	unregister_filesystem(&kvm_fs_type);
 | 
					 | 
				
			||||||
	kvm_mmu_module_exit();
 | 
						kvm_mmu_module_exit();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -16,15 +16,18 @@
 | 
				
			||||||
 * the COPYING file in the top-level directory.
 | 
					 * the COPYING file in the top-level directory.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "vmx.h"
 | 
				
			||||||
 | 
					#include "kvm.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/types.h>
 | 
					#include <linux/types.h>
 | 
				
			||||||
#include <linux/string.h>
 | 
					#include <linux/string.h>
 | 
				
			||||||
#include <asm/page.h>
 | 
					 | 
				
			||||||
#include <linux/mm.h>
 | 
					#include <linux/mm.h>
 | 
				
			||||||
#include <linux/highmem.h>
 | 
					#include <linux/highmem.h>
 | 
				
			||||||
#include <linux/module.h>
 | 
					#include <linux/module.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "vmx.h"
 | 
					#include <asm/page.h>
 | 
				
			||||||
#include "kvm.h"
 | 
					#include <asm/cmpxchg.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#undef MMU_DEBUG
 | 
					#undef MMU_DEBUG
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -90,25 +93,11 @@ static int dbg = 1;
 | 
				
			||||||
#define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
 | 
					#define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define PT32_PTE_COPY_MASK \
 | 
					 | 
				
			||||||
	(PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_GLOBAL_MASK)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define PT64_PTE_COPY_MASK (PT64_NX_MASK | PT32_PTE_COPY_MASK)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define PT_FIRST_AVAIL_BITS_SHIFT 9
 | 
					#define PT_FIRST_AVAIL_BITS_SHIFT 9
 | 
				
			||||||
#define PT64_SECOND_AVAIL_BITS_SHIFT 52
 | 
					#define PT64_SECOND_AVAIL_BITS_SHIFT 52
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
 | 
					 | 
				
			||||||
#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
 | 
					#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1)
 | 
					 | 
				
			||||||
#define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1)
 | 
					 | 
				
			||||||
#define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define VALID_PAGE(x) ((x) != INVALID_PAGE)
 | 
					#define VALID_PAGE(x) ((x) != INVALID_PAGE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define PT64_LEVEL_BITS 9
 | 
					#define PT64_LEVEL_BITS 9
 | 
				
			||||||
| 
						 | 
					@ -165,6 +154,8 @@ struct kvm_rmap_desc {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct kmem_cache *pte_chain_cache;
 | 
					static struct kmem_cache *pte_chain_cache;
 | 
				
			||||||
static struct kmem_cache *rmap_desc_cache;
 | 
					static struct kmem_cache *rmap_desc_cache;
 | 
				
			||||||
 | 
					static struct kmem_cache *mmu_page_cache;
 | 
				
			||||||
 | 
					static struct kmem_cache *mmu_page_header_cache;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int is_write_protection(struct kvm_vcpu *vcpu)
 | 
					static int is_write_protection(struct kvm_vcpu *vcpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -202,6 +193,15 @@ static int is_rmap_pte(u64 pte)
 | 
				
			||||||
		== (PT_WRITABLE_MASK | PT_PRESENT_MASK);
 | 
							== (PT_WRITABLE_MASK | PT_PRESENT_MASK);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void set_shadow_pte(u64 *sptep, u64 spte)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#ifdef CONFIG_X86_64
 | 
				
			||||||
 | 
						set_64bit((unsigned long *)sptep, spte);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						set_64bit((unsigned long long *)sptep, spte);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 | 
					static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 | 
				
			||||||
				  struct kmem_cache *base_cache, int min,
 | 
									  struct kmem_cache *base_cache, int min,
 | 
				
			||||||
				  gfp_t gfp_flags)
 | 
									  gfp_t gfp_flags)
 | 
				
			||||||
| 
						 | 
					@ -235,6 +235,14 @@ static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags)
 | 
				
			||||||
		goto out;
 | 
							goto out;
 | 
				
			||||||
	r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
 | 
						r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
 | 
				
			||||||
				   rmap_desc_cache, 1, gfp_flags);
 | 
									   rmap_desc_cache, 1, gfp_flags);
 | 
				
			||||||
 | 
						if (r)
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						r = mmu_topup_memory_cache(&vcpu->mmu_page_cache,
 | 
				
			||||||
 | 
									   mmu_page_cache, 4, gfp_flags);
 | 
				
			||||||
 | 
						if (r)
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
 | 
				
			||||||
 | 
									   mmu_page_header_cache, 4, gfp_flags);
 | 
				
			||||||
out:
 | 
					out:
 | 
				
			||||||
	return r;
 | 
						return r;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -258,6 +266,8 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
 | 
						mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
 | 
				
			||||||
	mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
 | 
						mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
 | 
				
			||||||
 | 
						mmu_free_memory_cache(&vcpu->mmu_page_cache);
 | 
				
			||||||
 | 
						mmu_free_memory_cache(&vcpu->mmu_page_header_cache);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
 | 
					static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
 | 
				
			||||||
| 
						 | 
					@ -433,19 +443,18 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 | 
				
			||||||
		BUG_ON(!(*spte & PT_WRITABLE_MASK));
 | 
							BUG_ON(!(*spte & PT_WRITABLE_MASK));
 | 
				
			||||||
		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
 | 
							rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
 | 
				
			||||||
		rmap_remove(vcpu, spte);
 | 
							rmap_remove(vcpu, spte);
 | 
				
			||||||
		kvm_arch_ops->tlb_flush(vcpu);
 | 
							set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
 | 
				
			||||||
		*spte &= ~(u64)PT_WRITABLE_MASK;
 | 
							kvm_flush_remote_tlbs(vcpu->kvm);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef MMU_DEBUG
 | 
					#ifdef MMU_DEBUG
 | 
				
			||||||
static int is_empty_shadow_page(hpa_t page_hpa)
 | 
					static int is_empty_shadow_page(u64 *spt)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	u64 *pos;
 | 
						u64 *pos;
 | 
				
			||||||
	u64 *end;
 | 
						u64 *end;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u64);
 | 
						for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
 | 
				
			||||||
		      pos != end; pos++)
 | 
					 | 
				
			||||||
		if (*pos != 0) {
 | 
							if (*pos != 0) {
 | 
				
			||||||
			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
 | 
								printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
 | 
				
			||||||
			       pos, *pos);
 | 
								       pos, *pos);
 | 
				
			||||||
| 
						 | 
					@ -455,13 +464,13 @@ static int is_empty_shadow_page(hpa_t page_hpa)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
 | 
					static void kvm_mmu_free_page(struct kvm_vcpu *vcpu,
 | 
				
			||||||
 | 
								      struct kvm_mmu_page *page_head)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct kvm_mmu_page *page_head = page_header(page_hpa);
 | 
						ASSERT(is_empty_shadow_page(page_head->spt));
 | 
				
			||||||
 | 
						list_del(&page_head->link);
 | 
				
			||||||
	ASSERT(is_empty_shadow_page(page_hpa));
 | 
						mmu_memory_cache_free(&vcpu->mmu_page_cache, page_head->spt);
 | 
				
			||||||
	page_head->page_hpa = page_hpa;
 | 
						mmu_memory_cache_free(&vcpu->mmu_page_header_cache, page_head);
 | 
				
			||||||
	list_move(&page_head->link, &vcpu->free_pages);
 | 
					 | 
				
			||||||
	++vcpu->kvm->n_free_mmu_pages;
 | 
						++vcpu->kvm->n_free_mmu_pages;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -475,12 +484,15 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct kvm_mmu_page *page;
 | 
						struct kvm_mmu_page *page;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (list_empty(&vcpu->free_pages))
 | 
						if (!vcpu->kvm->n_free_mmu_pages)
 | 
				
			||||||
		return NULL;
 | 
							return NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
 | 
						page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache,
 | 
				
			||||||
	list_move(&page->link, &vcpu->kvm->active_mmu_pages);
 | 
									      sizeof *page);
 | 
				
			||||||
	ASSERT(is_empty_shadow_page(page->page_hpa));
 | 
						page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE);
 | 
				
			||||||
 | 
						set_page_private(virt_to_page(page->spt), (unsigned long)page);
 | 
				
			||||||
 | 
						list_add(&page->link, &vcpu->kvm->active_mmu_pages);
 | 
				
			||||||
 | 
						ASSERT(is_empty_shadow_page(page->spt));
 | 
				
			||||||
	page->slot_bitmap = 0;
 | 
						page->slot_bitmap = 0;
 | 
				
			||||||
	page->multimapped = 0;
 | 
						page->multimapped = 0;
 | 
				
			||||||
	page->parent_pte = parent_pte;
 | 
						page->parent_pte = parent_pte;
 | 
				
			||||||
| 
						 | 
					@ -638,7 +650,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
 | 
				
			||||||
	u64 *pt;
 | 
						u64 *pt;
 | 
				
			||||||
	u64 ent;
 | 
						u64 ent;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pt = __va(page->page_hpa);
 | 
						pt = page->spt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (page->role.level == PT_PAGE_TABLE_LEVEL) {
 | 
						if (page->role.level == PT_PAGE_TABLE_LEVEL) {
 | 
				
			||||||
		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
 | 
							for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
 | 
				
			||||||
| 
						 | 
					@ -646,7 +658,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
 | 
				
			||||||
				rmap_remove(vcpu, &pt[i]);
 | 
									rmap_remove(vcpu, &pt[i]);
 | 
				
			||||||
			pt[i] = 0;
 | 
								pt[i] = 0;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		kvm_arch_ops->tlb_flush(vcpu);
 | 
							kvm_flush_remote_tlbs(vcpu->kvm);
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -659,6 +671,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
 | 
				
			||||||
		ent &= PT64_BASE_ADDR_MASK;
 | 
							ent &= PT64_BASE_ADDR_MASK;
 | 
				
			||||||
		mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]);
 | 
							mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						kvm_flush_remote_tlbs(vcpu->kvm);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void kvm_mmu_put_page(struct kvm_vcpu *vcpu,
 | 
					static void kvm_mmu_put_page(struct kvm_vcpu *vcpu,
 | 
				
			||||||
| 
						 | 
					@ -685,12 +698,12 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		BUG_ON(!parent_pte);
 | 
							BUG_ON(!parent_pte);
 | 
				
			||||||
		kvm_mmu_put_page(vcpu, page, parent_pte);
 | 
							kvm_mmu_put_page(vcpu, page, parent_pte);
 | 
				
			||||||
		*parent_pte = 0;
 | 
							set_shadow_pte(parent_pte, 0);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	kvm_mmu_page_unlink_children(vcpu, page);
 | 
						kvm_mmu_page_unlink_children(vcpu, page);
 | 
				
			||||||
	if (!page->root_count) {
 | 
						if (!page->root_count) {
 | 
				
			||||||
		hlist_del(&page->hash_link);
 | 
							hlist_del(&page->hash_link);
 | 
				
			||||||
		kvm_mmu_free_page(vcpu, page->page_hpa);
 | 
							kvm_mmu_free_page(vcpu, page);
 | 
				
			||||||
	} else
 | 
						} else
 | 
				
			||||||
		list_move(&page->link, &vcpu->kvm->active_mmu_pages);
 | 
							list_move(&page->link, &vcpu->kvm->active_mmu_pages);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -717,6 +730,17 @@ static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
 | 
				
			||||||
	return r;
 | 
						return r;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						struct kvm_mmu_page *page;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
 | 
				
			||||||
 | 
							pgprintk("%s: zap %lx %x\n",
 | 
				
			||||||
 | 
								 __FUNCTION__, gfn, page->role.word);
 | 
				
			||||||
 | 
							kvm_mmu_zap_page(vcpu, page);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
 | 
					static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
 | 
						int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
 | 
				
			||||||
| 
						 | 
					@ -805,7 +829,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 | 
				
			||||||
				return -ENOMEM;
 | 
									return -ENOMEM;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			table[index] = new_table->page_hpa | PT_PRESENT_MASK
 | 
								table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
 | 
				
			||||||
				| PT_WRITABLE_MASK | PT_USER_MASK;
 | 
									| PT_WRITABLE_MASK | PT_USER_MASK;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		table_addr = table[index] & PT64_BASE_ADDR_MASK;
 | 
							table_addr = table[index] & PT64_BASE_ADDR_MASK;
 | 
				
			||||||
| 
						 | 
					@ -817,11 +841,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	int i;
 | 
						int i;
 | 
				
			||||||
	struct kvm_mmu_page *page;
 | 
						struct kvm_mmu_page *page;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!VALID_PAGE(vcpu->mmu.root_hpa))
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
#ifdef CONFIG_X86_64
 | 
					#ifdef CONFIG_X86_64
 | 
				
			||||||
	if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 | 
						if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 | 
				
			||||||
		hpa_t root = vcpu->mmu.root_hpa;
 | 
							hpa_t root = vcpu->mmu.root_hpa;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		ASSERT(VALID_PAGE(root));
 | 
					 | 
				
			||||||
		page = page_header(root);
 | 
							page = page_header(root);
 | 
				
			||||||
		--page->root_count;
 | 
							--page->root_count;
 | 
				
			||||||
		vcpu->mmu.root_hpa = INVALID_PAGE;
 | 
							vcpu->mmu.root_hpa = INVALID_PAGE;
 | 
				
			||||||
| 
						 | 
					@ -832,7 +857,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 | 
				
			||||||
		hpa_t root = vcpu->mmu.pae_root[i];
 | 
							hpa_t root = vcpu->mmu.pae_root[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (root) {
 | 
							if (root) {
 | 
				
			||||||
			ASSERT(VALID_PAGE(root));
 | 
					 | 
				
			||||||
			root &= PT64_BASE_ADDR_MASK;
 | 
								root &= PT64_BASE_ADDR_MASK;
 | 
				
			||||||
			page = page_header(root);
 | 
								page = page_header(root);
 | 
				
			||||||
			--page->root_count;
 | 
								--page->root_count;
 | 
				
			||||||
| 
						 | 
					@ -857,7 +881,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 | 
				
			||||||
		ASSERT(!VALID_PAGE(root));
 | 
							ASSERT(!VALID_PAGE(root));
 | 
				
			||||||
		page = kvm_mmu_get_page(vcpu, root_gfn, 0,
 | 
							page = kvm_mmu_get_page(vcpu, root_gfn, 0,
 | 
				
			||||||
					PT64_ROOT_LEVEL, 0, 0, NULL);
 | 
										PT64_ROOT_LEVEL, 0, 0, NULL);
 | 
				
			||||||
		root = page->page_hpa;
 | 
							root = __pa(page->spt);
 | 
				
			||||||
		++page->root_count;
 | 
							++page->root_count;
 | 
				
			||||||
		vcpu->mmu.root_hpa = root;
 | 
							vcpu->mmu.root_hpa = root;
 | 
				
			||||||
		return;
 | 
							return;
 | 
				
			||||||
| 
						 | 
					@ -878,7 +902,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 | 
				
			||||||
		page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
 | 
							page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
 | 
				
			||||||
					PT32_ROOT_LEVEL, !is_paging(vcpu),
 | 
										PT32_ROOT_LEVEL, !is_paging(vcpu),
 | 
				
			||||||
					0, NULL);
 | 
										0, NULL);
 | 
				
			||||||
		root = page->page_hpa;
 | 
							root = __pa(page->spt);
 | 
				
			||||||
		++page->root_count;
 | 
							++page->root_count;
 | 
				
			||||||
		vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
 | 
							vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					@ -928,9 +952,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	context->free = nonpaging_free;
 | 
						context->free = nonpaging_free;
 | 
				
			||||||
	context->root_level = 0;
 | 
						context->root_level = 0;
 | 
				
			||||||
	context->shadow_root_level = PT32E_ROOT_LEVEL;
 | 
						context->shadow_root_level = PT32E_ROOT_LEVEL;
 | 
				
			||||||
	mmu_alloc_roots(vcpu);
 | 
						context->root_hpa = INVALID_PAGE;
 | 
				
			||||||
	ASSERT(VALID_PAGE(context->root_hpa));
 | 
					 | 
				
			||||||
	kvm_arch_ops->set_cr3(vcpu, context->root_hpa);
 | 
					 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -944,59 +966,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
 | 
						pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
 | 
				
			||||||
	mmu_free_roots(vcpu);
 | 
						mmu_free_roots(vcpu);
 | 
				
			||||||
	if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
 | 
					 | 
				
			||||||
		kvm_mmu_free_some_pages(vcpu);
 | 
					 | 
				
			||||||
	mmu_alloc_roots(vcpu);
 | 
					 | 
				
			||||||
	kvm_mmu_flush_tlb(vcpu);
 | 
					 | 
				
			||||||
	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static inline void set_pte_common(struct kvm_vcpu *vcpu,
 | 
					 | 
				
			||||||
			     u64 *shadow_pte,
 | 
					 | 
				
			||||||
			     gpa_t gaddr,
 | 
					 | 
				
			||||||
			     int dirty,
 | 
					 | 
				
			||||||
			     u64 access_bits,
 | 
					 | 
				
			||||||
			     gfn_t gfn)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	hpa_t paddr;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	*shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET;
 | 
					 | 
				
			||||||
	if (!dirty)
 | 
					 | 
				
			||||||
		access_bits &= ~PT_WRITABLE_MASK;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	*shadow_pte |= access_bits;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (is_error_hpa(paddr)) {
 | 
					 | 
				
			||||||
		*shadow_pte |= gaddr;
 | 
					 | 
				
			||||||
		*shadow_pte |= PT_SHADOW_IO_MARK;
 | 
					 | 
				
			||||||
		*shadow_pte &= ~PT_PRESENT_MASK;
 | 
					 | 
				
			||||||
		return;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	*shadow_pte |= paddr;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (access_bits & PT_WRITABLE_MASK) {
 | 
					 | 
				
			||||||
		struct kvm_mmu_page *shadow;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		shadow = kvm_mmu_lookup_page(vcpu, gfn);
 | 
					 | 
				
			||||||
		if (shadow) {
 | 
					 | 
				
			||||||
			pgprintk("%s: found shadow page for %lx, marking ro\n",
 | 
					 | 
				
			||||||
				 __FUNCTION__, gfn);
 | 
					 | 
				
			||||||
			access_bits &= ~PT_WRITABLE_MASK;
 | 
					 | 
				
			||||||
			if (is_writeble_pte(*shadow_pte)) {
 | 
					 | 
				
			||||||
				    *shadow_pte &= ~PT_WRITABLE_MASK;
 | 
					 | 
				
			||||||
				    kvm_arch_ops->tlb_flush(vcpu);
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (access_bits & PT_WRITABLE_MASK)
 | 
					 | 
				
			||||||
		mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
 | 
					 | 
				
			||||||
	rmap_add(vcpu, shadow_pte);
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void inject_page_fault(struct kvm_vcpu *vcpu,
 | 
					static void inject_page_fault(struct kvm_vcpu *vcpu,
 | 
				
			||||||
| 
						 | 
					@ -1006,23 +975,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
 | 
				
			||||||
	kvm_arch_ops->inject_page_fault(vcpu, addr, err_code);
 | 
						kvm_arch_ops->inject_page_fault(vcpu, addr, err_code);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline int fix_read_pf(u64 *shadow_ent)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	if ((*shadow_ent & PT_SHADOW_USER_MASK) &&
 | 
					 | 
				
			||||||
	    !(*shadow_ent & PT_USER_MASK)) {
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * If supervisor write protect is disabled, we shadow kernel
 | 
					 | 
				
			||||||
		 * pages as user pages so we can trap the write access.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		*shadow_ent |= PT_USER_MASK;
 | 
					 | 
				
			||||||
		*shadow_ent &= ~PT_WRITABLE_MASK;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		return 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return 0;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void paging_free(struct kvm_vcpu *vcpu)
 | 
					static void paging_free(struct kvm_vcpu *vcpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	nonpaging_free(vcpu);
 | 
						nonpaging_free(vcpu);
 | 
				
			||||||
| 
						 | 
					@ -1047,10 +999,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 | 
				
			||||||
	context->free = paging_free;
 | 
						context->free = paging_free;
 | 
				
			||||||
	context->root_level = level;
 | 
						context->root_level = level;
 | 
				
			||||||
	context->shadow_root_level = level;
 | 
						context->shadow_root_level = level;
 | 
				
			||||||
	mmu_alloc_roots(vcpu);
 | 
						context->root_hpa = INVALID_PAGE;
 | 
				
			||||||
	ASSERT(VALID_PAGE(context->root_hpa));
 | 
					 | 
				
			||||||
	kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
 | 
					 | 
				
			||||||
		    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
 | 
					 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1069,10 +1018,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	context->free = paging_free;
 | 
						context->free = paging_free;
 | 
				
			||||||
	context->root_level = PT32_ROOT_LEVEL;
 | 
						context->root_level = PT32_ROOT_LEVEL;
 | 
				
			||||||
	context->shadow_root_level = PT32E_ROOT_LEVEL;
 | 
						context->shadow_root_level = PT32E_ROOT_LEVEL;
 | 
				
			||||||
	mmu_alloc_roots(vcpu);
 | 
						context->root_hpa = INVALID_PAGE;
 | 
				
			||||||
	ASSERT(VALID_PAGE(context->root_hpa));
 | 
					 | 
				
			||||||
	kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
 | 
					 | 
				
			||||||
		    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
 | 
					 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1107,18 +1053,33 @@ static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 | 
					int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int r;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	destroy_kvm_mmu(vcpu);
 | 
						destroy_kvm_mmu(vcpu);
 | 
				
			||||||
	r = init_kvm_mmu(vcpu);
 | 
						return init_kvm_mmu(vcpu);
 | 
				
			||||||
	if (r < 0)
 | 
					 | 
				
			||||||
		goto out;
 | 
					 | 
				
			||||||
	r = mmu_topup_memory_caches(vcpu);
 | 
					 | 
				
			||||||
out:
 | 
					 | 
				
			||||||
	return r;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu,
 | 
					int kvm_mmu_load(struct kvm_vcpu *vcpu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spin_lock(&vcpu->kvm->lock);
 | 
				
			||||||
 | 
						r = mmu_topup_memory_caches(vcpu);
 | 
				
			||||||
 | 
						if (r)
 | 
				
			||||||
 | 
							goto out;
 | 
				
			||||||
 | 
						mmu_alloc_roots(vcpu);
 | 
				
			||||||
 | 
						kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
 | 
				
			||||||
 | 
						kvm_mmu_flush_tlb(vcpu);
 | 
				
			||||||
 | 
					out:
 | 
				
			||||||
 | 
						spin_unlock(&vcpu->kvm->lock);
 | 
				
			||||||
 | 
						return r;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(kvm_mmu_load);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						mmu_free_roots(vcpu);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 | 
				
			||||||
				  struct kvm_mmu_page *page,
 | 
									  struct kvm_mmu_page *page,
 | 
				
			||||||
				  u64 *spte)
 | 
									  u64 *spte)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
| 
						 | 
					@ -1135,9 +1096,25 @@ static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu,
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	*spte = 0;
 | 
						*spte = 0;
 | 
				
			||||||
 | 
						kvm_flush_remote_tlbs(vcpu->kvm);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 | 
					static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 | 
				
			||||||
 | 
									  struct kvm_mmu_page *page,
 | 
				
			||||||
 | 
									  u64 *spte,
 | 
				
			||||||
 | 
									  const void *new, int bytes)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						if (page->role.level != PT_PAGE_TABLE_LEVEL)
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (page->role.glevels == PT32_ROOT_LEVEL)
 | 
				
			||||||
 | 
							paging32_update_pte(vcpu, page, spte, new, bytes);
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							paging64_update_pte(vcpu, page, spte, new, bytes);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 | 
				
			||||||
 | 
							       const u8 *old, const u8 *new, int bytes)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	gfn_t gfn = gpa >> PAGE_SHIFT;
 | 
						gfn_t gfn = gpa >> PAGE_SHIFT;
 | 
				
			||||||
	struct kvm_mmu_page *page;
 | 
						struct kvm_mmu_page *page;
 | 
				
			||||||
| 
						 | 
					@ -1149,6 +1126,7 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 | 
				
			||||||
	unsigned pte_size;
 | 
						unsigned pte_size;
 | 
				
			||||||
	unsigned page_offset;
 | 
						unsigned page_offset;
 | 
				
			||||||
	unsigned misaligned;
 | 
						unsigned misaligned;
 | 
				
			||||||
 | 
						unsigned quadrant;
 | 
				
			||||||
	int level;
 | 
						int level;
 | 
				
			||||||
	int flooded = 0;
 | 
						int flooded = 0;
 | 
				
			||||||
	int npte;
 | 
						int npte;
 | 
				
			||||||
| 
						 | 
					@ -1169,6 +1147,7 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
		pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
 | 
							pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
 | 
				
			||||||
		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
 | 
							misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
 | 
				
			||||||
 | 
							misaligned |= bytes < 4;
 | 
				
			||||||
		if (misaligned || flooded) {
 | 
							if (misaligned || flooded) {
 | 
				
			||||||
			/*
 | 
								/*
 | 
				
			||||||
			 * Misaligned accesses are too much trouble to fix
 | 
								 * Misaligned accesses are too much trouble to fix
 | 
				
			||||||
| 
						 | 
					@ -1200,21 +1179,20 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 | 
				
			||||||
				page_offset <<= 1;
 | 
									page_offset <<= 1;
 | 
				
			||||||
				npte = 2;
 | 
									npte = 2;
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
								quadrant = page_offset >> PAGE_SHIFT;
 | 
				
			||||||
			page_offset &= ~PAGE_MASK;
 | 
								page_offset &= ~PAGE_MASK;
 | 
				
			||||||
 | 
								if (quadrant != page->role.quadrant)
 | 
				
			||||||
 | 
									continue;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		spte = __va(page->page_hpa);
 | 
							spte = &page->spt[page_offset / sizeof(*spte)];
 | 
				
			||||||
		spte += page_offset / sizeof(*spte);
 | 
					 | 
				
			||||||
		while (npte--) {
 | 
							while (npte--) {
 | 
				
			||||||
			mmu_pre_write_zap_pte(vcpu, page, spte);
 | 
								mmu_pte_write_zap_pte(vcpu, page, spte);
 | 
				
			||||||
 | 
								mmu_pte_write_new_pte(vcpu, page, spte, new, bytes);
 | 
				
			||||||
			++spte;
 | 
								++spte;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 | 
					int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
 | 
						gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
 | 
				
			||||||
| 
						 | 
					@ -1243,13 +1221,6 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu)
 | 
				
			||||||
				    struct kvm_mmu_page, link);
 | 
									    struct kvm_mmu_page, link);
 | 
				
			||||||
		kvm_mmu_zap_page(vcpu, page);
 | 
							kvm_mmu_zap_page(vcpu, page);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	while (!list_empty(&vcpu->free_pages)) {
 | 
					 | 
				
			||||||
		page = list_entry(vcpu->free_pages.next,
 | 
					 | 
				
			||||||
				  struct kvm_mmu_page, link);
 | 
					 | 
				
			||||||
		list_del(&page->link);
 | 
					 | 
				
			||||||
		__free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
 | 
					 | 
				
			||||||
		page->page_hpa = INVALID_PAGE;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	free_page((unsigned long)vcpu->mmu.pae_root);
 | 
						free_page((unsigned long)vcpu->mmu.pae_root);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1260,18 +1231,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ASSERT(vcpu);
 | 
						ASSERT(vcpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (i = 0; i < KVM_NUM_MMU_PAGES; i++) {
 | 
						vcpu->kvm->n_free_mmu_pages = KVM_NUM_MMU_PAGES;
 | 
				
			||||||
		struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		INIT_LIST_HEAD(&page_header->link);
 | 
					 | 
				
			||||||
		if ((page = alloc_page(GFP_KERNEL)) == NULL)
 | 
					 | 
				
			||||||
			goto error_1;
 | 
					 | 
				
			||||||
		set_page_private(page, (unsigned long)page_header);
 | 
					 | 
				
			||||||
		page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
 | 
					 | 
				
			||||||
		memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
 | 
					 | 
				
			||||||
		list_add(&page_header->link, &vcpu->free_pages);
 | 
					 | 
				
			||||||
		++vcpu->kvm->n_free_mmu_pages;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
 | 
						 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
 | 
				
			||||||
| 
						 | 
					@ -1296,7 +1256,6 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	ASSERT(vcpu);
 | 
						ASSERT(vcpu);
 | 
				
			||||||
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
 | 
						ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
 | 
				
			||||||
	ASSERT(list_empty(&vcpu->free_pages));
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return alloc_mmu_pages(vcpu);
 | 
						return alloc_mmu_pages(vcpu);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -1305,7 +1264,6 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	ASSERT(vcpu);
 | 
						ASSERT(vcpu);
 | 
				
			||||||
	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
 | 
						ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
 | 
				
			||||||
	ASSERT(!list_empty(&vcpu->free_pages));
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return init_kvm_mmu(vcpu);
 | 
						return init_kvm_mmu(vcpu);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -1331,7 +1289,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
 | 
				
			||||||
		if (!test_bit(slot, &page->slot_bitmap))
 | 
							if (!test_bit(slot, &page->slot_bitmap))
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		pt = __va(page->page_hpa);
 | 
							pt = page->spt;
 | 
				
			||||||
		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
 | 
							for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
 | 
				
			||||||
			/* avoid RMW */
 | 
								/* avoid RMW */
 | 
				
			||||||
			if (pt[i] & PT_WRITABLE_MASK) {
 | 
								if (pt[i] & PT_WRITABLE_MASK) {
 | 
				
			||||||
| 
						 | 
					@ -1354,7 +1312,7 @@ void kvm_mmu_zap_all(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mmu_free_memory_caches(vcpu);
 | 
						mmu_free_memory_caches(vcpu);
 | 
				
			||||||
	kvm_arch_ops->tlb_flush(vcpu);
 | 
						kvm_flush_remote_tlbs(vcpu->kvm);
 | 
				
			||||||
	init_kvm_mmu(vcpu);
 | 
						init_kvm_mmu(vcpu);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1364,6 +1322,10 @@ void kvm_mmu_module_exit(void)
 | 
				
			||||||
		kmem_cache_destroy(pte_chain_cache);
 | 
							kmem_cache_destroy(pte_chain_cache);
 | 
				
			||||||
	if (rmap_desc_cache)
 | 
						if (rmap_desc_cache)
 | 
				
			||||||
		kmem_cache_destroy(rmap_desc_cache);
 | 
							kmem_cache_destroy(rmap_desc_cache);
 | 
				
			||||||
 | 
						if (mmu_page_cache)
 | 
				
			||||||
 | 
							kmem_cache_destroy(mmu_page_cache);
 | 
				
			||||||
 | 
						if (mmu_page_header_cache)
 | 
				
			||||||
 | 
							kmem_cache_destroy(mmu_page_header_cache);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int kvm_mmu_module_init(void)
 | 
					int kvm_mmu_module_init(void)
 | 
				
			||||||
| 
						 | 
					@ -1379,6 +1341,18 @@ int kvm_mmu_module_init(void)
 | 
				
			||||||
	if (!rmap_desc_cache)
 | 
						if (!rmap_desc_cache)
 | 
				
			||||||
		goto nomem;
 | 
							goto nomem;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mmu_page_cache = kmem_cache_create("kvm_mmu_page",
 | 
				
			||||||
 | 
										   PAGE_SIZE,
 | 
				
			||||||
 | 
										   PAGE_SIZE, 0, NULL, NULL);
 | 
				
			||||||
 | 
						if (!mmu_page_cache)
 | 
				
			||||||
 | 
							goto nomem;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
 | 
				
			||||||
 | 
											  sizeof(struct kvm_mmu_page),
 | 
				
			||||||
 | 
											  0, 0, NULL, NULL);
 | 
				
			||||||
 | 
						if (!mmu_page_header_cache)
 | 
				
			||||||
 | 
							goto nomem;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
nomem:
 | 
					nomem:
 | 
				
			||||||
| 
						 | 
					@ -1482,7 +1456,7 @@ static int count_writable_mappings(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	int i;
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
 | 
						list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
 | 
				
			||||||
		u64 *pt = __va(page->page_hpa);
 | 
							u64 *pt = page->spt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (page->role.level != PT_PAGE_TABLE_LEVEL)
 | 
							if (page->role.level != PT_PAGE_TABLE_LEVEL)
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -31,7 +31,6 @@
 | 
				
			||||||
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
 | 
						#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
 | 
				
			||||||
	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 | 
						#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 | 
				
			||||||
	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
 | 
						#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
 | 
				
			||||||
	#define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK
 | 
					 | 
				
			||||||
	#ifdef CONFIG_X86_64
 | 
						#ifdef CONFIG_X86_64
 | 
				
			||||||
	#define PT_MAX_FULL_LEVELS 4
 | 
						#define PT_MAX_FULL_LEVELS 4
 | 
				
			||||||
	#else
 | 
						#else
 | 
				
			||||||
| 
						 | 
					@ -46,7 +45,6 @@
 | 
				
			||||||
	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
 | 
						#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
 | 
				
			||||||
	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 | 
						#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 | 
				
			||||||
	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
 | 
						#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
 | 
				
			||||||
	#define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK
 | 
					 | 
				
			||||||
	#define PT_MAX_FULL_LEVELS 2
 | 
						#define PT_MAX_FULL_LEVELS 2
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
	#error Invalid PTTYPE value
 | 
						#error Invalid PTTYPE value
 | 
				
			||||||
| 
						 | 
					@ -192,40 +190,143 @@ static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
 | 
				
			||||||
	mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]);
 | 
						mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte,
 | 
					static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
 | 
				
			||||||
			   u64 *shadow_pte, u64 access_bits, gfn_t gfn)
 | 
									  u64 *shadow_pte,
 | 
				
			||||||
 | 
									  gpa_t gaddr,
 | 
				
			||||||
 | 
									  pt_element_t *gpte,
 | 
				
			||||||
 | 
									  u64 access_bits,
 | 
				
			||||||
 | 
									  int user_fault,
 | 
				
			||||||
 | 
									  int write_fault,
 | 
				
			||||||
 | 
									  int *ptwrite,
 | 
				
			||||||
 | 
									  struct guest_walker *walker,
 | 
				
			||||||
 | 
									  gfn_t gfn)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	ASSERT(*shadow_pte == 0);
 | 
						hpa_t paddr;
 | 
				
			||||||
	access_bits &= guest_pte;
 | 
						int dirty = *gpte & PT_DIRTY_MASK;
 | 
				
			||||||
	*shadow_pte = (guest_pte & PT_PTE_COPY_MASK);
 | 
						u64 spte = *shadow_pte;
 | 
				
			||||||
	set_pte_common(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK,
 | 
						int was_rmapped = is_rmap_pte(spte);
 | 
				
			||||||
		       guest_pte & PT_DIRTY_MASK, access_bits, gfn);
 | 
					
 | 
				
			||||||
 | 
						pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d"
 | 
				
			||||||
 | 
							 " user_fault %d gfn %lx\n",
 | 
				
			||||||
 | 
							 __FUNCTION__, spte, (u64)*gpte, access_bits,
 | 
				
			||||||
 | 
							 write_fault, user_fault, gfn);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (write_fault && !dirty) {
 | 
				
			||||||
 | 
							*gpte |= PT_DIRTY_MASK;
 | 
				
			||||||
 | 
							dirty = 1;
 | 
				
			||||||
 | 
							FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
 | 
				
			||||||
 | 
						spte |= *gpte & PT64_NX_MASK;
 | 
				
			||||||
 | 
						if (!dirty)
 | 
				
			||||||
 | 
							access_bits &= ~PT_WRITABLE_MASK;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spte |= PT_PRESENT_MASK;
 | 
				
			||||||
 | 
						if (access_bits & PT_USER_MASK)
 | 
				
			||||||
 | 
							spte |= PT_USER_MASK;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (is_error_hpa(paddr)) {
 | 
				
			||||||
 | 
							spte |= gaddr;
 | 
				
			||||||
 | 
							spte |= PT_SHADOW_IO_MARK;
 | 
				
			||||||
 | 
							spte &= ~PT_PRESENT_MASK;
 | 
				
			||||||
 | 
							set_shadow_pte(shadow_pte, spte);
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						spte |= paddr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if ((access_bits & PT_WRITABLE_MASK)
 | 
				
			||||||
 | 
						    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
 | 
				
			||||||
 | 
							struct kvm_mmu_page *shadow;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							spte |= PT_WRITABLE_MASK;
 | 
				
			||||||
 | 
							if (user_fault) {
 | 
				
			||||||
 | 
								mmu_unshadow(vcpu, gfn);
 | 
				
			||||||
 | 
								goto unshadowed;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							shadow = kvm_mmu_lookup_page(vcpu, gfn);
 | 
				
			||||||
 | 
							if (shadow) {
 | 
				
			||||||
 | 
								pgprintk("%s: found shadow page for %lx, marking ro\n",
 | 
				
			||||||
 | 
									 __FUNCTION__, gfn);
 | 
				
			||||||
 | 
								access_bits &= ~PT_WRITABLE_MASK;
 | 
				
			||||||
 | 
								if (is_writeble_pte(spte)) {
 | 
				
			||||||
 | 
									spte &= ~PT_WRITABLE_MASK;
 | 
				
			||||||
 | 
									kvm_arch_ops->tlb_flush(vcpu);
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								if (write_fault)
 | 
				
			||||||
 | 
									*ptwrite = 1;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					unshadowed:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (access_bits & PT_WRITABLE_MASK)
 | 
				
			||||||
 | 
							mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						set_shadow_pte(shadow_pte, spte);
 | 
				
			||||||
 | 
						page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
 | 
				
			||||||
 | 
						if (!was_rmapped)
 | 
				
			||||||
 | 
							rmap_add(vcpu, shadow_pte);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde,
 | 
					static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte,
 | 
				
			||||||
			   u64 *shadow_pte, u64 access_bits, gfn_t gfn)
 | 
								   u64 *shadow_pte, u64 access_bits,
 | 
				
			||||||
 | 
								   int user_fault, int write_fault, int *ptwrite,
 | 
				
			||||||
 | 
								   struct guest_walker *walker, gfn_t gfn)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						access_bits &= *gpte;
 | 
				
			||||||
 | 
						FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK,
 | 
				
			||||||
 | 
								      gpte, access_bits, user_fault, write_fault,
 | 
				
			||||||
 | 
								      ptwrite, walker, gfn);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 | 
				
			||||||
 | 
								      u64 *spte, const void *pte, int bytes)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						pt_element_t gpte;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (bytes < sizeof(pt_element_t))
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						gpte = *(const pt_element_t *)pte;
 | 
				
			||||||
 | 
						if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK))
 | 
				
			||||||
 | 
							return;
 | 
				
			||||||
 | 
						pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
 | 
				
			||||||
 | 
						FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0,
 | 
				
			||||||
 | 
							       0, NULL, NULL,
 | 
				
			||||||
 | 
							       (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde,
 | 
				
			||||||
 | 
								   u64 *shadow_pte, u64 access_bits,
 | 
				
			||||||
 | 
								   int user_fault, int write_fault, int *ptwrite,
 | 
				
			||||||
 | 
								   struct guest_walker *walker, gfn_t gfn)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	gpa_t gaddr;
 | 
						gpa_t gaddr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ASSERT(*shadow_pte == 0);
 | 
						access_bits &= *gpde;
 | 
				
			||||||
	access_bits &= guest_pde;
 | 
					 | 
				
			||||||
	gaddr = (gpa_t)gfn << PAGE_SHIFT;
 | 
						gaddr = (gpa_t)gfn << PAGE_SHIFT;
 | 
				
			||||||
	if (PTTYPE == 32 && is_cpuid_PSE36())
 | 
						if (PTTYPE == 32 && is_cpuid_PSE36())
 | 
				
			||||||
		gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) <<
 | 
							gaddr |= (*gpde & PT32_DIR_PSE36_MASK) <<
 | 
				
			||||||
			(32 - PT32_DIR_PSE36_SHIFT);
 | 
								(32 - PT32_DIR_PSE36_SHIFT);
 | 
				
			||||||
	*shadow_pte = guest_pde & PT_PTE_COPY_MASK;
 | 
						FNAME(set_pte_common)(vcpu, shadow_pte, gaddr,
 | 
				
			||||||
	set_pte_common(vcpu, shadow_pte, gaddr,
 | 
								      gpde, access_bits, user_fault, write_fault,
 | 
				
			||||||
		       guest_pde & PT_DIRTY_MASK, access_bits, gfn);
 | 
								      ptwrite, walker, gfn);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Fetch a shadow pte for a specific level in the paging hierarchy.
 | 
					 * Fetch a shadow pte for a specific level in the paging hierarchy.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 | 
					static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 | 
				
			||||||
			      struct guest_walker *walker)
 | 
								 struct guest_walker *walker,
 | 
				
			||||||
 | 
								 int user_fault, int write_fault, int *ptwrite)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	hpa_t shadow_addr;
 | 
						hpa_t shadow_addr;
 | 
				
			||||||
	int level;
 | 
						int level;
 | 
				
			||||||
 | 
						u64 *shadow_ent;
 | 
				
			||||||
	u64 *prev_shadow_ent = NULL;
 | 
						u64 *prev_shadow_ent = NULL;
 | 
				
			||||||
	pt_element_t *guest_ent = walker->ptep;
 | 
						pt_element_t *guest_ent = walker->ptep;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -242,37 +343,23 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for (; ; level--) {
 | 
						for (; ; level--) {
 | 
				
			||||||
		u32 index = SHADOW_PT_INDEX(addr, level);
 | 
							u32 index = SHADOW_PT_INDEX(addr, level);
 | 
				
			||||||
		u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index;
 | 
					 | 
				
			||||||
		struct kvm_mmu_page *shadow_page;
 | 
							struct kvm_mmu_page *shadow_page;
 | 
				
			||||||
		u64 shadow_pte;
 | 
							u64 shadow_pte;
 | 
				
			||||||
		int metaphysical;
 | 
							int metaphysical;
 | 
				
			||||||
		gfn_t table_gfn;
 | 
							gfn_t table_gfn;
 | 
				
			||||||
		unsigned hugepage_access = 0;
 | 
							unsigned hugepage_access = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							shadow_ent = ((u64 *)__va(shadow_addr)) + index;
 | 
				
			||||||
		if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
 | 
							if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
 | 
				
			||||||
			if (level == PT_PAGE_TABLE_LEVEL)
 | 
								if (level == PT_PAGE_TABLE_LEVEL)
 | 
				
			||||||
				return shadow_ent;
 | 
									break;
 | 
				
			||||||
			shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
 | 
								shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
 | 
				
			||||||
			prev_shadow_ent = shadow_ent;
 | 
								prev_shadow_ent = shadow_ent;
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (level == PT_PAGE_TABLE_LEVEL) {
 | 
							if (level == PT_PAGE_TABLE_LEVEL)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
			if (walker->level == PT_DIRECTORY_LEVEL) {
 | 
					 | 
				
			||||||
				if (prev_shadow_ent)
 | 
					 | 
				
			||||||
					*prev_shadow_ent |= PT_SHADOW_PS_MARK;
 | 
					 | 
				
			||||||
				FNAME(set_pde)(vcpu, *guest_ent, shadow_ent,
 | 
					 | 
				
			||||||
					       walker->inherited_ar,
 | 
					 | 
				
			||||||
					       walker->gfn);
 | 
					 | 
				
			||||||
			} else {
 | 
					 | 
				
			||||||
				ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
 | 
					 | 
				
			||||||
				FNAME(set_pte)(vcpu, *guest_ent, shadow_ent,
 | 
					 | 
				
			||||||
					       walker->inherited_ar,
 | 
					 | 
				
			||||||
					       walker->gfn);
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			return shadow_ent;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if (level - 1 == PT_PAGE_TABLE_LEVEL
 | 
							if (level - 1 == PT_PAGE_TABLE_LEVEL
 | 
				
			||||||
		    && walker->level == PT_DIRECTORY_LEVEL) {
 | 
							    && walker->level == PT_DIRECTORY_LEVEL) {
 | 
				
			||||||
| 
						 | 
					@ -289,90 +376,24 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 | 
				
			||||||
		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
 | 
							shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
 | 
				
			||||||
					       metaphysical, hugepage_access,
 | 
										       metaphysical, hugepage_access,
 | 
				
			||||||
					       shadow_ent);
 | 
										       shadow_ent);
 | 
				
			||||||
		shadow_addr = shadow_page->page_hpa;
 | 
							shadow_addr = __pa(shadow_page->spt);
 | 
				
			||||||
		shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
 | 
							shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
 | 
				
			||||||
			| PT_WRITABLE_MASK | PT_USER_MASK;
 | 
								| PT_WRITABLE_MASK | PT_USER_MASK;
 | 
				
			||||||
		*shadow_ent = shadow_pte;
 | 
							*shadow_ent = shadow_pte;
 | 
				
			||||||
		prev_shadow_ent = shadow_ent;
 | 
							prev_shadow_ent = shadow_ent;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
						if (walker->level == PT_DIRECTORY_LEVEL) {
 | 
				
			||||||
 * The guest faulted for write.  We need to
 | 
							FNAME(set_pde)(vcpu, guest_ent, shadow_ent,
 | 
				
			||||||
 *
 | 
								       walker->inherited_ar, user_fault, write_fault,
 | 
				
			||||||
 * - check write permissions
 | 
								       ptwrite, walker, walker->gfn);
 | 
				
			||||||
 * - update the guest pte dirty bit
 | 
						} else {
 | 
				
			||||||
 * - update our own dirty page tracking structures
 | 
							ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
 | 
				
			||||||
 */
 | 
							FNAME(set_pte)(vcpu, guest_ent, shadow_ent,
 | 
				
			||||||
static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
 | 
								       walker->inherited_ar, user_fault, write_fault,
 | 
				
			||||||
			       u64 *shadow_ent,
 | 
								       ptwrite, walker, walker->gfn);
 | 
				
			||||||
			       struct guest_walker *walker,
 | 
					 | 
				
			||||||
			       gva_t addr,
 | 
					 | 
				
			||||||
			       int user,
 | 
					 | 
				
			||||||
			       int *write_pt)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	pt_element_t *guest_ent;
 | 
					 | 
				
			||||||
	int writable_shadow;
 | 
					 | 
				
			||||||
	gfn_t gfn;
 | 
					 | 
				
			||||||
	struct kvm_mmu_page *page;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (is_writeble_pte(*shadow_ent))
 | 
					 | 
				
			||||||
		return !user || (*shadow_ent & PT_USER_MASK);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	writable_shadow = *shadow_ent & PT_SHADOW_WRITABLE_MASK;
 | 
					 | 
				
			||||||
	if (user) {
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * User mode access.  Fail if it's a kernel page or a read-only
 | 
					 | 
				
			||||||
		 * page.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		if (!(*shadow_ent & PT_SHADOW_USER_MASK) || !writable_shadow)
 | 
					 | 
				
			||||||
			return 0;
 | 
					 | 
				
			||||||
		ASSERT(*shadow_ent & PT_USER_MASK);
 | 
					 | 
				
			||||||
	} else
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * Kernel mode access.  Fail if it's a read-only page and
 | 
					 | 
				
			||||||
		 * supervisor write protection is enabled.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		if (!writable_shadow) {
 | 
					 | 
				
			||||||
			if (is_write_protection(vcpu))
 | 
					 | 
				
			||||||
				return 0;
 | 
					 | 
				
			||||||
			*shadow_ent &= ~PT_USER_MASK;
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	guest_ent = walker->ptep;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (!is_present_pte(*guest_ent)) {
 | 
					 | 
				
			||||||
		*shadow_ent = 0;
 | 
					 | 
				
			||||||
		return 0;
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
						return shadow_ent;
 | 
				
			||||||
	gfn = walker->gfn;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	if (user) {
 | 
					 | 
				
			||||||
		/*
 | 
					 | 
				
			||||||
		 * Usermode page faults won't be for page table updates.
 | 
					 | 
				
			||||||
		 */
 | 
					 | 
				
			||||||
		while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
 | 
					 | 
				
			||||||
			pgprintk("%s: zap %lx %x\n",
 | 
					 | 
				
			||||||
				 __FUNCTION__, gfn, page->role.word);
 | 
					 | 
				
			||||||
			kvm_mmu_zap_page(vcpu, page);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	} else if (kvm_mmu_lookup_page(vcpu, gfn)) {
 | 
					 | 
				
			||||||
		pgprintk("%s: found shadow page for %lx, marking ro\n",
 | 
					 | 
				
			||||||
			 __FUNCTION__, gfn);
 | 
					 | 
				
			||||||
		mark_page_dirty(vcpu->kvm, gfn);
 | 
					 | 
				
			||||||
		FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
 | 
					 | 
				
			||||||
		*guest_ent |= PT_DIRTY_MASK;
 | 
					 | 
				
			||||||
		*write_pt = 1;
 | 
					 | 
				
			||||||
		return 0;
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	mark_page_dirty(vcpu->kvm, gfn);
 | 
					 | 
				
			||||||
	*shadow_ent |= PT_WRITABLE_MASK;
 | 
					 | 
				
			||||||
	FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
 | 
					 | 
				
			||||||
	*guest_ent |= PT_DIRTY_MASK;
 | 
					 | 
				
			||||||
	rmap_add(vcpu, shadow_ent);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return 1;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
| 
						 | 
					@ -397,7 +418,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 | 
				
			||||||
	int fetch_fault = error_code & PFERR_FETCH_MASK;
 | 
						int fetch_fault = error_code & PFERR_FETCH_MASK;
 | 
				
			||||||
	struct guest_walker walker;
 | 
						struct guest_walker walker;
 | 
				
			||||||
	u64 *shadow_pte;
 | 
						u64 *shadow_pte;
 | 
				
			||||||
	int fixed;
 | 
					 | 
				
			||||||
	int write_pt = 0;
 | 
						int write_pt = 0;
 | 
				
			||||||
	int r;
 | 
						int r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -421,27 +441,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 | 
				
			||||||
		pgprintk("%s: guest page fault\n", __FUNCTION__);
 | 
							pgprintk("%s: guest page fault\n", __FUNCTION__);
 | 
				
			||||||
		inject_page_fault(vcpu, addr, walker.error_code);
 | 
							inject_page_fault(vcpu, addr, walker.error_code);
 | 
				
			||||||
		FNAME(release_walker)(&walker);
 | 
							FNAME(release_walker)(&walker);
 | 
				
			||||||
 | 
							vcpu->last_pt_write_count = 0; /* reset fork detector */
 | 
				
			||||||
		return 0;
 | 
							return 0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
 | 
						shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
 | 
				
			||||||
	pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__,
 | 
									  &write_pt);
 | 
				
			||||||
		 shadow_pte, *shadow_pte);
 | 
						pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
 | 
				
			||||||
 | 
							 shadow_pte, *shadow_pte, write_pt);
 | 
				
			||||||
	/*
 | 
					 | 
				
			||||||
	 * Update the shadow pte.
 | 
					 | 
				
			||||||
	 */
 | 
					 | 
				
			||||||
	if (write_fault)
 | 
					 | 
				
			||||||
		fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr,
 | 
					 | 
				
			||||||
					    user_fault, &write_pt);
 | 
					 | 
				
			||||||
	else
 | 
					 | 
				
			||||||
		fixed = fix_read_pf(shadow_pte);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__,
 | 
					 | 
				
			||||||
		 shadow_pte, *shadow_pte);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	FNAME(release_walker)(&walker);
 | 
						FNAME(release_walker)(&walker);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!write_pt)
 | 
				
			||||||
 | 
							vcpu->last_pt_write_count = 0; /* reset fork detector */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * mmio: emulate if accessible, otherwise its a guest fault.
 | 
						 * mmio: emulate if accessible, otherwise its a guest fault.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
| 
						 | 
					@ -478,7 +491,5 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
 | 
				
			||||||
#undef PT_INDEX
 | 
					#undef PT_INDEX
 | 
				
			||||||
#undef SHADOW_PT_INDEX
 | 
					#undef SHADOW_PT_INDEX
 | 
				
			||||||
#undef PT_LEVEL_MASK
 | 
					#undef PT_LEVEL_MASK
 | 
				
			||||||
#undef PT_PTE_COPY_MASK
 | 
					 | 
				
			||||||
#undef PT_NON_PTE_COPY_MASK
 | 
					 | 
				
			||||||
#undef PT_DIR_BASE_ADDR_MASK
 | 
					#undef PT_DIR_BASE_ADDR_MASK
 | 
				
			||||||
#undef PT_MAX_FULL_LEVELS
 | 
					#undef PT_MAX_FULL_LEVELS
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -14,16 +14,17 @@
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "kvm_svm.h"
 | 
				
			||||||
 | 
					#include "x86_emulate.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/module.h>
 | 
					#include <linux/module.h>
 | 
				
			||||||
#include <linux/kernel.h>
 | 
					#include <linux/kernel.h>
 | 
				
			||||||
#include <linux/vmalloc.h>
 | 
					#include <linux/vmalloc.h>
 | 
				
			||||||
#include <linux/highmem.h>
 | 
					#include <linux/highmem.h>
 | 
				
			||||||
#include <linux/profile.h>
 | 
					#include <linux/profile.h>
 | 
				
			||||||
#include <linux/sched.h>
 | 
					#include <linux/sched.h>
 | 
				
			||||||
#include <asm/desc.h>
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "kvm_svm.h"
 | 
					#include <asm/desc.h>
 | 
				
			||||||
#include "x86_emulate.h"
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
MODULE_AUTHOR("Qumranet");
 | 
					MODULE_AUTHOR("Qumranet");
 | 
				
			||||||
MODULE_LICENSE("GPL");
 | 
					MODULE_LICENSE("GPL");
 | 
				
			||||||
| 
						 | 
					@ -378,7 +379,7 @@ static __init int svm_hardware_setup(void)
 | 
				
			||||||
	int cpu;
 | 
						int cpu;
 | 
				
			||||||
	struct page *iopm_pages;
 | 
						struct page *iopm_pages;
 | 
				
			||||||
	struct page *msrpm_pages;
 | 
						struct page *msrpm_pages;
 | 
				
			||||||
	void *msrpm_va;
 | 
						void *iopm_va, *msrpm_va;
 | 
				
			||||||
	int r;
 | 
						int r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	kvm_emulator_want_group7_invlpg();
 | 
						kvm_emulator_want_group7_invlpg();
 | 
				
			||||||
| 
						 | 
					@ -387,8 +388,10 @@ static __init int svm_hardware_setup(void)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!iopm_pages)
 | 
						if (!iopm_pages)
 | 
				
			||||||
		return -ENOMEM;
 | 
							return -ENOMEM;
 | 
				
			||||||
	memset(page_address(iopm_pages), 0xff,
 | 
					
 | 
				
			||||||
					PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
 | 
						iopm_va = page_address(iopm_pages);
 | 
				
			||||||
 | 
						memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
 | 
				
			||||||
 | 
						clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */
 | 
				
			||||||
	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
 | 
						iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -579,7 +582,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 | 
				
			||||||
		goto out2;
 | 
							goto out2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	vcpu->svm->vmcb = page_address(page);
 | 
						vcpu->svm->vmcb = page_address(page);
 | 
				
			||||||
	memset(vcpu->svm->vmcb, 0, PAGE_SIZE);
 | 
						clear_page(vcpu->svm->vmcb);
 | 
				
			||||||
	vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
 | 
						vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
 | 
				
			||||||
	vcpu->svm->asid_generation = 0;
 | 
						vcpu->svm->asid_generation = 0;
 | 
				
			||||||
	memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs));
 | 
						memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs));
 | 
				
			||||||
| 
						 | 
					@ -587,9 +590,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	fx_init(vcpu);
 | 
						fx_init(vcpu);
 | 
				
			||||||
	vcpu->fpu_active = 1;
 | 
						vcpu->fpu_active = 1;
 | 
				
			||||||
	vcpu->apic_base = 0xfee00000 |
 | 
						vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
 | 
				
			||||||
			/*for vcpu 0*/ MSR_IA32_APICBASE_BSP |
 | 
						if (vcpu == &vcpu->kvm->vcpus[0])
 | 
				
			||||||
			MSR_IA32_APICBASE_ENABLE;
 | 
							vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -955,7 +958,7 @@ static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 | 
				
			||||||
	 * VMCB is undefined after a SHUTDOWN intercept
 | 
						 * VMCB is undefined after a SHUTDOWN intercept
 | 
				
			||||||
	 * so reinitialize it.
 | 
						 * so reinitialize it.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	memset(vcpu->svm->vmcb, 0, PAGE_SIZE);
 | 
						clear_page(vcpu->svm->vmcb);
 | 
				
			||||||
	init_vmcb(vcpu->svm->vmcb);
 | 
						init_vmcb(vcpu->svm->vmcb);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
 | 
						kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
 | 
				
			||||||
| 
						 | 
					@ -1113,12 +1116,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
 | 
						vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
 | 
				
			||||||
	skip_emulated_instruction(vcpu);
 | 
						skip_emulated_instruction(vcpu);
 | 
				
			||||||
	if (vcpu->irq_summary)
 | 
						return kvm_emulate_halt(vcpu);
 | 
				
			||||||
		return 1;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	kvm_run->exit_reason = KVM_EXIT_HLT;
 | 
					 | 
				
			||||||
	++vcpu->stat.halt_exits;
 | 
					 | 
				
			||||||
	return 0;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 | 
					static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 | 
				
			||||||
| 
						 | 
					@ -1473,6 +1471,11 @@ static void load_db_regs(unsigned long *db_regs)
 | 
				
			||||||
	asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
 | 
						asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static void svm_flush_tlb(struct kvm_vcpu *vcpu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						force_new_asid(vcpu);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 | 
					static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	u16 fs_selector;
 | 
						u16 fs_selector;
 | 
				
			||||||
| 
						 | 
					@ -1481,11 +1484,20 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 | 
				
			||||||
	int r;
 | 
						int r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
again:
 | 
					again:
 | 
				
			||||||
 | 
						r = kvm_mmu_reload(vcpu);
 | 
				
			||||||
 | 
						if (unlikely(r))
 | 
				
			||||||
 | 
							return r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!vcpu->mmio_read_completed)
 | 
						if (!vcpu->mmio_read_completed)
 | 
				
			||||||
		do_interrupt_requests(vcpu, kvm_run);
 | 
							do_interrupt_requests(vcpu, kvm_run);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	clgi();
 | 
						clgi();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						vcpu->guest_mode = 1;
 | 
				
			||||||
 | 
						if (vcpu->requests)
 | 
				
			||||||
 | 
							if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests))
 | 
				
			||||||
 | 
							    svm_flush_tlb(vcpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pre_svm_run(vcpu);
 | 
						pre_svm_run(vcpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	save_host_msrs(vcpu);
 | 
						save_host_msrs(vcpu);
 | 
				
			||||||
| 
						 | 
					@ -1617,6 +1629,8 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
		: "cc", "memory" );
 | 
							: "cc", "memory" );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						vcpu->guest_mode = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (vcpu->fpu_active) {
 | 
						if (vcpu->fpu_active) {
 | 
				
			||||||
		fx_save(vcpu->guest_fx_image);
 | 
							fx_save(vcpu->guest_fx_image);
 | 
				
			||||||
		fx_restore(vcpu->host_fx_image);
 | 
							fx_restore(vcpu->host_fx_image);
 | 
				
			||||||
| 
						 | 
					@ -1681,11 +1695,6 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 | 
				
			||||||
	return r;
 | 
						return r;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void svm_flush_tlb(struct kvm_vcpu *vcpu)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	force_new_asid(vcpu);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 | 
					static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	vcpu->svm->vmcb->save.cr3 = root;
 | 
						vcpu->svm->vmcb->save.cr3 = root;
 | 
				
			||||||
| 
						 | 
					@ -1727,6 +1736,12 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int is_disabled(void)
 | 
					static int is_disabled(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						u64 vm_cr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						rdmsrl(MSR_VM_CR, vm_cr);
 | 
				
			||||||
 | 
						if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
 | 
				
			||||||
 | 
							return 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -175,8 +175,11 @@ struct __attribute__ ((__packed__)) vmcb {
 | 
				
			||||||
#define SVM_CPUID_FUNC 0x8000000a
 | 
					#define SVM_CPUID_FUNC 0x8000000a
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define MSR_EFER_SVME_MASK (1ULL << 12)
 | 
					#define MSR_EFER_SVME_MASK (1ULL << 12)
 | 
				
			||||||
 | 
					#define MSR_VM_CR       0xc0010114
 | 
				
			||||||
#define MSR_VM_HSAVE_PA 0xc0010117ULL
 | 
					#define MSR_VM_HSAVE_PA 0xc0010117ULL
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define SVM_VM_CR_SVM_DISABLE 4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SVM_SELECTOR_S_SHIFT 4
 | 
					#define SVM_SELECTOR_S_SHIFT 4
 | 
				
			||||||
#define SVM_SELECTOR_DPL_SHIFT 5
 | 
					#define SVM_SELECTOR_DPL_SHIFT 5
 | 
				
			||||||
#define SVM_SELECTOR_P_SHIFT 7
 | 
					#define SVM_SELECTOR_P_SHIFT 7
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							| 
						 | 
					@ -98,8 +98,11 @@ static u8 opcode_table[256] = {
 | 
				
			||||||
	0, 0, 0, 0,
 | 
						0, 0, 0, 0,
 | 
				
			||||||
	/* 0x40 - 0x4F */
 | 
						/* 0x40 - 0x4F */
 | 
				
			||||||
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
				
			||||||
	/* 0x50 - 0x5F */
 | 
						/* 0x50 - 0x57 */
 | 
				
			||||||
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						0, 0, 0, 0, 0, 0, 0, 0,
 | 
				
			||||||
 | 
						/* 0x58 - 0x5F */
 | 
				
			||||||
 | 
						ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
 | 
				
			||||||
 | 
						ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
 | 
				
			||||||
	/* 0x60 - 0x6F */
 | 
						/* 0x60 - 0x6F */
 | 
				
			||||||
	0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
 | 
						0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
 | 
				
			||||||
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
				
			||||||
| 
						 | 
					@ -128,9 +131,9 @@ static u8 opcode_table[256] = {
 | 
				
			||||||
	/* 0xB0 - 0xBF */
 | 
						/* 0xB0 - 0xBF */
 | 
				
			||||||
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
				
			||||||
	/* 0xC0 - 0xC7 */
 | 
						/* 0xC0 - 0xC7 */
 | 
				
			||||||
	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, 0, 0,
 | 
						ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
 | 
				
			||||||
	0, 0, ByteOp | DstMem | SrcImm | ModRM | Mov,
 | 
						0, ImplicitOps, 0, 0,
 | 
				
			||||||
	    DstMem | SrcImm | ModRM | Mov,
 | 
						ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
 | 
				
			||||||
	/* 0xC8 - 0xCF */
 | 
						/* 0xC8 - 0xCF */
 | 
				
			||||||
	0, 0, 0, 0, 0, 0, 0, 0,
 | 
						0, 0, 0, 0, 0, 0, 0, 0,
 | 
				
			||||||
	/* 0xD0 - 0xD7 */
 | 
						/* 0xD0 - 0xD7 */
 | 
				
			||||||
| 
						 | 
					@ -143,7 +146,8 @@ static u8 opcode_table[256] = {
 | 
				
			||||||
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
						0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 | 
				
			||||||
	/* 0xF0 - 0xF7 */
 | 
						/* 0xF0 - 0xF7 */
 | 
				
			||||||
	0, 0, 0, 0,
 | 
						0, 0, 0, 0,
 | 
				
			||||||
	0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
 | 
						ImplicitOps, 0,
 | 
				
			||||||
 | 
						ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
 | 
				
			||||||
	/* 0xF8 - 0xFF */
 | 
						/* 0xF8 - 0xFF */
 | 
				
			||||||
	0, 0, 0, 0,
 | 
						0, 0, 0, 0,
 | 
				
			||||||
	0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM
 | 
						0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM
 | 
				
			||||||
| 
						 | 
					@ -152,7 +156,7 @@ static u8 opcode_table[256] = {
 | 
				
			||||||
static u16 twobyte_table[256] = {
 | 
					static u16 twobyte_table[256] = {
 | 
				
			||||||
	/* 0x00 - 0x0F */
 | 
						/* 0x00 - 0x0F */
 | 
				
			||||||
	0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0,
 | 
						0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0,
 | 
				
			||||||
	0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
 | 
						0, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
 | 
				
			||||||
	/* 0x10 - 0x1F */
 | 
						/* 0x10 - 0x1F */
 | 
				
			||||||
	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
 | 
						0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
 | 
				
			||||||
	/* 0x20 - 0x2F */
 | 
						/* 0x20 - 0x2F */
 | 
				
			||||||
| 
						 | 
					@ -481,6 +485,7 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 | 
				
			||||||
	int mode = ctxt->mode;
 | 
						int mode = ctxt->mode;
 | 
				
			||||||
	unsigned long modrm_ea;
 | 
						unsigned long modrm_ea;
 | 
				
			||||||
	int use_modrm_ea, index_reg = 0, base_reg = 0, scale, rip_relative = 0;
 | 
						int use_modrm_ea, index_reg = 0, base_reg = 0, scale, rip_relative = 0;
 | 
				
			||||||
 | 
						int no_wb = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Shadow copy of register state. Committed on successful emulation. */
 | 
						/* Shadow copy of register state. Committed on successful emulation. */
 | 
				
			||||||
	unsigned long _regs[NR_VCPU_REGS];
 | 
						unsigned long _regs[NR_VCPU_REGS];
 | 
				
			||||||
| 
						 | 
					@ -1047,7 +1052,7 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 | 
				
			||||||
						      _regs[VCPU_REGS_RSP]),
 | 
											      _regs[VCPU_REGS_RSP]),
 | 
				
			||||||
				     &dst.val, dst.bytes, ctxt)) != 0)
 | 
									     &dst.val, dst.bytes, ctxt)) != 0)
 | 
				
			||||||
				goto done;
 | 
									goto done;
 | 
				
			||||||
			dst.val = dst.orig_val;	/* skanky: disable writeback */
 | 
								no_wb = 1;
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
		default:
 | 
							default:
 | 
				
			||||||
			goto cannot_emulate;
 | 
								goto cannot_emulate;
 | 
				
			||||||
| 
						 | 
					@ -1056,7 +1061,7 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
writeback:
 | 
					writeback:
 | 
				
			||||||
	if ((d & Mov) || (dst.orig_val != dst.val)) {
 | 
						if (!no_wb) {
 | 
				
			||||||
		switch (dst.type) {
 | 
							switch (dst.type) {
 | 
				
			||||||
		case OP_REG:
 | 
							case OP_REG:
 | 
				
			||||||
			/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
 | 
								/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
 | 
				
			||||||
| 
						 | 
					@ -1149,6 +1154,23 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 | 
				
			||||||
	case 0xae ... 0xaf:	/* scas */
 | 
						case 0xae ... 0xaf:	/* scas */
 | 
				
			||||||
		DPRINTF("Urk! I don't handle SCAS.\n");
 | 
							DPRINTF("Urk! I don't handle SCAS.\n");
 | 
				
			||||||
		goto cannot_emulate;
 | 
							goto cannot_emulate;
 | 
				
			||||||
 | 
						case 0xf4:              /* hlt */
 | 
				
			||||||
 | 
							ctxt->vcpu->halt_request = 1;
 | 
				
			||||||
 | 
							goto done;
 | 
				
			||||||
 | 
						case 0xc3: /* ret */
 | 
				
			||||||
 | 
							dst.ptr = &_eip;
 | 
				
			||||||
 | 
							goto pop_instruction;
 | 
				
			||||||
 | 
						case 0x58 ... 0x5f: /* pop reg */
 | 
				
			||||||
 | 
							dst.ptr = (unsigned long *)&_regs[b & 0x7];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pop_instruction:
 | 
				
			||||||
 | 
							if ((rc = ops->read_std(register_address(ctxt->ss_base,
 | 
				
			||||||
 | 
								_regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt)) != 0)
 | 
				
			||||||
 | 
								goto done;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							register_address_increment(_regs[VCPU_REGS_RSP], op_bytes);
 | 
				
			||||||
 | 
							no_wb = 1; /* Disable writeback. */
 | 
				
			||||||
 | 
							break;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	goto writeback;
 | 
						goto writeback;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1302,8 +1324,10 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
twobyte_special_insn:
 | 
					twobyte_special_insn:
 | 
				
			||||||
	/* Disable writeback. */
 | 
						/* Disable writeback. */
 | 
				
			||||||
	dst.orig_val = dst.val;
 | 
						no_wb = 1;
 | 
				
			||||||
	switch (b) {
 | 
						switch (b) {
 | 
				
			||||||
 | 
						case 0x09:		/* wbinvd */
 | 
				
			||||||
 | 
							break;
 | 
				
			||||||
	case 0x0d:		/* GrpP (prefetch) */
 | 
						case 0x0d:		/* GrpP (prefetch) */
 | 
				
			||||||
	case 0x18:		/* Grp16 (prefetch/nop) */
 | 
						case 0x18:		/* Grp16 (prefetch/nop) */
 | 
				
			||||||
		break;
 | 
							break;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -139,6 +139,7 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
 | 
				
			||||||
	put_filp(file);
 | 
						put_filp(file);
 | 
				
			||||||
	return error;
 | 
						return error;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					EXPORT_SYMBOL_GPL(anon_inode_getfd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * A single inode exists for all anon_inode files. Contrary to pipes,
 | 
					 * A single inode exists for all anon_inode files. Contrary to pipes,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -13,7 +13,6 @@
 | 
				
			||||||
#define HPFS_SUPER_MAGIC	0xf995e849
 | 
					#define HPFS_SUPER_MAGIC	0xf995e849
 | 
				
			||||||
#define ISOFS_SUPER_MAGIC	0x9660
 | 
					#define ISOFS_SUPER_MAGIC	0x9660
 | 
				
			||||||
#define JFFS2_SUPER_MAGIC	0x72b6
 | 
					#define JFFS2_SUPER_MAGIC	0x72b6
 | 
				
			||||||
#define KVMFS_SUPER_MAGIC	0x19700426
 | 
					 | 
				
			||||||
#define ANON_INODE_FS_MAGIC	0x09041934
 | 
					#define ANON_INODE_FS_MAGIC	0x09041934
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define MINIX_SUPER_MAGIC	0x137F		/* original minix fs */
 | 
					#define MINIX_SUPER_MAGIC	0x137F		/* original minix fs */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -196,6 +196,8 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
 | 
				
			||||||
#define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
 | 
					#define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
 | 
				
			||||||
#define CPU_LOCK_ACQUIRE	0x0008 /* Acquire all hotcpu locks */
 | 
					#define CPU_LOCK_ACQUIRE	0x0008 /* Acquire all hotcpu locks */
 | 
				
			||||||
#define CPU_LOCK_RELEASE	0x0009 /* Release all hotcpu locks */
 | 
					#define CPU_LOCK_RELEASE	0x0009 /* Release all hotcpu locks */
 | 
				
			||||||
 | 
					#define CPU_DYING		0x000A /* CPU (unsigned)v not running any task,
 | 
				
			||||||
 | 
									        * not handling interrupts, soon dead */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
 | 
					/* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
 | 
				
			||||||
 * operation in progress
 | 
					 * operation in progress
 | 
				
			||||||
| 
						 | 
					@ -208,6 +210,7 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
 | 
				
			||||||
#define CPU_DOWN_PREPARE_FROZEN	(CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
 | 
					#define CPU_DOWN_PREPARE_FROZEN	(CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
 | 
				
			||||||
#define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
 | 
					#define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
 | 
				
			||||||
#define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
 | 
					#define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
 | 
				
			||||||
 | 
					#define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* __KERNEL__ */
 | 
					#endif /* __KERNEL__ */
 | 
				
			||||||
#endif /* _LINUX_NOTIFIER_H */
 | 
					#endif /* _LINUX_NOTIFIER_H */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -7,6 +7,7 @@
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <linux/errno.h>
 | 
					#include <linux/errno.h>
 | 
				
			||||||
 | 
					#include <asm/system.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern void cpu_idle(void);
 | 
					extern void cpu_idle(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -102,7 +103,11 @@ static inline void smp_send_reschedule(int cpu) { }
 | 
				
			||||||
static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
 | 
					static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
 | 
				
			||||||
					   void *info, int retry, int wait)
 | 
										   void *info, int retry, int wait)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return -EBUSY;
 | 
						WARN_ON(cpuid != 0);
 | 
				
			||||||
 | 
						local_irq_disable();
 | 
				
			||||||
 | 
						func(info);
 | 
				
			||||||
 | 
						local_irq_enable();
 | 
				
			||||||
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* !SMP */
 | 
					#endif /* !SMP */
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										16
									
								
								kernel/cpu.c
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								kernel/cpu.c
									
									
									
									
									
								
							| 
						 | 
					@ -103,11 +103,19 @@ static inline void check_for_tasks(int cpu)
 | 
				
			||||||
	write_unlock_irq(&tasklist_lock);
 | 
						write_unlock_irq(&tasklist_lock);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct take_cpu_down_param {
 | 
				
			||||||
 | 
						unsigned long mod;
 | 
				
			||||||
 | 
						void *hcpu;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Take this CPU down. */
 | 
					/* Take this CPU down. */
 | 
				
			||||||
static int take_cpu_down(void *unused)
 | 
					static int take_cpu_down(void *_param)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						struct take_cpu_down_param *param = _param;
 | 
				
			||||||
	int err;
 | 
						int err;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
 | 
				
			||||||
 | 
									param->hcpu);
 | 
				
			||||||
	/* Ensure this CPU doesn't handle any more interrupts. */
 | 
						/* Ensure this CPU doesn't handle any more interrupts. */
 | 
				
			||||||
	err = __cpu_disable();
 | 
						err = __cpu_disable();
 | 
				
			||||||
	if (err < 0)
 | 
						if (err < 0)
 | 
				
			||||||
| 
						 | 
					@ -127,6 +135,10 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
 | 
				
			||||||
	cpumask_t old_allowed, tmp;
 | 
						cpumask_t old_allowed, tmp;
 | 
				
			||||||
	void *hcpu = (void *)(long)cpu;
 | 
						void *hcpu = (void *)(long)cpu;
 | 
				
			||||||
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 | 
						unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 | 
				
			||||||
 | 
						struct take_cpu_down_param tcd_param = {
 | 
				
			||||||
 | 
							.mod = mod,
 | 
				
			||||||
 | 
							.hcpu = hcpu,
 | 
				
			||||||
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (num_online_cpus() == 1)
 | 
						if (num_online_cpus() == 1)
 | 
				
			||||||
		return -EBUSY;
 | 
							return -EBUSY;
 | 
				
			||||||
| 
						 | 
					@ -153,7 +165,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
 | 
				
			||||||
	set_cpus_allowed(current, tmp);
 | 
						set_cpus_allowed(current, tmp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	mutex_lock(&cpu_bitmask_lock);
 | 
						mutex_lock(&cpu_bitmask_lock);
 | 
				
			||||||
	p = __stop_machine_run(take_cpu_down, NULL, cpu);
 | 
						p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
 | 
				
			||||||
	mutex_unlock(&cpu_bitmask_lock);
 | 
						mutex_unlock(&cpu_bitmask_lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (IS_ERR(p) || cpu_online(cpu)) {
 | 
						if (IS_ERR(p) || cpu_online(cpu)) {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2138,6 +2138,9 @@ static void common_cpu_mem_hotplug_unplug(void)
 | 
				
			||||||
static int cpuset_handle_cpuhp(struct notifier_block *nb,
 | 
					static int cpuset_handle_cpuhp(struct notifier_block *nb,
 | 
				
			||||||
				unsigned long phase, void *cpu)
 | 
									unsigned long phase, void *cpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
 | 
				
			||||||
 | 
							return NOTIFY_DONE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	common_cpu_mem_hotplug_unplug();
 | 
						common_cpu_mem_hotplug_unplug();
 | 
				
			||||||
	return 0;
 | 
						return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue