mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 10:40:15 +02:00 
			
		
		
		
	KVM: X86: Provide a capability to disable MWAIT intercepts
Allowing a guest to execute MWAIT without interception enables a guest to put a (physical) CPU into a power saving state, where it takes longer to return from than what may be desired by the host. Don't give a guest that power over a host by default. (Especially, since nothing prevents a guest from using MWAIT even when it is not advertised via CPUID.) Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Radim Krčmář <rkrcmar@redhat.com> Cc: Jan H. Schönherr <jschoenh@amazon.de> Signed-off-by: Wanpeng Li <wanpengli@tencent.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
		
							parent
							
								
									4956aa3b8b
								
							
						
					
					
						commit
						4d5422cea3
					
				
					 8 changed files with 53 additions and 25 deletions
				
			
		| 
						 | 
				
			
			@ -4358,6 +4358,24 @@ enables QEMU to build error log and branch to guest kernel registered
 | 
			
		|||
machine check handling routine. Without this capability KVM will
 | 
			
		||||
branch to guests' 0x200 interrupt vector.
 | 
			
		||||
 | 
			
		||||
7.13 KVM_CAP_X86_DISABLE_EXITS
 | 
			
		||||
 | 
			
		||||
Architectures: x86
 | 
			
		||||
Parameters: args[0] defines which exits are disabled
 | 
			
		||||
Returns: 0 on success, -EINVAL when args[0] contains invalid exits
 | 
			
		||||
 | 
			
		||||
Valid bits in args[0] are
 | 
			
		||||
 | 
			
		||||
#define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
 | 
			
		||||
 | 
			
		||||
Enabling this capability on a VM provides userspace with a way to no
 | 
			
		||||
longer intercept some instructions for improved latency in some
 | 
			
		||||
workloads, and is suggested when vCPUs are associated to dedicated
 | 
			
		||||
physical CPUs.  More bits can be added in the future; userspace can
 | 
			
		||||
just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable
 | 
			
		||||
all such vmexits.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
8. Other capabilities.
 | 
			
		||||
----------------------
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -4470,15 +4488,6 @@ reserved.
 | 
			
		|||
    Both registers and addresses are 64-bits wide.
 | 
			
		||||
    It will be possible to run 64-bit or 32-bit guest code.
 | 
			
		||||
 | 
			
		||||
8.8 KVM_CAP_X86_GUEST_MWAIT
 | 
			
		||||
 | 
			
		||||
Architectures: x86
 | 
			
		||||
 | 
			
		||||
This capability indicates that a guest using memory monitoring instructions
 | 
			
		||||
(MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit.  As such, time
 | 
			
		||||
spent while the virtual CPU is halted in this way will then be accounted for as
 | 
			
		||||
guest running time on the host (as opposed to e.g. HLT).
 | 
			
		||||
 | 
			
		||||
8.9 KVM_CAP_ARM_USER_IRQ
 | 
			
		||||
 | 
			
		||||
Architectures: arm, arm64
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -811,6 +811,8 @@ struct kvm_arch {
 | 
			
		|||
 | 
			
		||||
	gpa_t wall_clock;
 | 
			
		||||
 | 
			
		||||
	bool mwait_in_guest;
 | 
			
		||||
 | 
			
		||||
	bool ept_identity_pagetable_done;
 | 
			
		||||
	gpa_t ept_identity_map_addr;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1398,7 +1398,7 @@ static void init_vmcb(struct vcpu_svm *svm)
 | 
			
		|||
	set_intercept(svm, INTERCEPT_XSETBV);
 | 
			
		||||
	set_intercept(svm, INTERCEPT_RSM);
 | 
			
		||||
 | 
			
		||||
	if (!kvm_mwait_in_guest()) {
 | 
			
		||||
	if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
 | 
			
		||||
		set_intercept(svm, INTERCEPT_MONITOR);
 | 
			
		||||
		set_intercept(svm, INTERCEPT_MWAIT);
 | 
			
		||||
	}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3746,13 +3746,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 | 
			
		|||
	      CPU_BASED_UNCOND_IO_EXITING |
 | 
			
		||||
	      CPU_BASED_MOV_DR_EXITING |
 | 
			
		||||
	      CPU_BASED_USE_TSC_OFFSETING |
 | 
			
		||||
	      CPU_BASED_MWAIT_EXITING |
 | 
			
		||||
	      CPU_BASED_MONITOR_EXITING |
 | 
			
		||||
	      CPU_BASED_INVLPG_EXITING |
 | 
			
		||||
	      CPU_BASED_RDPMC_EXITING;
 | 
			
		||||
 | 
			
		||||
	if (!kvm_mwait_in_guest())
 | 
			
		||||
		min |= CPU_BASED_MWAIT_EXITING |
 | 
			
		||||
			CPU_BASED_MONITOR_EXITING;
 | 
			
		||||
 | 
			
		||||
	opt = CPU_BASED_TPR_SHADOW |
 | 
			
		||||
	      CPU_BASED_USE_MSR_BITMAPS |
 | 
			
		||||
	      CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 | 
			
		||||
| 
						 | 
				
			
			@ -5544,6 +5542,9 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 | 
			
		|||
		exec_control |= CPU_BASED_CR3_STORE_EXITING |
 | 
			
		||||
				CPU_BASED_CR3_LOAD_EXITING  |
 | 
			
		||||
				CPU_BASED_INVLPG_EXITING;
 | 
			
		||||
	if (kvm_mwait_in_guest(vmx->vcpu.kvm))
 | 
			
		||||
		exec_control &= ~(CPU_BASED_MWAIT_EXITING |
 | 
			
		||||
				CPU_BASED_MONITOR_EXITING);
 | 
			
		||||
	return exec_control;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2813,9 +2813,15 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
 | 
			
		|||
	return r;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline bool kvm_can_mwait_in_guest(void)
 | 
			
		||||
{
 | 
			
		||||
	return boot_cpu_has(X86_FEATURE_MWAIT) &&
 | 
			
		||||
		!boot_cpu_has_bug(X86_BUG_MONITOR);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 | 
			
		||||
{
 | 
			
		||||
	int r;
 | 
			
		||||
	int r = 0;
 | 
			
		||||
 | 
			
		||||
	switch (ext) {
 | 
			
		||||
	case KVM_CAP_IRQCHIP:
 | 
			
		||||
| 
						 | 
				
			
			@ -2871,8 +2877,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 | 
			
		|||
	case KVM_CAP_ADJUST_CLOCK:
 | 
			
		||||
		r = KVM_CLOCK_TSC_STABLE;
 | 
			
		||||
		break;
 | 
			
		||||
	case KVM_CAP_X86_GUEST_MWAIT:
 | 
			
		||||
		r = kvm_mwait_in_guest();
 | 
			
		||||
	case KVM_CAP_X86_DISABLE_EXITS:
 | 
			
		||||
		if(kvm_can_mwait_in_guest())
 | 
			
		||||
			r |= KVM_X86_DISABLE_EXITS_MWAIT;
 | 
			
		||||
		break;
 | 
			
		||||
	case KVM_CAP_X86_SMM:
 | 
			
		||||
		/* SMBASE is usually relocated above 1M on modern chipsets,
 | 
			
		||||
| 
						 | 
				
			
			@ -2913,7 +2920,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 | 
			
		|||
		r = KVM_X2APIC_API_VALID_FLAGS;
 | 
			
		||||
		break;
 | 
			
		||||
	default:
 | 
			
		||||
		r = 0;
 | 
			
		||||
		break;
 | 
			
		||||
	}
 | 
			
		||||
	return r;
 | 
			
		||||
| 
						 | 
				
			
			@ -4218,6 +4224,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 | 
			
		|||
 | 
			
		||||
		r = 0;
 | 
			
		||||
		break;
 | 
			
		||||
	case KVM_CAP_X86_DISABLE_EXITS:
 | 
			
		||||
		r = -EINVAL;
 | 
			
		||||
		if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
 | 
			
		||||
			kvm_can_mwait_in_guest())
 | 
			
		||||
			kvm->arch.mwait_in_guest = true;
 | 
			
		||||
		r = 0;
 | 
			
		||||
		break;
 | 
			
		||||
	default:
 | 
			
		||||
		r = -EINVAL;
 | 
			
		||||
		break;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,8 +2,6 @@
 | 
			
		|||
#ifndef ARCH_X86_KVM_X86_H
 | 
			
		||||
#define ARCH_X86_KVM_X86_H
 | 
			
		||||
 | 
			
		||||
#include <asm/processor.h>
 | 
			
		||||
#include <asm/mwait.h>
 | 
			
		||||
#include <linux/kvm_host.h>
 | 
			
		||||
#include <asm/pvclock.h>
 | 
			
		||||
#include "kvm_cache_regs.h"
 | 
			
		||||
| 
						 | 
				
			
			@ -266,10 +264,12 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 | 
			
		|||
	    __rem;						\
 | 
			
		||||
	 })
 | 
			
		||||
 | 
			
		||||
static inline bool kvm_mwait_in_guest(void)
 | 
			
		||||
#define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
 | 
			
		||||
#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT)
 | 
			
		||||
 | 
			
		||||
static inline bool kvm_mwait_in_guest(struct kvm *kvm)
 | 
			
		||||
{
 | 
			
		||||
	return boot_cpu_has(X86_FEATURE_MWAIT) &&
 | 
			
		||||
		!boot_cpu_has_bug(X86_BUG_MONITOR);
 | 
			
		||||
	return kvm->arch.mwait_in_guest;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -929,7 +929,7 @@ struct kvm_ppc_resize_hpt {
 | 
			
		|||
#define KVM_CAP_S390_GS 140
 | 
			
		||||
#define KVM_CAP_S390_AIS 141
 | 
			
		||||
#define KVM_CAP_SPAPR_TCE_VFIO 142
 | 
			
		||||
#define KVM_CAP_X86_GUEST_MWAIT 143
 | 
			
		||||
#define KVM_CAP_X86_DISABLE_EXITS 143
 | 
			
		||||
#define KVM_CAP_ARM_USER_IRQ 144
 | 
			
		||||
#define KVM_CAP_S390_CMMA_MIGRATION 145
 | 
			
		||||
#define KVM_CAP_PPC_FWNMI 146
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -924,7 +924,7 @@ struct kvm_ppc_resize_hpt {
 | 
			
		|||
#define KVM_CAP_S390_GS 140
 | 
			
		||||
#define KVM_CAP_S390_AIS 141
 | 
			
		||||
#define KVM_CAP_SPAPR_TCE_VFIO 142
 | 
			
		||||
#define KVM_CAP_X86_GUEST_MWAIT 143
 | 
			
		||||
#define KVM_CAP_X86_DISABLE_EXITS 143
 | 
			
		||||
#define KVM_CAP_ARM_USER_IRQ 144
 | 
			
		||||
#define KVM_CAP_S390_CMMA_MIGRATION 145
 | 
			
		||||
#define KVM_CAP_PPC_FWNMI 146
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue