forked from mirrors/linux
		
	KVM: lapic: Track lapic timer advance per vCPU
Automatically adjusting the globally-shared timer advancement could
corrupt the timer, e.g. if multiple vCPUs are concurrently adjusting
the advancement value.  That could be partially fixed by using a local
variable for the arithmetic, but it would still be susceptible to a
race when setting timer_advance_adjust_done.
And because virtual_tsc_khz and tsc_scaling_ratio are per-vCPU, the
correct calibration for a given vCPU may not apply to all vCPUs.
Furthermore, lapic_timer_advance_ns is marked __read_mostly, which is
effectively violated when finding a stable advancement takes an extended
amount of time.
Opportunistically change the definition of lapic_timer_advance_ns to
a u32 so that it matches the style of struct kvm_timer.  Explicitly
pass the param to kvm_create_lapic() so that it doesn't have to be
exposed to lapic.c, thus reducing the probability of unintentionally
using the global value instead of the per-vCPU value.
Cc: Liran Alon <liran.alon@oracle.com>
Cc: Wanpeng Li <wanpengli@tencent.com>
Reviewed-by: Liran Alon <liran.alon@oracle.com>
Cc: stable@vger.kernel.org
Fixes: 3b8a5df6c4 ("KVM: LAPIC: Tune lapic_timer_advance_ns automatically")
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
			
			
This commit is contained in:
		
							parent
							
								
									57bf67e73c
								
							
						
					
					
						commit
						39497d7660
					
				
					 5 changed files with 28 additions and 25 deletions
				
			
		| 
						 | 
					@ -70,7 +70,6 @@
 | 
				
			||||||
#define APIC_BROADCAST			0xFF
 | 
					#define APIC_BROADCAST			0xFF
 | 
				
			||||||
#define X2APIC_BROADCAST		0xFFFFFFFFul
 | 
					#define X2APIC_BROADCAST		0xFFFFFFFFul
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static bool lapic_timer_advance_adjust_done = false;
 | 
					 | 
				
			||||||
#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
 | 
					#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
 | 
				
			||||||
/* step-by-step approximation to mitigate fluctuation */
 | 
					/* step-by-step approximation to mitigate fluctuation */
 | 
				
			||||||
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
 | 
					#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
 | 
				
			||||||
| 
						 | 
					@ -1485,6 +1484,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
 | 
				
			||||||
void wait_lapic_expire(struct kvm_vcpu *vcpu)
 | 
					void wait_lapic_expire(struct kvm_vcpu *vcpu)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct kvm_lapic *apic = vcpu->arch.apic;
 | 
						struct kvm_lapic *apic = vcpu->arch.apic;
 | 
				
			||||||
 | 
						u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
 | 
				
			||||||
	u64 guest_tsc, tsc_deadline, ns;
 | 
						u64 guest_tsc, tsc_deadline, ns;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!lapic_in_kernel(vcpu))
 | 
						if (!lapic_in_kernel(vcpu))
 | 
				
			||||||
| 
						 | 
					@ -1504,34 +1504,36 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	/* __delay is delay_tsc whenever the hardware has TSC, thus always.  */
 | 
						/* __delay is delay_tsc whenever the hardware has TSC, thus always.  */
 | 
				
			||||||
	if (guest_tsc < tsc_deadline)
 | 
						if (guest_tsc < tsc_deadline)
 | 
				
			||||||
		__delay(min(tsc_deadline - guest_tsc,
 | 
							__delay(min(tsc_deadline - guest_tsc,
 | 
				
			||||||
			nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
 | 
								nsec_to_cycles(vcpu, timer_advance_ns)));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (!lapic_timer_advance_adjust_done) {
 | 
						if (!apic->lapic_timer.timer_advance_adjust_done) {
 | 
				
			||||||
		/* too early */
 | 
							/* too early */
 | 
				
			||||||
		if (guest_tsc < tsc_deadline) {
 | 
							if (guest_tsc < tsc_deadline) {
 | 
				
			||||||
			ns = (tsc_deadline - guest_tsc) * 1000000ULL;
 | 
								ns = (tsc_deadline - guest_tsc) * 1000000ULL;
 | 
				
			||||||
			do_div(ns, vcpu->arch.virtual_tsc_khz);
 | 
								do_div(ns, vcpu->arch.virtual_tsc_khz);
 | 
				
			||||||
			lapic_timer_advance_ns -= min((unsigned int)ns,
 | 
								timer_advance_ns -= min((u32)ns,
 | 
				
			||||||
				lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
 | 
									timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
		/* too late */
 | 
							/* too late */
 | 
				
			||||||
			ns = (guest_tsc - tsc_deadline) * 1000000ULL;
 | 
								ns = (guest_tsc - tsc_deadline) * 1000000ULL;
 | 
				
			||||||
			do_div(ns, vcpu->arch.virtual_tsc_khz);
 | 
								do_div(ns, vcpu->arch.virtual_tsc_khz);
 | 
				
			||||||
			lapic_timer_advance_ns += min((unsigned int)ns,
 | 
								timer_advance_ns += min((u32)ns,
 | 
				
			||||||
				lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
 | 
									timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
 | 
							if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
 | 
				
			||||||
			lapic_timer_advance_adjust_done = true;
 | 
								apic->lapic_timer.timer_advance_adjust_done = true;
 | 
				
			||||||
		if (unlikely(lapic_timer_advance_ns > 5000)) {
 | 
							if (unlikely(timer_advance_ns > 5000)) {
 | 
				
			||||||
			lapic_timer_advance_ns = 0;
 | 
								timer_advance_ns = 0;
 | 
				
			||||||
			lapic_timer_advance_adjust_done = true;
 | 
								apic->lapic_timer.timer_advance_adjust_done = true;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
							apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void start_sw_tscdeadline(struct kvm_lapic *apic)
 | 
					static void start_sw_tscdeadline(struct kvm_lapic *apic)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
 | 
						struct kvm_timer *ktimer = &apic->lapic_timer;
 | 
				
			||||||
 | 
						u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
 | 
				
			||||||
	u64 ns = 0;
 | 
						u64 ns = 0;
 | 
				
			||||||
	ktime_t expire;
 | 
						ktime_t expire;
 | 
				
			||||||
	struct kvm_vcpu *vcpu = apic->vcpu;
 | 
						struct kvm_vcpu *vcpu = apic->vcpu;
 | 
				
			||||||
| 
						 | 
					@ -1551,11 +1553,10 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 | 
				
			||||||
	do_div(ns, this_tsc_khz);
 | 
						do_div(ns, this_tsc_khz);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (likely(tscdeadline > guest_tsc) &&
 | 
						if (likely(tscdeadline > guest_tsc) &&
 | 
				
			||||||
	    likely(ns > lapic_timer_advance_ns)) {
 | 
						    likely(ns > apic->lapic_timer.timer_advance_ns)) {
 | 
				
			||||||
		expire = ktime_add_ns(now, ns);
 | 
							expire = ktime_add_ns(now, ns);
 | 
				
			||||||
		expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
 | 
							expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
 | 
				
			||||||
		hrtimer_start(&apic->lapic_timer.timer,
 | 
							hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED);
 | 
				
			||||||
				expire, HRTIMER_MODE_ABS_PINNED);
 | 
					 | 
				
			||||||
	} else
 | 
						} else
 | 
				
			||||||
		apic_timer_expired(apic);
 | 
							apic_timer_expired(apic);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2262,7 +2263,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 | 
				
			||||||
		return HRTIMER_NORESTART;
 | 
							return HRTIMER_NORESTART;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int kvm_create_lapic(struct kvm_vcpu *vcpu)
 | 
					int kvm_create_lapic(struct kvm_vcpu *vcpu, u32 timer_advance_ns)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct kvm_lapic *apic;
 | 
						struct kvm_lapic *apic;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2286,6 +2287,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
 | 
						hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
 | 
				
			||||||
		     HRTIMER_MODE_ABS_PINNED);
 | 
							     HRTIMER_MODE_ABS_PINNED);
 | 
				
			||||||
	apic->lapic_timer.timer.function = apic_timer_fn;
 | 
						apic->lapic_timer.timer.function = apic_timer_fn;
 | 
				
			||||||
 | 
						apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
 | 
						 * APIC is created enabled. This will prevent kvm_lapic_set_base from
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -31,8 +31,10 @@ struct kvm_timer {
 | 
				
			||||||
	u32 timer_mode_mask;
 | 
						u32 timer_mode_mask;
 | 
				
			||||||
	u64 tscdeadline;
 | 
						u64 tscdeadline;
 | 
				
			||||||
	u64 expired_tscdeadline;
 | 
						u64 expired_tscdeadline;
 | 
				
			||||||
 | 
						u32 timer_advance_ns;
 | 
				
			||||||
	atomic_t pending;			/* accumulated triggered timers */
 | 
						atomic_t pending;			/* accumulated triggered timers */
 | 
				
			||||||
	bool hv_timer_in_use;
 | 
						bool hv_timer_in_use;
 | 
				
			||||||
 | 
						bool timer_advance_adjust_done;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct kvm_lapic {
 | 
					struct kvm_lapic {
 | 
				
			||||||
| 
						 | 
					@ -62,7 +64,7 @@ struct kvm_lapic {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct dest_map;
 | 
					struct dest_map;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int kvm_create_lapic(struct kvm_vcpu *vcpu);
 | 
					int kvm_create_lapic(struct kvm_vcpu *vcpu, u32 timer_advance_ns);
 | 
				
			||||||
void kvm_free_lapic(struct kvm_vcpu *vcpu);
 | 
					void kvm_free_lapic(struct kvm_vcpu *vcpu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
 | 
					int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -7032,6 +7032,7 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct vcpu_vmx *vmx;
 | 
						struct vcpu_vmx *vmx;
 | 
				
			||||||
	u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
 | 
						u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
 | 
				
			||||||
 | 
						struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (kvm_mwait_in_guest(vcpu->kvm))
 | 
						if (kvm_mwait_in_guest(vcpu->kvm))
 | 
				
			||||||
		return -EOPNOTSUPP;
 | 
							return -EOPNOTSUPP;
 | 
				
			||||||
| 
						 | 
					@ -7040,7 +7041,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
 | 
				
			||||||
	tscl = rdtsc();
 | 
						tscl = rdtsc();
 | 
				
			||||||
	guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
 | 
						guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
 | 
				
			||||||
	delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
 | 
						delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
 | 
				
			||||||
	lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns);
 | 
						lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
 | 
				
			||||||
 | 
											    ktimer->timer_advance_ns);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (delta_tsc > lapic_timer_advance_cycles)
 | 
						if (delta_tsc > lapic_timer_advance_cycles)
 | 
				
			||||||
		delta_tsc -= lapic_timer_advance_cycles;
 | 
							delta_tsc -= lapic_timer_advance_cycles;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -137,9 +137,8 @@ static u32 __read_mostly tsc_tolerance_ppm = 250;
 | 
				
			||||||
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
 | 
					module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* lapic timer advance (tscdeadline mode only) in nanoseconds */
 | 
					/* lapic timer advance (tscdeadline mode only) in nanoseconds */
 | 
				
			||||||
unsigned int __read_mostly lapic_timer_advance_ns = 1000;
 | 
					static u32 __read_mostly lapic_timer_advance_ns = 1000;
 | 
				
			||||||
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
 | 
					module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
 | 
				
			||||||
EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
static bool __read_mostly vector_hashing = true;
 | 
					static bool __read_mostly vector_hashing = true;
 | 
				
			||||||
module_param(vector_hashing, bool, S_IRUGO);
 | 
					module_param(vector_hashing, bool, S_IRUGO);
 | 
				
			||||||
| 
						 | 
					@ -7873,7 +7872,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	trace_kvm_entry(vcpu->vcpu_id);
 | 
						trace_kvm_entry(vcpu->vcpu_id);
 | 
				
			||||||
	if (lapic_timer_advance_ns)
 | 
						if (vcpu->arch.apic->lapic_timer.timer_advance_ns)
 | 
				
			||||||
		wait_lapic_expire(vcpu);
 | 
							wait_lapic_expire(vcpu);
 | 
				
			||||||
	guest_enter_irqoff();
 | 
						guest_enter_irqoff();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -9061,7 +9060,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (irqchip_in_kernel(vcpu->kvm)) {
 | 
						if (irqchip_in_kernel(vcpu->kvm)) {
 | 
				
			||||||
		vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
 | 
							vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
 | 
				
			||||||
		r = kvm_create_lapic(vcpu);
 | 
							r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
 | 
				
			||||||
		if (r < 0)
 | 
							if (r < 0)
 | 
				
			||||||
			goto fail_mmu_destroy;
 | 
								goto fail_mmu_destroy;
 | 
				
			||||||
	} else
 | 
						} else
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -294,8 +294,6 @@ extern u64 kvm_supported_xcr0(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern unsigned int min_timer_period_us;
 | 
					extern unsigned int min_timer_period_us;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern unsigned int lapic_timer_advance_ns;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
extern bool enable_vmware_backdoor;
 | 
					extern bool enable_vmware_backdoor;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern struct static_key kvm_no_apic_vcpu;
 | 
					extern struct static_key kvm_no_apic_vcpu;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue