mirror of
				https://github.com/torvalds/linux.git
				synced 2025-11-04 02:30:34 +02:00 
			
		
		
		
	KVM: x86: fix singlestepping over syscall
TF is handled a bit differently for syscall and sysret, compared to the other instructions: TF is checked after the instruction completes, so that the OS can disable #DB at a syscall by adding TF to FMASK. When the sysret is executed the #DB is taken "as if" the syscall insn just completed.

KVM emulates syscall so that it can trap 32-bit syscall on Intel processors. Fix the behavior, otherwise you could get #DB on a user stack which is not nice. This does not affect Linux guests, as they use an IST or task gate for #DB.

This fixes CVE-2017-7518.

Cc: stable@vger.kernel.org
Reported-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
This commit is contained in:
		
							parent
							
								
									d6aa07c169
								
							
						
					
					
						commit
						c8401dda2f
					
				
					 3 changed files with 34 additions and 30 deletions
				
			
		| 
						 | 
					@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	bool perm_ok; /* do not check permissions if true */
 | 
						bool perm_ok; /* do not check permissions if true */
 | 
				
			||||||
	bool ud;	/* inject an #UD if host doesn't support insn */
 | 
						bool ud;	/* inject an #UD if host doesn't support insn */
 | 
				
			||||||
 | 
						bool tf;	/* TF value before instruction (after for syscall/sysret) */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	bool have_exception;
 | 
						bool have_exception;
 | 
				
			||||||
	struct x86_exception exception;
 | 
						struct x86_exception exception;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 | 
				
			||||||
		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
 | 
							ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
 | 
				
			||||||
	return X86EMUL_CONTINUE;
 | 
						return X86EMUL_CONTINUE;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 | 
						kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ctxt->eflags = kvm_get_rflags(vcpu);
 | 
						ctxt->eflags = kvm_get_rflags(vcpu);
 | 
				
			||||||
 | 
						ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ctxt->eip = kvm_rip_read(vcpu);
 | 
						ctxt->eip = kvm_rip_read(vcpu);
 | 
				
			||||||
	ctxt->mode = (!is_protmode(vcpu))		? X86EMUL_MODE_REAL :
 | 
						ctxt->mode = (!is_protmode(vcpu))		? X86EMUL_MODE_REAL :
 | 
				
			||||||
		     (ctxt->eflags & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
 | 
							     (ctxt->eflags & X86_EFLAGS_VM)	? X86EMUL_MODE_VM86 :
 | 
				
			||||||
| 
						 | 
					@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
 | 
				
			||||||
	return dr6;
 | 
						return dr6;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
 | 
					static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	struct kvm_run *kvm_run = vcpu->run;
 | 
						struct kvm_run *kvm_run = vcpu->run;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
 | 
				
			||||||
	 * rflags is the old, "raw" value of the flags.  The new value has
 | 
							kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
 | 
				
			||||||
	 * not been saved yet.
 | 
							kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
 | 
				
			||||||
	 *
 | 
							kvm_run->debug.arch.exception = DB_VECTOR;
 | 
				
			||||||
	 * This is correct even for TF set by the guest, because "the
 | 
							kvm_run->exit_reason = KVM_EXIT_DEBUG;
 | 
				
			||||||
	 * processor will not generate this exception after the instruction
 | 
							*r = EMULATE_USER_EXIT;
 | 
				
			||||||
	 * that sets the TF flag".
 | 
						} else {
 | 
				
			||||||
	 */
 | 
							/*
 | 
				
			||||||
	if (unlikely(rflags & X86_EFLAGS_TF)) {
 | 
							 * "Certain debug exceptions may clear bit 0-3.  The
 | 
				
			||||||
		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
 | 
							 * remaining contents of the DR6 register are never
 | 
				
			||||||
			kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
 | 
							 * cleared by the processor".
 | 
				
			||||||
						  DR6_RTM;
 | 
							 */
 | 
				
			||||||
			kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
 | 
							vcpu->arch.dr6 &= ~15;
 | 
				
			||||||
			kvm_run->debug.arch.exception = DB_VECTOR;
 | 
							vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
 | 
				
			||||||
			kvm_run->exit_reason = KVM_EXIT_DEBUG;
 | 
							kvm_queue_exception(vcpu, DB_VECTOR);
 | 
				
			||||||
			*r = EMULATE_USER_EXIT;
 | 
					 | 
				
			||||||
		} else {
 | 
					 | 
				
			||||||
			/*
 | 
					 | 
				
			||||||
			 * "Certain debug exceptions may clear bit 0-3.  The
 | 
					 | 
				
			||||||
			 * remaining contents of the DR6 register are never
 | 
					 | 
				
			||||||
			 * cleared by the processor".
 | 
					 | 
				
			||||||
			 */
 | 
					 | 
				
			||||||
			vcpu->arch.dr6 &= ~15;
 | 
					 | 
				
			||||||
			vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
 | 
					 | 
				
			||||||
			kvm_queue_exception(vcpu, DB_VECTOR);
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
 | 
				
			||||||
	int r = EMULATE_DONE;
 | 
						int r = EMULATE_DONE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	kvm_x86_ops->skip_emulated_instruction(vcpu);
 | 
						kvm_x86_ops->skip_emulated_instruction(vcpu);
 | 
				
			||||||
	kvm_vcpu_check_singlestep(vcpu, rflags, &r);
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * rflags is the old, "raw" value of the flags.  The new value has
 | 
				
			||||||
 | 
						 * not been saved yet.
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * This is correct even for TF set by the guest, because "the
 | 
				
			||||||
 | 
						 * processor will not generate this exception after the instruction
 | 
				
			||||||
 | 
						 * that sets the TF flag".
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (unlikely(rflags & X86_EFLAGS_TF))
 | 
				
			||||||
 | 
							kvm_vcpu_do_singlestep(vcpu, &r);
 | 
				
			||||||
	return r == EMULATE_DONE;
 | 
						return r == EMULATE_DONE;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
 | 
					EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
 | 
				
			||||||
| 
						 | 
					@ -5726,8 +5727,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 | 
				
			||||||
		toggle_interruptibility(vcpu, ctxt->interruptibility);
 | 
							toggle_interruptibility(vcpu, ctxt->interruptibility);
 | 
				
			||||||
		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 | 
							vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 | 
				
			||||||
		kvm_rip_write(vcpu, ctxt->eip);
 | 
							kvm_rip_write(vcpu, ctxt->eip);
 | 
				
			||||||
		if (r == EMULATE_DONE)
 | 
							if (r == EMULATE_DONE &&
 | 
				
			||||||
			kvm_vcpu_check_singlestep(vcpu, rflags, &r);
 | 
							    (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
 | 
				
			||||||
 | 
								kvm_vcpu_do_singlestep(vcpu, &r);
 | 
				
			||||||
		if (!ctxt->have_exception ||
 | 
							if (!ctxt->have_exception ||
 | 
				
			||||||
		    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
 | 
							    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
 | 
				
			||||||
			__kvm_set_rflags(vcpu, ctxt->eflags);
 | 
								__kvm_set_rflags(vcpu, ctxt->eflags);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue