From: dv Date: Mon, 17 May 2021 23:36:40 +0000 (+0000) Subject: vmm(4): fix race condition related to incorrect physical cpu tracking X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=b931547982ad0bc035ad5580896a428a8fb01525;p=openbsd vmm(4): fix race condition related to incorrect physical cpu tracking The race condition results in vmread errors when disabling interrupt window exiting. The vmd(8) guest gets an EINVAL response to it's VMM_IOC_RUN ioctl and aborts, sending the guest to an abrupt end. Similarly to the recent SVM commit, this changes the vcpu run loop logic to check for resuming on a different cpu. If so, the VMCS is loaded onto the new cpu. Instead of using just a "resume" flag, the real reason (other than cpu switch) that would require reloading the VMCS is vmm may have cleared the VMCS before yielding to the scheduler. The "resume" flag is still used in vmx_enter_guest to toggle between vmlaunch/vmresume calls, but is no longer the arbiter of if vmm reloads the VMCS or not. A more subtle race condition still exists related to clearing the VMCS on the previous cpu, but that's for a future commit. OK mlarkin@ --- diff --git a/sys/arch/amd64/amd64/vmm.c b/sys/arch/amd64/amd64/vmm.c index 7ebc0ec77ee..7e3ebf8a55f 100644 --- a/sys/arch/amd64/amd64/vmm.c +++ b/sys/arch/amd64/amd64/vmm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vmm.c,v 1.282 2021/05/12 04:00:46 mlarkin Exp $ */ +/* $OpenBSD: vmm.c,v 1.283 2021/05/17 23:36:40 dv Exp $ */ /* * Copyright (c) 2014 Mike Larkin * @@ -4487,9 +4487,9 @@ vmm_translate_gva(struct vcpu *vcpu, uint64_t va, uint64_t *pa, int mode) int vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp) { - int ret = 0, resume, locked, exitinfo; + int ret = 0, resume = 0, cleared = 1, locked, exitinfo; struct region_descriptor gdt; - struct cpu_info *ci; + struct cpu_info *ci = NULL; uint64_t exit_reason, cr3, vmcs_ptr, insn_error; struct schedstate_percpu *spc; struct vmx_invvpid_descriptor vid; @@ -4498,7 +4498,6 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp) u_long s; struct region_descriptor gdtr, idtr; - resume = 0; irq = vrp->vrp_irq; /* @@ -4576,12 +4575,13 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp) while (ret == 0) { vmm_update_pvclock(vcpu); - if (!resume) { + if (cleared || ci != curcpu()) { /* - * We are launching for the first time, or we are - * resuming from a different pcpu, so we need to - * reset certain pcpu-specific values. + * We are launching for the first time, we yielded the + * pcpu, or we are resuming from a different pcpu, so we + * need to reset certain pcpu-specific values. */ + resume = cleared = 0; ci = curcpu(); setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1); @@ -4605,7 +4605,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp) /* Host TR base */ if (vmwrite(VMCS_HOST_IA32_TR_BASE, - (uint64_t)curcpu()->ci_tss)) { + (uint64_t)ci->ci_tss)) { ret = EINVAL; break; } @@ -4723,7 +4723,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp) KERNEL_UNLOCK(); ret = vmx_enter_guest(&vcpu->vc_control_pa, &vcpu->vc_gueststate, resume, - curcpu()->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr); + ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr); bare_lgdt(&gdtr); lidt(&idtr); @@ -4849,11 +4849,11 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp) /* Check if we should yield - don't hog the cpu */ spc = &ci->ci_schedstate; if (spc->spc_schedflags & SPCF_SHOULDYIELD) { - resume = 0; if (vmclear(&vcpu->vc_control_pa)) { ret = EINVAL; break; } + cleared = 1; yield(); } } else if (ret == VMX_FAIL_LAUNCH_INVALID_VMCS) {