From b931547982ad0bc035ad5580896a428a8fb01525 Mon Sep 17 00:00:00 2001
From: dv <dv@openbsd.org>
Date: Mon, 17 May 2021 23:36:40 +0000
Subject: [PATCH] vmm(4): fix race condition related to incorrect physical cpu
 tracking

The race condition results in vmread errors when disabling interrupt
window exiting. The vmd(8) guest gets an EINVAL response to it's
VMM_IOC_RUN ioctl and aborts, sending the guest to an abrupt end.

Similarly to the recent SVM commit, this changes the vcpu run loop
logic to check for resuming on a different cpu. If so, the VMCS is
loaded onto the new cpu.

Instead of using just a "resume" flag, the real reason (other than cpu
switch) that would require reloading the VMCS is vmm may have cleared
the VMCS before yielding to the scheduler. The "resume" flag is still
used in vmx_enter_guest to toggle between vmlaunch/vmresume calls, but
is no longer the arbiter of if vmm reloads the VMCS or not.

A more subtle race condition still exists related to clearing the VMCS
on the previous cpu, but that's for a future commit.

OK mlarkin@
---
 sys/arch/amd64/amd64/vmm.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/sys/arch/amd64/amd64/vmm.c b/sys/arch/amd64/amd64/vmm.c
index 7ebc0ec77ee..7e3ebf8a55f 100644
--- a/sys/arch/amd64/amd64/vmm.c
+++ b/sys/arch/amd64/amd64/vmm.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vmm.c,v 1.282 2021/05/12 04:00:46 mlarkin Exp $	*/
+/*	$OpenBSD: vmm.c,v 1.283 2021/05/17 23:36:40 dv Exp $	*/
 /*
  * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
  *
@@ -4487,9 +4487,9 @@ vmm_translate_gva(struct vcpu *vcpu, uint64_t va, uint64_t *pa, int mode)
 int
 vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 {
-	int ret = 0, resume, locked, exitinfo;
+	int ret = 0, resume = 0, cleared = 1, locked, exitinfo;
 	struct region_descriptor gdt;
-	struct cpu_info *ci;
+	struct cpu_info *ci = NULL;
 	uint64_t exit_reason, cr3, vmcs_ptr, insn_error;
 	struct schedstate_percpu *spc;
 	struct vmx_invvpid_descriptor vid;
@@ -4498,7 +4498,6 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 	u_long s;
 	struct region_descriptor gdtr, idtr;
 
-	resume = 0;
 	irq = vrp->vrp_irq;
 
 	/*
@@ -4576,12 +4575,13 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 
 	while (ret == 0) {
 		vmm_update_pvclock(vcpu);
-		if (!resume) {
+		if (cleared || ci != curcpu()) {
 			/*
-			 * We are launching for the first time, or we are
-			 * resuming from a different pcpu, so we need to
-			 * reset certain pcpu-specific values.
+			 * We are launching for the first time, we yielded the
+			 * pcpu, or we are resuming from a different pcpu, so we
+			 * need to reset certain pcpu-specific values.
 			 */
+			resume = cleared = 0;
 			ci = curcpu();
 			setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1);
 
@@ -4605,7 +4605,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 
 			/* Host TR base */
 			if (vmwrite(VMCS_HOST_IA32_TR_BASE,
-			    (uint64_t)curcpu()->ci_tss)) {
+			    (uint64_t)ci->ci_tss)) {
 				ret = EINVAL;
 				break;
 			}
@@ -4723,7 +4723,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 		KERNEL_UNLOCK();
 		ret = vmx_enter_guest(&vcpu->vc_control_pa,
 		    &vcpu->vc_gueststate, resume,
-		    curcpu()->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr);
+		    ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr);
 
 		bare_lgdt(&gdtr);
 		lidt(&idtr);
@@ -4849,11 +4849,11 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 			/* Check if we should yield - don't hog the cpu */
 			spc = &ci->ci_schedstate;
 			if (spc->spc_schedflags & SPCF_SHOULDYIELD) {
-				resume = 0;
 				if (vmclear(&vcpu->vc_control_pa)) {
 					ret = EINVAL;
 					break;
 				}
+				cleared = 1;
 				yield();
 			}
 		} else if (ret == VMX_FAIL_LAUNCH_INVALID_VMCS) {
-- 
2.20.1