From 2ef7560e3a04bfd37fcc08ecd39f003a03ebd93e Mon Sep 17 00:00:00 2001
From: dv
Date: Wed, 9 Nov 2022 17:53:12 +0000
Subject: [PATCH] vmm(4): treat vcpu lists as immutable, reducing complexity.

Since vmm doesn't support hot-plugging vcpus, we can reduce complexity
by treating each vm's vcpu list as immutable after creation.

As a consequence, we can use the vm reference count to protect the
lifetime of the vcpus, removing the need for reference counting
individual vcpu objects. With an immutable list, we no longer need a
rwlock protecting it either.

Original diff from dlg@ that I reworked and tested.

ok dlg@, mlarkin@
---
 sys/arch/amd64/amd64/vmm.c      | 80 ++++++++-------------------------
 sys/arch/amd64/include/vmmvar.h |  3 +-
 2 files changed, 19 insertions(+), 64 deletions(-)

diff --git a/sys/arch/amd64/amd64/vmm.c b/sys/arch/amd64/amd64/vmm.c
index 7ba57a463cb..3f7e0ce405a 100644
--- a/sys/arch/amd64/amd64/vmm.c
+++ b/sys/arch/amd64/amd64/vmm.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vmm.c,v 1.331 2022/11/09 10:19:20 dlg Exp $	*/
+/*	$OpenBSD: vmm.c,v 1.332 2022/11/09 17:53:12 dv Exp $	*/
 /*
  * Copyright (c) 2014 Mike Larkin
  *
@@ -504,9 +504,6 @@ vmm_quiesce_vmx(void)

 	/* Iterate over each vm... */
 	SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
-		if ((err = rw_enter(&vm->vm_vcpu_lock, RW_READ | RW_NOSLEEP)))
-			break;
-
 		/* Iterate over each vcpu... */
 		SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
 			err = rw_enter(&vcpu->vc_lock, RW_WRITE | RW_NOSLEEP);
@@ -549,7 +546,6 @@ vmm_quiesce_vmx(void)
 			DPRINTF("%s: cleared vcpu %d for vm %d\n", __func__,
 			    vcpu->vc_id, vm->vm_id);
 		}
-		rw_exit_read(&vm->vm_vcpu_lock);
 		if (err)
 			break;
 	}
@@ -783,18 +779,14 @@ vm_find_vcpu(struct vm *vm, uint32_t id)
 	struct vcpu *vcpu;

 	if (vm == NULL)
-		return NULL;
+		return (NULL);

-	rw_enter_read(&vm->vm_vcpu_lock);
 	SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
-		refcnt_take(&vcpu->vc_refcnt);
 		if (vcpu->vc_id == id)
-			break;
-		refcnt_rele_wake(&vcpu->vc_refcnt);
+			return (vcpu);
 	}
-	rw_exit_read(&vm->vm_vcpu_lock);

-	return vcpu;
+	return (NULL);
 }
@@ -852,8 +844,6 @@ vm_resetcpu(struct vm_resetcpu_params *vrp)
 	}
 	rw_exit_write(&vcpu->vc_lock);
 out:
-	if (vcpu != NULL)
-		refcnt_rele_wake(&vcpu->vc_refcnt);
 	refcnt_rele_wake(&vm->vm_refcnt);
 	return (ret);
 }
@@ -903,7 +893,6 @@ vm_intr_pending(struct vm_intr_params *vip)
 		x86_send_ipi(ci, X86_IPI_NOP);
 #endif

-	refcnt_rele_wake(&vcpu->vc_refcnt);
 out:
 	refcnt_rele_wake(&vm->vm_refcnt);
 	return (ret);
@@ -957,7 +946,6 @@ vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir) {
 			vmm_init_pvclock(vcpu, vpp->vpp_pvclock_system_gpa);
 		}
 	}
-	refcnt_rele_wake(&vcpu->vc_refcnt);
 out:
 	refcnt_rele_wake(&vm->vm_refcnt);
 	return (ret);
@@ -1018,7 +1006,6 @@ vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
 		ret = EINVAL;
 	}
 	rw_exit_write(&vcpu->vc_lock);
-	refcnt_rele_wake(&vcpu->vc_refcnt);
 out:
 	refcnt_rele_wake(&vm->vm_refcnt);
 	return (ret);
@@ -1184,10 +1171,8 @@ vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
 	} else
 		ret = EINVAL;
 out:
-	if (vcpu != NULL) {
+	if (vcpu != NULL)
 		rw_exit_write(&vcpu->vc_lock);
-		refcnt_rele_wake(&vcpu->vc_refcnt);
-	}
 out_nolock:
 	refcnt_rele_wake(&vm->vm_refcnt);
 	return (ret);
@@ -1355,7 +1340,6 @@ vm_find(uint32_t id, struct vm **res)

 	rw_enter_read(&vmm_softc->vm_lock);
 	SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
-		refcnt_take(&vm->vm_refcnt);
 		if (vm->vm_id == id) {
 			/*
 			 * In the pledged VM process, only allow to find
@@ -1368,12 +1352,12 @@ vm_find(uint32_t id, struct vm **res)
 			    (vm->vm_creator_pid != p->p_p->ps_pid))
 				ret = EPERM;
 			else {
+				refcnt_take(&vm->vm_refcnt);
 				*res = vm;
 				ret = 0;
 			}
 			break;
 		}
-		refcnt_rele_wake(&vm->vm_refcnt);
 	}
 	rw_exit_read(&vmm_softc->vm_lock);
@@ -1762,10 +1746,6 @@ vm_create(struct vm_create_params *vcp, struct proc *p)

 	/* Instantiate and configure the new vm. */
 	vm = pool_get(&vm_pool, PR_WAITOK | PR_ZERO);
-	refcnt_init(&vm->vm_refcnt); /* Do not release yet. */
-
-	SLIST_INIT(&vm->vm_vcpu_list);
-	rw_init(&vm->vm_vcpu_lock, "vcpu_list");

 	vm->vm_creator_pid = p->p_p->ps_pid;
 	vm->vm_nmemranges = vcp->vcp_nmemranges;
@@ -1783,9 +1763,9 @@ vm_create(struct vm_create_params *vcp, struct proc *p)
 	vm->vm_vcpu_ct = 0;

 	/* Initialize each VCPU defined in 'vcp' */
+	SLIST_INIT(&vm->vm_vcpu_list);
 	for (i = 0; i < vcp->vcp_ncpus; i++) {
 		vcpu = pool_get(&vcpu_pool, PR_WAITOK | PR_ZERO);
-		refcnt_init(&vcpu->vc_refcnt);

 		vcpu->vc_parent = vm;
 		if ((ret = vcpu_init(vcpu)) != 0) {
@@ -1818,13 +1798,12 @@ vm_create(struct vm_create_params *vcp, struct proc *p)
 	vm->vm_id = vmm_softc->vm_idx;
 	vcp->vcp_id = vm->vm_id;

-	/* Update list counts. */
+	/* Publish the vm into the list and update counts. */
+	refcnt_init(&vm->vm_refcnt);
+	SLIST_INSERT_HEAD(&vmm_softc->vm_list, vm, vm_link);
 	vmm_softc->vm_ct++;
 	vmm_softc->vcpu_ct += vm->vm_vcpu_ct;

-	/* Publish vm into list, inheriting the reference. */
-	SLIST_INSERT_HEAD(&vmm_softc->vm_list, vm, vm_link);
-
 	rw_exit_write(&vmm_softc->vm_lock);

 	return (0);
@@ -4066,19 +4045,14 @@ vm_teardown(struct vm **target)

 	KERNEL_ASSERT_UNLOCKED();

-	refcnt_finalize(&vm->vm_refcnt, "vmteardown");
-
 	/* Free VCPUs */
-	rw_enter_write(&vm->vm_vcpu_lock);
 	SLIST_FOREACH_SAFE(vcpu, &vm->vm_vcpu_list, vc_vcpu_link, tmp) {
-		refcnt_finalize(&vcpu->vc_refcnt, "vcputeardown");
 		SLIST_REMOVE(&vm->vm_vcpu_list, vcpu, vcpu, vc_vcpu_link);
 		vcpu_deinit(vcpu);
 		pool_put(&vcpu_pool, vcpu);
 		nvcpu++;
 	}
-	rw_exit_write(&vm->vm_vcpu_lock);

 	vm_impl_deinit(vm);
@@ -4403,19 +4377,15 @@ vm_get_info(struct vm_info_params *vip)
 		out[i].vir_creator_pid = vm->vm_creator_pid;
 		strlcpy(out[i].vir_name, vm->vm_name, VMM_MAX_NAME_LEN);

-		rw_enter_read(&vm->vm_vcpu_lock);
 		for (j = 0; j < vm->vm_vcpu_ct; j++) {
 			out[i].vir_vcpu_state[j] = VCPU_STATE_UNKNOWN;
 			SLIST_FOREACH(vcpu, &vm->vm_vcpu_list,
 			    vc_vcpu_link) {
-				refcnt_take(&vcpu->vc_refcnt);
 				if (vcpu->vc_id == j)
 					out[i].vir_vcpu_state[j] =
 					    vcpu->vc_state;
-				refcnt_rele_wake(&vcpu->vc_refcnt);
 			}
 		}
-		rw_exit_read(&vm->vm_vcpu_lock);

 		refcnt_rele_wake(&vm->vm_refcnt);
 		i++;
@@ -4449,8 +4419,6 @@ int
 vm_terminate(struct vm_terminate_params *vtp)
 {
 	struct vm *vm;
-	struct vcpu *vcpu;
-	u_int old, next;
 	int error, nvcpu, vm_id;

 	/*
@@ -4460,28 +4428,18 @@ vm_terminate(struct vm_terminate_params *vtp)
 	if (error)
 		return (error);

-	/* Stop all vcpu's for the vm. */
-	rw_enter_read(&vm->vm_vcpu_lock);
-	SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
-		refcnt_take(&vcpu->vc_refcnt);
-		do {
-			old = vcpu->vc_state;
-			if (old == VCPU_STATE_RUNNING)
-				next = VCPU_STATE_REQTERM;
-			else if (old == VCPU_STATE_STOPPED)
-				next = VCPU_STATE_TERMINATED;
-			else /* must be REQTERM or TERMINATED */
-				break;
-		} while (old != atomic_cas_uint(&vcpu->vc_state, old, next));
-		refcnt_rele_wake(&vcpu->vc_refcnt);
-	}
-	rw_exit_read(&vm->vm_vcpu_lock);
-
 	/* Pop the vm out of the global vm list. */
 	rw_enter_write(&vmm_softc->vm_lock);
 	SLIST_REMOVE(&vmm_softc->vm_list, vm, vm, vm_link);
 	rw_exit_write(&vmm_softc->vm_lock);

-	refcnt_rele_wake(&vm->vm_refcnt); /* Drop list reference. */
+
+	/* Drop the vm_list's reference to the vm. */
+	if (refcnt_rele(&vm->vm_refcnt))
+		panic("%s: vm %d(%p) vm_list refcnt drop was the last",
+		    __func__, vm->vm_id, vm);
+
+	/* Wait until our reference (taken in vm_find) is the last active one. */
+	refcnt_finalize(&vm->vm_refcnt, __func__);

 	vm_id = vm->vm_id;
 	nvcpu = vm->vm_vcpu_ct;
@@ -4594,8 +4552,6 @@ vm_run(struct vm_run_params *vrp)
 out_unlock:
 	rw_exit_write(&vcpu->vc_lock);
 out:
-	if (vcpu != NULL)
-		refcnt_rele_wake(&vcpu->vc_refcnt);
 	refcnt_rele_wake(&vm->vm_refcnt);
 	return (ret);
 }
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index cbe901580de..320cbd3db3f 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vmmvar.h,v 1.82 2022/11/08 19:18:47 dlg Exp $	*/
+/*	$OpenBSD: vmmvar.h,v 1.83 2022/11/09 17:53:12 dv Exp $	*/
 /*
  * Copyright (c) 2014 Mike Larkin
  *
@@ -932,7 +932,6 @@ struct vcpu {
 	uint8_t vc_virt_mode;		/* [I] */

 	struct rwlock vc_lock;
-	struct refcnt vc_refcnt;	/* [a] */

 	struct cpu_info *vc_curcpu;	/* [a] */
 	struct cpu_info *vc_last_pcpu;	/* [v] */
-- 
2.20.1
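
The lifetime rule this change relies on can be seen in miniature outside the
kernel: a parent object's reference count protects an immutable child list, so
the children need no counting of their own. Below is a minimal userland sketch
of that pattern, assuming POSIX threads; it is NOT vmm code. The names mirror
the kernel's refcnt(9) API, but this implementation is hypothetical and only
illustrates the take/rele/finalize semantics used above (in the kernel,
refcnt_rele_wake() is the variant that wakes a sleeping finalizer; the
broadcast below folds that in).

#include <pthread.h>

struct refcnt {
	pthread_mutex_t	 mtx;
	pthread_cond_t	 cv;
	unsigned int	 refs;
};

/* Start at one reference: the list's reference, as in vm_create(). */
void
refcnt_init(struct refcnt *r)
{
	pthread_mutex_init(&r->mtx, NULL);
	pthread_cond_init(&r->cv, NULL);
	r->refs = 1;
}

/* Take a reference before using the object, as vm_find() does. */
void
refcnt_take(struct refcnt *r)
{
	pthread_mutex_lock(&r->mtx);
	r->refs++;
	pthread_mutex_unlock(&r->mtx);
}

/* Drop a reference; returns nonzero if it was the last one. */
int
refcnt_rele(struct refcnt *r)
{
	int last;

	pthread_mutex_lock(&r->mtx);
	last = (--r->refs == 0);
	pthread_cond_broadcast(&r->cv);		/* wake any finalizer */
	pthread_mutex_unlock(&r->mtx);
	return (last);
}

/*
 * Drop our own reference and sleep until every other holder drops
 * theirs.  When this returns the caller was the last user and may free
 * the object and everything whose lifetime rides on it (here: the
 * immutable vcpu list).
 */
void
refcnt_finalize(struct refcnt *r)
{
	pthread_mutex_lock(&r->mtx);
	r->refs--;
	while (r->refs > 0)
		pthread_cond_wait(&r->cv, &r->mtx);
	pthread_mutex_unlock(&r->mtx);
}

Usage mirrors the patch: a lookup path takes one reference on the parent and
may then walk the child list freely until it drops that reference, with no
per-child refcount or list rwlock. Teardown follows vm_terminate() above:
unlink the parent from the global list, refcnt_rele() the list's reference
(which must not be the last while the caller still holds its lookup
reference), then refcnt_finalize() to wait out any concurrent users before
freeing the children.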