From 46cd802afa2fbee4457700bcc55d42ee69edcd3d Mon Sep 17 00:00:00 2001
From: dv
Date: Sun, 14 Jul 2024 07:57:42 +0000
Subject: [PATCH] vmm(4)/vmx: update host cr3, invept on cpu migration.

Since vmm handles nested page faults in the vcpu run loop, trying
to avoid trips back to userland, it's possible for the thread to
move host cpus. vmm(4) already updates some local cpu state when
this happens, but it also needs to update the host cr3 in the vmcs
to allow vmx to restore the proper cr3 value on the next vm exit.

Additionally, we should flush the ept cache on the new cpu. If the
single-context flush is available, use it instead of the global
flush.

ok mlarkin@
---
 sys/arch/amd64/amd64/vmm_machdep.c  | 41 ++++++++++++++++++++---------
 sys/arch/amd64/include/specialreg.h |  4 ++-
 sys/arch/amd64/include/vmmvar.h     |  3 ++-
 3 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c
index f150569d9ea..18ef8b75754 100644
--- a/sys/arch/amd64/amd64/vmm_machdep.c
+++ b/sys/arch/amd64/amd64/vmm_machdep.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vmm_machdep.c,v 1.28 2024/06/26 01:40:49 jsg Exp $	*/
+/*	$OpenBSD: vmm_machdep.c,v 1.29 2024/07/14 07:57:42 dv Exp $	*/
 /*
  * Copyright (c) 2014 Mike Larkin
  *
@@ -126,7 +126,7 @@ int svm_fault_page(struct vcpu *, paddr_t);
 int vmx_fault_page(struct vcpu *, paddr_t);
 int vmx_handle_np_fault(struct vcpu *);
 int svm_handle_np_fault(struct vcpu *);
-int vmx_mprotect_ept(vm_map_t, paddr_t, paddr_t, int);
+int vmx_mprotect_ept(struct vcpu *, vm_map_t, paddr_t, paddr_t, int);
 pt_entry_t *vmx_pmap_find_pte_ept(pmap_t, paddr_t);
 int vmm_alloc_vpid(uint16_t *);
 void vmm_free_vpid(uint16_t);
@@ -777,7 +777,8 @@ vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
 	}
 
 	if (vmm_softc->mode == VMM_MODE_EPT)
-		ret = vmx_mprotect_ept(vm->vm_map, sgpa, sgpa + size, prot);
+		ret = vmx_mprotect_ept(vcpu, vm->vm_map, sgpa, sgpa + size,
+		    prot);
 	else if (vmm_softc->mode == VMM_MODE_RVI) {
 		pmap_write_protect(vm->vm_map->pmap, sgpa, sgpa + size, prot);
 		/* XXX requires a invlpga */
@@ -799,7 +800,8 @@ out_nolock:
  * required.
  */
 int
-vmx_mprotect_ept(vm_map_t vm_map, paddr_t sgpa, paddr_t egpa, int prot)
+vmx_mprotect_ept(struct vcpu *vcpu, vm_map_t vm_map, paddr_t sgpa, paddr_t egpa,
+    int prot)
 {
 	struct vmx_invept_descriptor vid;
 	pmap_t pmap;
@@ -859,7 +861,7 @@ vmx_mprotect_ept(vm_map_t vm_map, paddr_t sgpa, paddr_t egpa, int prot)
 		vid.vid_eptp = pmap->eptp;
 		DPRINTF("%s: flushing EPT TLB for EPTP 0x%llx\n", __func__,
 		    vid.vid_eptp);
-		invept(IA32_VMX_INVEPT_SINGLE_CTX, &vid);
+		invept(vcpu->vc_vmx_invept_op, &vid);
 	}
 
 	KERNEL_UNLOCK();
@@ -2948,6 +2950,10 @@ vcpu_init_vmx(struct vcpu *vcpu)
 		ret = EINVAL;
 		goto exit;
 	}
+	if (msr & IA32_EPT_VPID_CAP_INVEPT_CONTEXT)
+		vcpu->vc_vmx_invept_op = IA32_VMX_INVEPT_SINGLE_CTX;
+	else
+		vcpu->vc_vmx_invept_op = IA32_VMX_INVEPT_GLOBAL_CTX;
 
 	if (msr & IA32_EPT_VPID_CAP_WB) {
 		/* WB cache type supported */
@@ -3896,6 +3902,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 	struct schedstate_percpu *spc;
 	struct vmx_msr_store *msr_store;
 	struct vmx_invvpid_descriptor vid;
+	struct vmx_invept_descriptor vid_ept;
 	uint64_t cr0, eii, procbased, int_st;
 	u_long s;
 
@@ -3940,14 +3947,6 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 	}
 	memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit));
 
-	/* Host CR3 */
-	cr3 = rcr3();
-	if (vmwrite(VMCS_HOST_IA32_CR3, cr3)) {
-		printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__,
-		    VMCS_HOST_IA32_CR3, cr3);
-		return (EINVAL);
-	}
-
 	/* Handle vmd(8) injected interrupts */
 	/* Is there an interrupt pending injection? */
 	if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR) {
@@ -4001,6 +4000,22 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 		ci = curcpu();
 		vcpu->vc_last_pcpu = ci;
 
+		/* Invalidate EPT cache. */
+		vid_ept.vid_reserved = 0;
+		vid_ept.vid_eptp = vcpu->vc_parent->vm_map->pmap->eptp;
+		if (invept(vcpu->vc_vmx_invept_op, &vid_ept)) {
+			printf("%s: invept\n", __func__);
+			return (EINVAL);
+		}
+
+		/* Host CR3 */
+		cr3 = rcr3();
+		if (vmwrite(VMCS_HOST_IA32_CR3, cr3)) {
+			printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__,
+			    VMCS_HOST_IA32_CR3, cr3);
+			return (EINVAL);
+		}
+
 		setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1);
 		if (gdt.rd_base == 0) {
 			printf("%s: setregion\n", __func__);
diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
index 341b1df2309..7d1a0804a0c 100644
--- a/sys/arch/amd64/include/specialreg.h
+++ b/sys/arch/amd64/include/specialreg.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: specialreg.h,v 1.113 2024/06/24 21:22:14 bluhm Exp $	*/
+/*	$OpenBSD: specialreg.h,v 1.114 2024/07/14 07:57:42 dv Exp $	*/
 /*	$NetBSD: specialreg.h,v 1.1 2003/04/26 18:39:48 fvdl Exp $	*/
 /*	$NetBSD: x86/specialreg.h,v 1.2 2003/04/25 21:54:30 fvdl Exp $	*/
 
@@ -1117,6 +1117,8 @@
 #define IA32_EPT_VPID_CAP_PAGE_WALK_4	(1ULL << 6)
 #define IA32_EPT_VPID_CAP_WB		(1ULL << 14)
 #define IA32_EPT_VPID_CAP_AD_BITS	(1ULL << 21)
+#define IA32_EPT_VPID_CAP_INVEPT_CONTEXT	(1ULL << 25)
+#define IA32_EPT_VPID_CAP_INVEPT_ALL	(1ULL << 26)
 
 #define IA32_EPT_PAGING_CACHE_TYPE_UC	0x0
 #define IA32_EPT_PAGING_CACHE_TYPE_WB	0x6
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index b67d3f0a7ba..3f35e520926 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: vmmvar.h,v 1.103 2024/07/10 09:27:32 dv Exp $	*/
+/*	$OpenBSD: vmmvar.h,v 1.104 2024/07/14 07:57:42 dv Exp $	*/
 /*
  * Copyright (c) 2014 Mike Larkin
  *
@@ -886,6 +886,7 @@ struct vcpu {
 	uint32_t vc_vmx_vmcs_state;		/* [a] */
 #define VMCS_CLEARED	0
 #define VMCS_LAUNCHED	1
+	uint64_t vc_vmx_invept_op;
 
 	/*
 	/* SVM only (all requiring [v]) */
 	vaddr_t vc_svm_hsa_va;
-- 
2.20.1
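
For readers who want the control flow outside of diff context, below is
a minimal standalone sketch of the two pieces the patch adds: picking
the cheapest supported INVEPT type once at vcpu init, and redoing the
EPT flush plus the host CR3 vmwrite on the way back into the guest.
The structs and the invept()/vmwrite stand-ins here are hypothetical
userland stubs, not the vmm(4) internals; only the capability bit and
the INVEPT type encodings (1 = single-context, 2 = global) mirror
specialreg.h and the Intel SDM.

/*
 * Compile with: cc -Wall sketch.c
 * Everything below is a userland stand-in; the real code runs in the
 * kernel and issues the actual INVEPT/VMWRITE instructions.
 */
#include <stdint.h>
#include <stdio.h>

/* Bit and type encodings mirroring specialreg.h / Intel SDM. */
#define IA32_EPT_VPID_CAP_INVEPT_CONTEXT	(1ULL << 25)
#define IA32_VMX_INVEPT_SINGLE_CTX		0x1	/* one EPTP */
#define IA32_VMX_INVEPT_GLOBAL_CTX		0x2	/* all EPTPs */

struct invept_desc {		/* stand-in for vmx_invept_descriptor */
	uint64_t	vid_eptp;
	uint64_t	vid_reserved;
};

struct vcpu_sketch {		/* hypothetical slice of struct vcpu */
	uint64_t	invept_op;	/* mirrors vc_vmx_invept_op */
	uint64_t	eptp;		/* this VM's EPT pointer */
};

/* Stub for the privileged INVEPT instruction; returns 0 on success. */
static int
invept(uint64_t op, struct invept_desc *d)
{
	printf("invept op=%llu eptp=0x%llx\n", (unsigned long long)op,
	    (unsigned long long)d->vid_eptp);
	return (0);
}

/* Stub for vmwrite(VMCS_HOST_IA32_CR3, cr3). */
static int
vmwrite_host_cr3(uint64_t cr3)
{
	printf("vmwrite(HOST_IA32_CR3, 0x%llx)\n", (unsigned long long)cr3);
	return (0);
}

/* vcpu init: prefer the narrower single-context flush if supported. */
static void
pick_invept_op(struct vcpu_sketch *v, uint64_t ept_vpid_cap)
{
	if (ept_vpid_cap & IA32_EPT_VPID_CAP_INVEPT_CONTEXT)
		v->invept_op = IA32_VMX_INVEPT_SINGLE_CTX;
	else
		v->invept_op = IA32_VMX_INVEPT_GLOBAL_CTX;
}

/*
 * Run loop, before entering the guest: flush EPT translations this
 * cpu may have cached, then refresh the host CR3 stored in the VMCS
 * so the right page tables come back on the next VM exit.
 */
static int
pre_entry_fixup(struct vcpu_sketch *v, uint64_t host_cr3)
{
	struct invept_desc d = { .vid_eptp = v->eptp, .vid_reserved = 0 };

	if (invept(v->invept_op, &d))
		return (-1);
	return (vmwrite_host_cr3(host_cr3));
}

int
main(void)
{
	struct vcpu_sketch v = { .eptp = 0x12345000ULL };

	/* Pretend the capability MSR advertises single-context INVEPT. */
	pick_invept_op(&v, IA32_EPT_VPID_CAP_INVEPT_CONTEXT);
	return (pre_entry_fixup(&v, 0x7f8000ULL));
}

The preference for the single-context flush is a scope question: a
global INVEPT discards cached guest-physical translations for every
EPTP on that cpu, while the single-context form only drops the ones
belonging to this VM's paging hierarchy.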