Initial support for mmio assist for vmm(4)
author dv <dv@openbsd.org>
Tue, 30 Aug 2022 17:09:21 +0000 (17:09 +0000)
committer dv <dv@openbsd.org>
Tue, 30 Aug 2022 17:09:21 +0000 (17:09 +0000)
Provide the basic information required for a userland assist in
emulating instructions touching mmio regions, sending as much
information as is provided by the host hardware.

No decode or assist provided at the moment by vmd(8).

ok mlarkin@

sys/arch/amd64/amd64/identcpu.c
sys/arch/amd64/amd64/vmm.c
sys/arch/amd64/include/cpu.h
sys/arch/amd64/include/specialreg.h
sys/arch/amd64/include/vmmvar.h
usr.sbin/vmd/vm.c

index e1748de..7cb1d81 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: identcpu.c,v 1.126 2022/08/07 23:56:06 guenther Exp $ */
+/*     $OpenBSD: identcpu.c,v 1.127 2022/08/30 17:09:21 dv Exp $       */
 /*     $NetBSD: identcpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $        */
 
 /*
@@ -335,7 +335,7 @@ cpu_hz_update_sensor(void *args)
 
        if (mdelta > 0) {
                val = (adelta * 1000000) / mdelta * tsc_frequency;
-               val = ((val + FREQ_50MHZ / 2) / FREQ_50MHZ) * FREQ_50MHZ; 
+               val = ((val + FREQ_50MHZ / 2) / FREQ_50MHZ) * FREQ_50MHZ;
                ci->ci_hz_sensor.value = val;
        }
 
@@ -1055,6 +1055,9 @@ cpu_check_vmm_cap(struct cpu_info *ci)
 
                if (edx & AMD_SVM_VMCB_CLEAN_CAP)
                        ci->ci_vmm_cap.vcc_svm.svm_vmcb_clean = 1;
+
+               if (edx & AMD_SVM_DECODE_ASSIST_CAP)
+                       ci->ci_vmm_cap.vcc_svm.svm_decode_assist = 1;
        }
 
        /*
index 354cfac..9ee961c 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vmm.c,v 1.319 2022/08/07 23:56:06 guenther Exp $      */
+/*     $OpenBSD: vmm.c,v 1.320 2022/08/30 17:09:21 dv Exp $    */
 /*
  * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
  *
@@ -4891,11 +4891,20 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
                                vcpu->vc_gueststate.vg_rax =
                                    vcpu->vc_exit.vei.vei_data;
                        break;
+               case VMX_EXIT_EPT_VIOLATION:
+                       ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_GPRS, 0,
+                           &vcpu->vc_exit.vrs);
+                       if (ret) {
+                               printf("%s: vm %d vcpu %d failed to update "
+                                   "registers\n", __func__,
+                                   vcpu->vc_parent->vm_id, vcpu->vc_id);
+                               return (EINVAL);
+                       }
+                       break;
                case VM_EXIT_NONE:
                case VMX_EXIT_HLT:
                case VMX_EXIT_INT_WINDOW:
                case VMX_EXIT_EXTINT:
-               case VMX_EXIT_EPT_VIOLATION:
                case VMX_EXIT_CPUID:
                case VMX_EXIT_XSETBV:
                        break;
@@ -4927,6 +4936,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
                        break;
 #endif /* VMM_DEBUG */
                }
+               memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit));
        }
 
        setregion(&gdt, ci->ci_gdt, GDT_SIZE - 1);
@@ -5658,7 +5668,7 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
 
        if (gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) {
                DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa);
-               return (VMM_MEM_TYPE_REGULAR);
+               return (VMM_MEM_TYPE_MMIO);
        }
 
        /* XXX Use binary search? */
@@ -5782,18 +5792,31 @@ int
 svm_handle_np_fault(struct vcpu *vcpu)
 {
        uint64_t gpa;
-       int gpa_memtype, ret;
+       int gpa_memtype, ret = 0;
        struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+       struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee;
+       struct cpu_info *ci = curcpu();
 
-       ret = 0;
+       memset(vee, 0, sizeof(*vee));
 
        gpa = vmcb->v_exitinfo2;
 
        gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
        switch (gpa_memtype) {
        case VMM_MEM_TYPE_REGULAR:
+               vee->vee_fault_type = VEE_FAULT_HANDLED;
                ret = svm_fault_page(vcpu, gpa);
                break;
+       case VMM_MEM_TYPE_MMIO:
+               vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
+               if (ci->ci_vmm_cap.vcc_svm.svm_decode_assist) {
+                       vee->vee_insn_len = vmcb->v_n_bytes_fetched;
+                       memcpy(&vee->vee_insn_bytes, vmcb->v_guest_ins_bytes,
+                           sizeof(vee->vee_insn_bytes));
+                       vee->vee_insn_info |= VEE_BYTES_VALID;
+               }
+               ret = EAGAIN;
+               break;
        default:
                printf("unknown memory type %d for GPA 0x%llx\n",
                    gpa_memtype, gpa);
@@ -5862,10 +5885,12 @@ vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
 int
 vmx_handle_np_fault(struct vcpu *vcpu)
 {
-       uint64_t gpa;
-       int gpa_memtype, ret;
+       uint64_t insn_len = 0, gpa;
+       int gpa_memtype, ret = 0;
+       struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee;
+
+       memset(vee, 0, sizeof(*vee));
 
-       ret = 0;
        if (vmread(VMCS_GUEST_PHYSICAL_ADDRESS, &gpa)) {
                printf("%s: cannot extract faulting pa\n", __func__);
                return (EINVAL);
@@ -5874,8 +5899,22 @@ vmx_handle_np_fault(struct vcpu *vcpu)
        gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
        switch (gpa_memtype) {
        case VMM_MEM_TYPE_REGULAR:
+               vee->vee_fault_type = VEE_FAULT_HANDLED;
                ret = vmx_fault_page(vcpu, gpa);
                break;
+       case VMM_MEM_TYPE_MMIO:
+               vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
+               if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_len) ||
+                   insn_len == 0 || insn_len > 15) {
+                       printf("%s: failed to extract instruction length\n",
+                           __func__);
+                       ret = EINVAL;
+               } else {
+                       vee->vee_insn_len = (uint32_t)insn_len;
+                       vee->vee_insn_info |= VEE_LEN_VALID;
+                       ret = EAGAIN;
+               }
+               break;
        default:
                printf("unknown memory type %d for GPA 0x%llx\n",
                    gpa_memtype, gpa);
@@ -7321,7 +7360,19 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
                                    vcpu->vc_exit.vei.vei_data;
                                vmcb->v_rax = vcpu->vc_gueststate.vg_rax;
                        }
+                       break;
+               case SVM_VMEXIT_NPF:
+                       ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_GPRS,
+                           &vcpu->vc_exit.vrs);
+                       if (ret) {
+                               printf("%s: vm %d vcpu %d failed to update "
+                                   "registers\n", __func__,
+                                   vcpu->vc_parent->vm_id, vcpu->vc_id);
+                               return (EINVAL);
+                       }
+                       break;
                }
+               memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit));
        }
 
        while (ret == 0) {
index a4af201..2689505 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: cpu.h,v 1.149 2022/08/25 17:25:25 cheloha Exp $       */
+/*     $OpenBSD: cpu.h,v 1.150 2022/08/30 17:09:21 dv Exp $    */
 /*     $NetBSD: cpu.h,v 1.1 2003/04/26 18:39:39 fvdl Exp $     */
 
 /*-
@@ -82,6 +82,7 @@ struct svm {
        uint32_t        svm_max_asid;
        uint8_t         svm_flush_by_asid;
        uint8_t         svm_vmcb_clean;
+       uint8_t         svm_decode_assist;
 };
 
 union vmm_cpu_cap {
index 8d3f829..e7bc30f 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: specialreg.h,v 1.93 2022/08/20 19:26:00 daniel Exp $  */
+/*     $OpenBSD: specialreg.h,v 1.94 2022/08/30 17:09:21 dv Exp $      */
 /*     $NetBSD: specialreg.h,v 1.1 2003/04/26 18:39:48 fvdl Exp $      */
 /*     $NetBSD: x86/specialreg.h,v 1.2 2003/04/25 21:54:30 fvdl Exp $  */
 
 #define AMD_SVM_NESTED_PAGING_CAP      (1 << 0)
 #define AMD_SVM_VMCB_CLEAN_CAP         (1 << 5)
 #define AMD_SVM_FLUSH_BY_ASID_CAP      (1 << 6)
+#define AMD_SVM_DECODE_ASSIST_CAP      (1 << 7)
 #define AMD_SVMDIS                     0x10
 
 #define SVM_TLB_CONTROL_FLUSH_NONE     0
index 6fbb5e1..a7f69eb 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vmmvar.h,v 1.79 2022/06/30 13:17:58 dv Exp $  */
+/*     $OpenBSD: vmmvar.h,v 1.80 2022/08/30 17:09:21 dv Exp $  */
 /*
  * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
  *
@@ -324,7 +324,9 @@ enum {
 };
 
 enum {
+       VEE_FAULT_INVALID = 0,
        VEE_FAULT_HANDLED,
+       VEE_FAULT_MMIO_ASSIST,
        VEE_FAULT_PROTECT,
 };
 
@@ -361,7 +363,12 @@ struct vm_exit_inout {
  *  vm_exit_eptviolation       : describes an EPT VIOLATION exit
  */
 struct vm_exit_eptviolation {
-       uint8_t         vee_fault_type;
+       uint8_t         vee_fault_type;         /* type of vm exit */
+       uint8_t         vee_insn_info;          /* bitfield */
+#define VEE_LEN_VALID          0x1             /* vee_insn_len is valid */
+#define VEE_BYTES_VALID                0x2             /* vee_insn_bytes is valid */
+       uint8_t         vee_insn_len;           /* [VMX] instruction length */
+       uint8_t         vee_insn_bytes[15];     /* [SVM] bytes at {R,E,}IP */
 };
 
 /*
@@ -709,6 +716,7 @@ enum {
 
 enum {
        VMM_MEM_TYPE_REGULAR,
+       VMM_MEM_TYPE_MMIO,
        VMM_MEM_TYPE_UNKNOWN
 };
 
index eac6616..209814e 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vm.c,v 1.71 2022/06/29 17:39:54 dv Exp $      */
+/*     $OpenBSD: vm.c,v 1.72 2022/08/30 17:09:21 dv Exp $      */
 
 /*
  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
@@ -1652,26 +1652,36 @@ vcpu_exit_inout(struct vm_run_params *vrp)
  *
  * Return values:
  *  0: no action required
- *  EAGAIN: a protection fault occured, kill the vm.
+ *  EFAULT: a protection fault occured, kill the vm.
  */
 int
 vcpu_exit_eptviolation(struct vm_run_params *vrp)
 {
+       int ret = 0;
+       uint8_t fault_type;
        struct vm_exit *ve = vrp->vrp_exit;
 
-       /*
-        * vmd may be exiting to vmd to handle a pending interrupt
-        * but last exit type may have been VMX_EXIT_EPT_VIOLATION,
-        * check the fault_type to ensure we really are processing
-        * a VMX_EXIT_EPT_VIOLATION.
-        */
-       if (ve->vee.vee_fault_type == VEE_FAULT_PROTECT) {
-               log_debug("%s: EPT Violation: rip=0x%llx",
-                   __progname, vrp->vrp_exit->vrs.vrs_gprs[VCPU_REGS_RIP]);
-               return (EAGAIN);
+       fault_type = ve->vee.vee_fault_type;
+       switch (fault_type) {
+       case VEE_FAULT_HANDLED:
+               log_debug("%s: fault already handled", __func__);
+               break;
+       case VEE_FAULT_MMIO_ASSIST:
+               log_warnx("%s: mmio assist required: rip=0x%llx", __progname,
+                   ve->vrs.vrs_gprs[VCPU_REGS_RIP]);
+               ret = EFAULT;
+               break;
+       case VEE_FAULT_PROTECT:
+               log_debug("%s: EPT Violation: rip=0x%llx", __progname,
+                   ve->vrs.vrs_gprs[VCPU_REGS_RIP]);
+               ret = EFAULT;
+               break;
+       default:
+               fatalx("%s: invalid fault_type %d", __progname, fault_type);
+               /* UNREACHED */
        }
 
-       return (0);
+       return (ret);
 }
 
 /*
@@ -1704,7 +1714,6 @@ vcpu_exit(struct vm_run_params *vrp)
        case VMX_EXIT_CPUID:
        case VMX_EXIT_EXTINT:
        case SVM_VMEXIT_INTR:
-       case SVM_VMEXIT_NPF:
        case SVM_VMEXIT_MSR:
        case SVM_VMEXIT_CPUID:
                /*
@@ -1715,11 +1724,11 @@ vcpu_exit(struct vm_run_params *vrp)
                 * in more vmd log spam).
                 */
                break;
+       case SVM_VMEXIT_NPF:
        case VMX_EXIT_EPT_VIOLATION:
                ret = vcpu_exit_eptviolation(vrp);
                if (ret)
                        return (ret);
-
                break;
        case VMX_EXIT_IO:
        case SVM_VMEXIT_IOIO: