vmm/vmd: add exception injection and refactor inject api.
author dv <dv@openbsd.org>
Tue, 9 Apr 2024 21:55:16 +0000 (21:55 +0000)
committer dv <dv@openbsd.org>
Tue, 9 Apr 2024 21:55:16 +0000 (21:55 +0000)
In order to continue work on mmio and other instruction emulation,
vmd(8) needs the ability to inject exceptions (like page faults)
from userland.

Refactor the way events are injected from userland, cleaning up how
hardware (external) interrupts are injected in the process.

ok mlarkin@

regress/sys/arch/amd64/vmm/vcpu.c
sys/arch/amd64/amd64/vmm_machdep.c
sys/arch/amd64/include/vmmvar.h
usr.sbin/vmd/vm.c

index 84bd949..2c72871 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vcpu.c,v 1.6 2023/05/13 23:15:28 dv Exp $     */
+/*     $OpenBSD: vcpu.c,v 1.7 2024/04/09 21:55:16 dv Exp $     */
 
 /*
  * Copyright (c) 2022 Dave Voutila <dv@openbsd.org>
 #include <unistd.h>
 
 #define KIB            1024
-#define MIB            (1 << 20)
+#define MIB            (1UL << 20)
+#define GIB            (1024 * MIB)
 #define VMM_NODE       "/dev/vmm"
 
+#define LOW_MEM                0
+#define UPPER_MEM      1
+
 #define PCKBC_AUX      0x61
+#define PCJR_DISKCTRL  0xF0
 
 const char             *VM_NAME = "regress";
 
+const uint8_t PUSHW_DX[] = { 0x66, 0x52 };              // pushw %dx
+const uint8_t INS[] = { 0x6C };                                 // ins es:[di],dx
+const uint8_t IN_PCJR[] = { 0xE4, 0xF0 };               // in 0xF0
+
 /* Originally from vmd(8)'s vm.c */
 const struct vcpu_reg_state vcpu_init_flat16 = {
        .vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
        .vrs_gprs[VCPU_REGS_RIP] = 0xFFF0,
-       .vrs_gprs[VCPU_REGS_RSP] = 0x0,
+       .vrs_gprs[VCPU_REGS_RDX] = PCKBC_AUX,   /* Port used by INS */
+       .vrs_gprs[VCPU_REGS_RSP] =  0x800,      /* Set our stack in low mem. */
        .vrs_crs[VCPU_REGS_CR0] = 0x60000010,
-       .vrs_crs[VCPU_REGS_CR3] = 0,
-       .vrs_sregs[VCPU_REGS_CS] = { 0xF000, 0xFFFF, 0x809F, 0xF0000},
-       .vrs_sregs[VCPU_REGS_DS] = { 0x0, 0xFFFF, 0x8093, 0x0},
-       .vrs_sregs[VCPU_REGS_ES] = { 0x0, 0xFFFF, 0x8093, 0x0},
-       .vrs_sregs[VCPU_REGS_FS] = { 0x0, 0xFFFF, 0x8093, 0x0},
-       .vrs_sregs[VCPU_REGS_GS] = { 0x0, 0xFFFF, 0x8093, 0x0},
-       .vrs_sregs[VCPU_REGS_SS] = { 0x0, 0xFFFF, 0x8093, 0x0},
-       .vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
-       .vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
+       .vrs_sregs[VCPU_REGS_CS] = { 0xF000, 0xFFFF, 0x0093, 0xFFFF0000},
+       .vrs_sregs[VCPU_REGS_DS] = { 0x0, 0xFFFF, 0x0093, 0x0},
+       .vrs_sregs[VCPU_REGS_ES] = { 0x0, 0xFFFF, 0x0093, 0x0},
+       .vrs_sregs[VCPU_REGS_FS] = { 0x0, 0xFFFF, 0x0093, 0x0},
+       .vrs_sregs[VCPU_REGS_GS] = { 0x0, 0xFFFF, 0x0093, 0x0},
+       .vrs_sregs[VCPU_REGS_SS] = { 0x0, 0xFFFF, 0x0093, 0x0},
+       .vrs_gdtr = { 0x0, 0xFFFF, 0x0082, 0x0},
+       .vrs_idtr = { 0x0, 0xFFFF, 0x0082, 0x0},
        .vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
        .vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
-       .vrs_msrs[VCPU_REGS_EFER] = 0ULL,
-       .vrs_drs[VCPU_REGS_DR0] = 0x0,
-       .vrs_drs[VCPU_REGS_DR1] = 0x0,
-       .vrs_drs[VCPU_REGS_DR2] = 0x0,
-       .vrs_drs[VCPU_REGS_DR3] = 0x0,
        .vrs_drs[VCPU_REGS_DR6] = 0xFFFF0FF0,
        .vrs_drs[VCPU_REGS_DR7] = 0x400,
-       .vrs_msrs[VCPU_REGS_STAR] = 0ULL,
-       .vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
-       .vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
-       .vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
-       .vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
-       .vrs_crs[VCPU_REGS_XCR0] = XFEATURE_X87
+       .vrs_crs[VCPU_REGS_XCR0] = XFEATURE_X87,
+};
+
+struct intr_handler {                  /* Layout of one ivt[] entry. */
+       uint16_t        offset;         /* Handler offset within segment. */
+       uint16_t        segment;        /* Handler segment. */
+};
+
+const struct intr_handler ivt[256] = { /* Only #GP has an entry; rest are 0. */
+       [VMM_EX_GP] = { .segment = 0x0, .offset = 0x0B5D },
 };
 
 int
@@ -87,7 +95,8 @@ main(int argc, char **argv)
 
        struct vm_mem_range             *vmr;
        int                              fd, ret = 1;
-       size_t                           i, j;
+       size_t                           i;
+       off_t                            off, reset = 0xFFFFFFF0, stack = 0x800;
        void                            *p;
 
        fd = open(VMM_NODE, O_RDWR);
@@ -95,7 +104,7 @@ main(int argc, char **argv)
                err(1, "open %s", VMM_NODE);
 
        /*
-        * 1. Create our VM with 1 vcpu and 2 MiB of memory.
+        * 1. Create our VM with 1 vcpu and 64 MiB of memory.
         */
        memset(&vcp, 0, sizeof(vcp));
        strlcpy(vcp.vcp_name, VM_NAME, sizeof(vcp.vcp_name));
@@ -103,34 +112,53 @@ main(int argc, char **argv)
 
        /* Split into two ranges, similar to how vmd(8) might do it. */
        vcp.vcp_nmemranges = 2;
-       vcp.vcp_memranges[0].vmr_gpa = 0x0;
-       vcp.vcp_memranges[0].vmr_size = 640 * KIB;
-       vcp.vcp_memranges[1].vmr_gpa = 640 * KIB;
-       vcp.vcp_memranges[1].vmr_size = (2 * MIB) - (640 * KIB);
+       vcp.vcp_memranges[LOW_MEM].vmr_gpa = 0x0;
+       vcp.vcp_memranges[LOW_MEM].vmr_size = 640 * KIB;
+       vcp.vcp_memranges[UPPER_MEM].vmr_size = (64 * MIB) - (640 * KIB);
+       vcp.vcp_memranges[UPPER_MEM].vmr_gpa = (4 * GIB)
+           - vcp.vcp_memranges[UPPER_MEM].vmr_size;
 
-       /* Allocate memory. */
+       /* Allocate and Initialize our guest memory. */
        for (i = 0; i < vcp.vcp_nmemranges; i++) {
                vmr = &vcp.vcp_memranges[i];
+               if (vmr->vmr_size % 2 != 0)
+                       errx(1, "memory ranges must be multiple of 2");
+
                p = mmap(NULL, vmr->vmr_size, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANON, -1, 0);
                if (p == MAP_FAILED)
                        err(1, "mmap");
 
-               /*
-                * Fill with 2-byte IN instructions that read from what would
-                * be an ancient XT PC Keyboard status port. These reads will
-                * trigger vm exits.
-                */
-               if (vmr->vmr_size % 2 != 0)
-                       errx(1, "memory ranges must be multiple of 2");
-               for (j = 0; j < vmr->vmr_size; j += 2) {
-                       ((uint8_t*)p)[j + 0] = 0xE4;
-                       ((uint8_t*)p)[j + 1] = PCKBC_AUX;
-               }
                vmr->vmr_va = (vaddr_t)p;
                printf("created mapped region %zu: { gpa: 0x%08lx, size: %lu,"
                    " hva: 0x%lx }\n", i, vmr->vmr_gpa, vmr->vmr_size,
                    vmr->vmr_va);
+
+               /* Fill with int3 instructions. */
+               memset(p, 0xcc, vmr->vmr_size);
+
+               if (i == LOW_MEM) {
+                       /* Write our IVT. */
+                       memcpy(p, &ivt, sizeof(ivt));
+
+                       /*
+                        * Set up a #GP handler that does a read from a
+                        * non-existent PC Jr. Disk Controller.
+                        */
+                       p = (uint8_t*)((uint8_t*)p + 0xb5d);
+                       memcpy(p, IN_PCJR, sizeof(IN_PCJR));
+               } else {
+                       /*
+                        * Write our code to the reset vector:
+                        *   PUSHW %dx        ; inits the stack
+                        *   INS es:[di], dx  ; read from port in dx
+                        */
+                       off = reset - vmr->vmr_gpa;
+                       p = (uint8_t*)p + off;
+                       memcpy(p, PUSHW_DX, sizeof(PUSHW_DX));
+                       p = (uint8_t*)p + sizeof(PUSHW_DX);
+                       memcpy(p, INS, sizeof(INS));
+               }
        }
 
        if (ioctl(fd, VMM_IOC_CREATE, &vcp) == -1)
@@ -172,11 +200,15 @@ main(int argc, char **argv)
                vmr = &vsp.vsp_memranges[i];
                p = (void*)vmr->vmr_va;
 
-               for (j = 0; j < vmr->vmr_size; j += 2) {
-                       if (((uint8_t*)p)[j + 0] != 0xE4)
-                               errx(1, "bad byte");
-                       if (((uint8_t*)p)[j + 1] != PCKBC_AUX)
-                               errx(1, "bad byte");
+               if (i == LOW_MEM) {
+                       /* Check if our IVT is there. */
+                       if (memcmp(&ivt, p, sizeof(ivt)) != 0) {
+                               warnx("invalid ivt");
+                               goto out;
+                       }
+               } else {
+                       /* Check our code at the reset vector. */
+
                }
                printf("checked shared region %zu: { gpa: 0x%08lx, size: %lu,"
                    " hva: 0x%lx }\n", i, vmr->vmr_gpa, vmr->vmr_size,
@@ -266,7 +298,6 @@ main(int argc, char **argv)
        vrunp.vrp_exit = exit;
        vrunp.vrp_vcpu_id = 0;          /* XXX SP */
        vrunp.vrp_vm_id = vcp.vcp_id;
-       vrunp.vrp_irq = 0x0;
        vrunp.vrp_irqready = 1;
 
        if (ioctl(fd, VMM_IOC_RUN, &vrunp) == -1) {
@@ -283,8 +314,13 @@ main(int argc, char **argv)
        switch (vrunp.vrp_exit_reason) {
        case SVM_VMEXIT_IOIO:
        case VMX_EXIT_IO:
-               printf("vcpu %d on vm %d exited for io assist\n",
-                   vrunp.vrp_vcpu_id, vrunp.vrp_vm_id);
+               printf("vcpu %d on vm %d exited for io assist @ ip = 0x%llx, "
+                   "cs.base = 0x%llx, ss.base = 0x%llx, rsp = 0x%llx\n",
+                   vrunp.vrp_vcpu_id, vrunp.vrp_vm_id,
+                   vrunp.vrp_exit->vrs.vrs_gprs[VCPU_REGS_RIP],
+                   vrunp.vrp_exit->vrs.vrs_sregs[VCPU_REGS_CS].vsi_base,
+                   vrunp.vrp_exit->vrs.vrs_sregs[VCPU_REGS_SS].vsi_base,
+                   vrunp.vrp_exit->vrs.vrs_gprs[VCPU_REGS_RSP]);
                break;
        default:
                warnx("unexpected vm exit reason: 0%04x",
@@ -298,6 +334,49 @@ main(int argc, char **argv)
                    exit->vei.vei_port);
                goto out;
        }
+       if (exit->vei.vei_string != 1) {
+               warnx("expected string instruction (INS)");
+               goto out;
+       } else
+               printf("got expected string instruction\n");
+
+       /* Advance RIP? */
+       printf("insn_len = %u\n", exit->vei.vei_insn_len);
+       exit->vrs.vrs_gprs[VCPU_REGS_RIP] += exit->vei.vei_insn_len;
+
+       /*
+        * Inject a #GP and see if we end up at our isr.
+        */
+       vrunp.vrp_inject.vie_vector = VMM_EX_GP;
+       vrunp.vrp_inject.vie_errorcode = 0x11223344;
+       vrunp.vrp_inject.vie_type = VCPU_INJECT_EX;
+       printf("injecting exception 0x%x\n", vrunp.vrp_inject.vie_vector);
+       if (ioctl(fd, VMM_IOC_RUN, &vrunp) == -1) {
+               warn("VMM_IOC_RUN 2");
+               goto out;
+       }
+
+       switch (vrunp.vrp_exit_reason) {
+       case SVM_VMEXIT_IOIO:
+       case VMX_EXIT_IO:
+               printf("vcpu %d on vm %d exited for io assist @ ip = 0x%llx, "
+                   "cs.base = 0x%llx\n", vrunp.vrp_vcpu_id, vrunp.vrp_vm_id,
+                   vrunp.vrp_exit->vrs.vrs_gprs[VCPU_REGS_RIP],
+                   vrunp.vrp_exit->vrs.vrs_sregs[VCPU_REGS_CS].vsi_base);
+               break;
+       default:
+               warnx("unexpected vm exit reason: 0%04x",
+                   vrunp.vrp_exit_reason);
+               goto out;
+       }
+
+       if (exit->vei.vei_port != PCJR_DISKCTRL) {
+               warnx("expected NMI handler to poke PCJR_DISKCTLR, got 0x%02x",
+                   exit->vei.vei_port);
+               printf("rip = 0x%llx\n", exit->vrs.vrs_gprs[VCPU_REGS_RIP]);
+               goto out;
+       }
+       printf("exception handler called\n");
 
        /*
         * If we made it here, we're close to passing. Any failures during
@@ -306,6 +385,22 @@ main(int argc, char **argv)
        ret = 0;
 
 out:
+       printf("--- RESET VECTOR @ gpa 0x%llx ---\n", reset);
+       for (i=0; i<10; i++) {
+               if (i > 0)
+                       printf(" ");
+               printf("%02x", *(uint8_t*)
+                   (vsp.vsp_memranges[UPPER_MEM].vmr_va + off + i));
+       }
+       printf("\n--- STACK @ gpa 0x%llx ---\n", stack);
+       for (i=0; i<16; i++) {
+               if (i > 0)
+                       printf(" ");
+               printf("%02x", *(uint8_t*)(vsp.vsp_memranges[LOW_MEM].vmr_va
+                       + stack - i - 1));
+       }
+       printf("\n");
+
        /*
         * 6. Terminate our VM and clean up.
         */
index 55c775c..02f2c3c 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmm_machdep.c,v 1.22 2024/04/01 05:11:49 guenther Exp $ */
+/* $OpenBSD: vmm_machdep.c,v 1.23 2024/04/09 21:55:16 dv Exp $ */
 /*
  * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
  *
@@ -3694,6 +3694,10 @@ vm_run(struct vm_run_params *vrp)
                }
        }
 
+       vcpu->vc_inject.vie_type = vrp->vrp_inject.vie_type;
+       vcpu->vc_inject.vie_vector = vrp->vrp_inject.vie_vector;
+       vcpu->vc_inject.vie_errorcode = vrp->vrp_inject.vie_errorcode;
+
        WRITE_ONCE(vcpu->vc_curcpu, curcpu());
        /* Run the VCPU specified in vrp */
        if (vcpu->vc_virt_mode == VMM_MODE_EPT) {
@@ -3966,8 +3970,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
        struct schedstate_percpu *spc;
        struct vmx_msr_store *msr_store;
        struct vmx_invvpid_descriptor vid;
-       uint64_t eii, procbased, int_st;
-       uint16_t irq;
+       uint64_t cr0, eii, procbased, int_st;
        u_long s;
 
        rw_assert_wrlock(&vcpu->vc_lock);
@@ -3983,8 +3986,6 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
         * needs to be fixed up depends on what vmd populated in the
         * exit data structure.
         */
-       irq = vrp->vrp_irq;
-
        if (vrp->vrp_intr_pending)
                vcpu->vc_intr = 1;
        else
@@ -4062,7 +4063,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 
        /* Handle vmd(8) injected interrupts */
        /* Is there an interrupt pending injection? */
-       if (irq != 0xFFFF) {
+       if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR) {
                if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST, &int_st)) {
                        printf("%s: can't get interruptibility state\n",
                            __func__);
@@ -4071,16 +4072,15 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 
                /* Interruptibility state 0x3 covers NMIs and STI */
                if (!(int_st & 0x3) && vcpu->vc_irqready) {
-                       eii = (irq & 0xFF);
+                       eii = (uint64_t)vcpu->vc_inject.vie_vector;
                        eii |= (1ULL << 31);    /* Valid */
-                       eii |= (0ULL << 8);     /* Hardware Interrupt */
                        if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
                                printf("vcpu_run_vmx: can't vector "
                                    "interrupt to guest\n");
                                return (EINVAL);
                        }
 
-                       irq = 0xFFFF;
+                       vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
                }
        } else if (!vcpu->vc_intr) {
                /*
@@ -4159,38 +4159,65 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
                }
 
                /* Inject event if present */
-               if (vcpu->vc_event != 0) {
-                       eii = (vcpu->vc_event & 0xFF);
+               if (vcpu->vc_inject.vie_type == VCPU_INJECT_EX) {
+                       eii = (uint64_t)vcpu->vc_inject.vie_vector;
                        eii |= (1ULL << 31);    /* Valid */
 
-                       /* Set the "Send error code" flag for certain vectors */
-                       switch (vcpu->vc_event & 0xFF) {
-                               case VMM_EX_DF:
-                               case VMM_EX_TS:
-                               case VMM_EX_NP:
-                               case VMM_EX_SS:
-                               case VMM_EX_GP:
-                               case VMM_EX_PF:
-                               case VMM_EX_AC:
-                                       eii |= (1ULL << 11);
-                       }
+                       switch (vcpu->vc_inject.vie_vector) {
+                       case VMM_EX_BP:
+                       case VMM_EX_OF:
+                               /* Software Exceptions */
+                               eii |= (4ULL << 8);
+                               break;
+                       case VMM_EX_DF:
+                       case VMM_EX_TS:
+                       case VMM_EX_NP:
+                       case VMM_EX_SS:
+                       case VMM_EX_GP:
+                       case VMM_EX_PF:
+                       case VMM_EX_AC:
+                               /* Hardware Exceptions */
+                               eii |= (3ULL << 8);
+                               cr0 = 0;
+                               if (vmread(VMCS_GUEST_IA32_CR0, &cr0)) {
+                                       printf("%s: vmread(VMCS_GUEST_IA32_CR0)"
+                                           "\n", __func__);
+                                       ret = EINVAL;
+                                       break;
+                               }
 
-                       eii |= (3ULL << 8);     /* Hardware Exception */
-                       if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
-                               printf("%s: can't vector event to guest\n",
-                                   __func__);
-                               ret = EINVAL;
+                               /* Don't set error codes if in real mode. */
+                               if (ret == EINVAL || !(cr0 & CR0_PE))
+                                       break;
+                               eii |= (1ULL << 11);
+
+                               /* Enforce a 0 error code for #AC. */
+                               if (vcpu->vc_inject.vie_vector == VMM_EX_AC)
+                                       vcpu->vc_inject.vie_errorcode = 0;
+                               /*
+                                * XXX: Intel SDM says if IA32_VMX_BASIC[56] is
+                                * set, error codes can be injected for hw
+                                * exceptions with or without error code,
+                                * regardless of vector. See Vol 3D. A1. Ignore
+                                * this capability for now.
+                                */
+                               if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE,
+                                   vcpu->vc_inject.vie_errorcode)) {
+                                       printf("%s: can't write error code to "
+                                           "guest\n", __func__);
+                                       ret = EINVAL;
+                               }
+                       } /* switch */
+                       if (ret == EINVAL)
                                break;
-                       }
 
-                       if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE, 0)) {
-                               printf("%s: can't write error code to guest\n",
+                       if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
+                               printf("%s: can't vector event to guest\n",
                                    __func__);
                                ret = EINVAL;
                                break;
                        }
-
-                       vcpu->vc_event = 0;
+                       vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
                }
 
                if (vcpu->vc_vmx_vpid_enabled) {
@@ -4771,7 +4798,9 @@ vmm_inject_gp(struct vcpu *vcpu)
 {
        DPRINTF("%s: injecting #GP at guest %%rip 0x%llx\n", __func__,
            vcpu->vc_gueststate.vg_rip);
-       vcpu->vc_event = VMM_EX_GP;
+       vcpu->vc_inject.vie_vector = VMM_EX_GP;
+       vcpu->vc_inject.vie_type = VCPU_INJECT_EX;
+       vcpu->vc_inject.vie_errorcode = 0;
 
        return (0);
 }
@@ -4792,7 +4821,9 @@ vmm_inject_ud(struct vcpu *vcpu)
 {
        DPRINTF("%s: injecting #UD at guest %%rip 0x%llx\n", __func__,
            vcpu->vc_gueststate.vg_rip);
-       vcpu->vc_event = VMM_EX_UD;
+       vcpu->vc_inject.vie_vector = VMM_EX_UD;
+       vcpu->vc_inject.vie_type = VCPU_INJECT_EX;
+       vcpu->vc_inject.vie_errorcode = 0;
 
        return (0);
 }
@@ -4813,7 +4844,9 @@ vmm_inject_db(struct vcpu *vcpu)
 {
        DPRINTF("%s: injecting #DB at guest %%rip 0x%llx\n", __func__,
            vcpu->vc_gueststate.vg_rip);
-       vcpu->vc_event = VMM_EX_DB;
+       vcpu->vc_inject.vie_vector = VMM_EX_DB;
+       vcpu->vc_inject.vie_type = VCPU_INJECT_EX;
+       vcpu->vc_inject.vie_errorcode = 0;
 
        return (0);
 }
@@ -6463,11 +6496,8 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
        struct cpu_info *ci = NULL;
        uint64_t exit_reason;
        struct schedstate_percpu *spc;
-       uint16_t irq;
        struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
 
-       irq = vrp->vrp_irq;
-
        if (vrp->vrp_intr_pending)
                vcpu->vc_intr = 1;
        else
@@ -6541,30 +6571,58 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
 
                /* Handle vmd(8) injected interrupts */
                /* Is there an interrupt pending injection? */
-               if (irq != 0xFFFF && vcpu->vc_irqready) {
-                       vmcb->v_eventinj = (irq & 0xFF) | (1U << 31);
-                       irq = 0xFFFF;
+               if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR &&
+                   vcpu->vc_irqready) {
+                       vmcb->v_eventinj = vcpu->vc_inject.vie_vector |
+                           (1U << 31);
+                       vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
                }
 
                /* Inject event if present */
-               if (vcpu->vc_event != 0) {
-                       DPRINTF("%s: inject event %d\n", __func__,
-                           vcpu->vc_event);
-                       vmcb->v_eventinj = 0;
+               if (vcpu->vc_inject.vie_type == VCPU_INJECT_EX) {
+                       vmcb->v_eventinj = vcpu->vc_inject.vie_vector;
+
                        /* Set the "Event Valid" flag for certain vectors */
-                       switch (vcpu->vc_event & 0xFF) {
-                               case VMM_EX_DF:
-                               case VMM_EX_TS:
-                               case VMM_EX_NP:
-                               case VMM_EX_SS:
-                               case VMM_EX_GP:
-                               case VMM_EX_PF:
-                               case VMM_EX_AC:
+                       switch (vcpu->vc_inject.vie_vector) {
+                       case VMM_EX_BP:
+                       case VMM_EX_OF:
+                       case VMM_EX_DB:
+                               /*
+                                * Software exception.
+                                * XXX check nRIP support.
+                                */
+                               vmcb->v_eventinj |= (4ULL << 8);
+                               break;
+                       case VMM_EX_AC:
+                               vcpu->vc_inject.vie_errorcode = 0;
+                               /* fallthrough */
+                       case VMM_EX_DF:
+                       case VMM_EX_TS:
+                       case VMM_EX_NP:
+                       case VMM_EX_SS:
+                       case VMM_EX_GP:
+                       case VMM_EX_PF:
+                               /* Hardware exception. */
+                               vmcb->v_eventinj |= (3ULL << 8);
+
+                               if (vmcb->v_cr0 & CR0_PE) {
+                                       /* Error code valid. */
                                        vmcb->v_eventinj |= (1ULL << 11);
-                       }
-                       vmcb->v_eventinj |= (vcpu->vc_event) | (1U << 31);
-                       vmcb->v_eventinj |= (3ULL << 8); /* Exception */
-                       vcpu->vc_event = 0;
+                                       vmcb->v_eventinj |= (uint64_t)
+                                           vcpu->vc_inject.vie_errorcode << 32;
+                               }
+                               break;
+                       default:
+                               printf("%s: unsupported exception vector %u\n",
+                                   __func__, vcpu->vc_inject.vie_vector);
+                               ret = EINVAL;
+                       } /* switch */
+                       if (ret == EINVAL)
+                               break;
+
+                       /* Event is valid. */
+                       vmcb->v_eventinj |= (1U << 31);
+                       vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
                }
 
                TRACEPOINT(vmm, guest_enter, vcpu, vrp);
index 82aa105..05567f5 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vmmvar.h,v 1.99 2024/04/01 05:11:49 guenther Exp $    */
+/*     $OpenBSD: vmmvar.h,v 1.100 2024/04/09 21:55:16 dv Exp $ */
 /*
  * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
  *
@@ -340,6 +340,7 @@ struct vm_exit_inout {
        uint32_t                vei_data;       /* data */
        uint8_t                 vei_insn_len;   /* Count of instruction bytes */
 };
+
 /*
  *  vm_exit_eptviolation       : describes an EPT VIOLATION exit
  */
@@ -352,6 +353,19 @@ struct vm_exit_eptviolation {
        uint8_t         vee_insn_bytes[15];     /* [SVM] bytes at {R,E,}IP */
 };
 
+/*
+ * struct vcpu_inject_event    : describes an exception or interrupt to inject.
+ */
+struct vcpu_inject_event {
+       uint8_t         vie_vector;     /* Exception or interrupt vector. */
+       uint32_t        vie_errorcode;  /* Error code, for VCPU_INJECT_EX. */
+       uint8_t         vie_type;       /* One of VCPU_INJECT_* below. */
+#define VCPU_INJECT_NONE       0       /* Nothing to inject. */
+#define VCPU_INJECT_INTR       1       /* External hardware interrupt. */
+#define VCPU_INJECT_EX         2       /* HW or SW Exception */
+#define VCPU_INJECT_NMI                3       /* Non-maskable Interrupt */
+};
+
 /*
  * struct vcpu_segment_info
  *
@@ -465,7 +479,7 @@ struct vm_run_params {
        uint32_t        vrp_vm_id;
        uint32_t        vrp_vcpu_id;
        uint8_t         vrp_continue;           /* Continuing from an exit */
-       uint16_t        vrp_irq;                /* IRQ to inject */
+       struct vcpu_inject_event        vrp_inject;
        uint8_t         vrp_intr_pending;       /* Additional intrs pending? */
 
        /* Input/output parameter to VMM_IOC_RUN */
@@ -873,8 +887,7 @@ struct vcpu {
        uint64_t vc_h_xcr0;                     /* [v] */
 
        struct vcpu_gueststate vc_gueststate;   /* [v] */
-
-       uint8_t vc_event;
+       struct vcpu_inject_event vc_inject;     /* [v] */
 
        uint32_t vc_pvclock_version;            /* [v] */
        paddr_t vc_pvclock_system_gpa;          /* [v] */
index 86d5769..d5bfe7a 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vm.c,v 1.98 2024/02/20 21:40:37 dv Exp $      */
+/*     $OpenBSD: vm.c,v 1.99 2024/04/09 21:55:16 dv Exp $      */
 
 /*
  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
@@ -1536,7 +1536,6 @@ vcpu_run_loop(void *arg)
 {
        struct vm_run_params *vrp = (struct vm_run_params *)arg;
        intptr_t ret = 0;
-       int irq;
        uint32_t n;
 
        vrp->vrp_continue = 0;
@@ -1611,10 +1610,10 @@ vcpu_run_loop(void *arg)
                }
 
                if (vrp->vrp_irqready && i8259_is_pending()) {
-                       irq = i8259_ack();
-                       vrp->vrp_irq = irq;
+                       vrp->vrp_inject.vie_vector = i8259_ack();
+                       vrp->vrp_inject.vie_type = VCPU_INJECT_INTR;
                } else
-                       vrp->vrp_irq = 0xFFFF;
+                       vrp->vrp_inject.vie_type = VCPU_INJECT_NONE;
 
                /* Still more interrupts pending? */
                vrp->vrp_intr_pending = i8259_is_pending();