make vmm(4) less responsible for initial register state, preferring to let
author mlarkin <mlarkin@openbsd.org>
Wed, 29 Nov 2017 02:46:10 +0000 (02:46 +0000)
committer mlarkin <mlarkin@openbsd.org>
Wed, 29 Nov 2017 02:46:10 +0000 (02:46 +0000)
usermode daemons handle that.

ok pd@

sys/arch/amd64/amd64/vmm.c
sys/arch/amd64/include/vmmvar.h
sys/arch/i386/include/vmmvar.h
usr.sbin/vmd/loadfile.h
usr.sbin/vmd/loadfile_elf.c
usr.sbin/vmd/vm.c

index 8400433..994a656 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vmm.c,v 1.177 2017/11/29 00:38:01 mlarkin Exp $       */
+/*     $OpenBSD: vmm.c,v 1.178 2017/11/29 02:46:10 mlarkin Exp $       */
 /*
  * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
  *
@@ -1458,6 +1458,14 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uint64_t regmask,
                        goto errout;
                if (vmread(VMCS_GUEST_IA32_CR4, &crs[VCPU_REGS_CR4]))
                        goto errout;
+               if (vmread(VMCS_GUEST_PDPTE0, &crs[VCPU_REGS_PDPTE0]))
+                       goto errout;
+               if (vmread(VMCS_GUEST_PDPTE1, &crs[VCPU_REGS_PDPTE1]))
+                       goto errout;
+               if (vmread(VMCS_GUEST_PDPTE2, &crs[VCPU_REGS_PDPTE2]))
+                       goto errout;
+               if (vmread(VMCS_GUEST_PDPTE3, &crs[VCPU_REGS_PDPTE3]))
+                       goto errout;
        }
 
        msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
@@ -1585,6 +1593,14 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
                        goto errout;
                if (vmwrite(VMCS_GUEST_IA32_CR4, crs[VCPU_REGS_CR4]))
                        goto errout;
+               if (vmwrite(VMCS_GUEST_PDPTE0, crs[VCPU_REGS_PDPTE0]))
+                       goto errout;
+               if (vmwrite(VMCS_GUEST_PDPTE1, crs[VCPU_REGS_PDPTE1]))
+                       goto errout;
+               if (vmwrite(VMCS_GUEST_PDPTE2, crs[VCPU_REGS_PDPTE2]))
+                       goto errout;
+               if (vmwrite(VMCS_GUEST_PDPTE3, crs[VCPU_REGS_PDPTE3]))
+                       goto errout;
        }
 
        msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
@@ -2093,6 +2109,8 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
        ret = 0;
        ug = 0;
 
+       cr0 = vrs->vrs_crs[VCPU_REGS_CR0];
+
        if (vcpu_reload_vmcs_vmx(&vcpu->vc_control_pa)) {
                DPRINTF("%s: error reloading VMCS\n", __func__);
                return (EINVAL);
@@ -2215,7 +2233,8 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
         *
         * If we have unrestricted guest capability, we must be able to set
         * the following:
-        * IA32_VMX_UNRESTRICTED_GUEST - enable unrestricted guest
+        * IA32_VMX_UNRESTRICTED_GUEST - enable unrestricted guest (if caller
+        *     cleared both CR0_PG and CR0_PE in %cr0 in the 'vrs' parameter)
         */
        want1 = 0;
 
@@ -2236,8 +2255,10 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
            IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
                if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
                    IA32_VMX_UNRESTRICTED_GUEST, 1)) {
-                       want1 |= IA32_VMX_UNRESTRICTED_GUEST;
-                       ug = 1;
+                       if ((cr0 & (CR0_PE | CR0_PG)) == 0) {
+                               want1 |= IA32_VMX_UNRESTRICTED_GUEST;
+                               ug = 1;
+                       }
                }
        }
 
@@ -2302,10 +2323,10 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
         * IA32_VMX_LOAD_DEBUG_CONTROLS
         * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
         */
-       if (ug == 1 && !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA))
-               want1 = 0;
-       else
+       if (vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA)
                want1 = IA32_VMX_IA32E_MODE_GUEST;
+       else
+               want1 = 0;
 
        want0 = IA32_VMX_ENTRY_TO_SMM |
            IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT |
@@ -2381,7 +2402,6 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
         * value as per Intel SDM A.7.
         * CR0 bits in the vrs parameter must match these.
         */
-
        want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
            (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
        want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
@@ -2394,12 +2414,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
         * any value for CR0_PG and CR0_PE in vrs->vrs_crs[VCPU_REGS_CR0] if
         * the CPU has the unrestricted guest capability.
         */
-       cr0 = vrs->vrs_crs[VCPU_REGS_CR0];
-
        if (ug) {
                want1 &= ~(CR0_PG | CR0_PE);
                want0 &= ~(CR0_PG | CR0_PE);
-               cr0 &= ~(CR0_PG | CR0_PE);
        }
 
        /*
@@ -2413,31 +2430,66 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
                ret = EINVAL;
                goto exit;
        }
+
        if ((~cr0 & want0) != want0) {
                ret = EINVAL;
                goto exit;
        }
 
-       if (ug)
-               cr3 = 0;
-       else
-               cr3 = vrs->vrs_crs[VCPU_REGS_CR3];
-
        /*
-        * Determine default CR4 as per Intel SDM A.8
-        * All flexible bits are set to 0
+        * Determine which bits in CR4 have to be set to a fixed
+        * value as per Intel SDM A.8.
+        * CR4 bits in the vrs parameter must match these, except
+        * CR4_VMXE - we add that here since it must always be set.
         */
-       cr4 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
+       want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
            (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
+       want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
+           ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
 
-       /*
-        * If we are starting in restricted guest mode, enable PAE
-        */
-       if (ug == 0)
-               cr4 |= CR4_PAE;
+       cr4 = vrs->vrs_crs[VCPU_REGS_CR4] | CR4_VMXE;
+
+       if ((cr4 & want1) != want1) {
+               ret = EINVAL;
+               goto exit;
+       }
+
+       if ((~cr4 & want0) != want0) {
+               ret = EINVAL;
+               goto exit;
+       }
+
+       cr3 = vrs->vrs_crs[VCPU_REGS_CR3];
+
+       /* Restore PDPTEs if 32-bit PAE paging is being used */
+       if (cr3 && (cr4 & CR4_PAE) &&
+           !(vrs->vrs_msrs[VCPU_REGS_EFER] & EFER_LMA)) {
+               if (vmwrite(VMCS_GUEST_PDPTE0,
+                   vrs->vrs_crs[VCPU_REGS_PDPTE0])) {
+                       ret = EINVAL;
+                       goto exit;
+               }
+
+               if (vmwrite(VMCS_GUEST_PDPTE1,
+                   vrs->vrs_crs[VCPU_REGS_PDPTE1])) {
+                       ret = EINVAL;
+                       goto exit;
+               }
+
+               if (vmwrite(VMCS_GUEST_PDPTE2,
+                   vrs->vrs_crs[VCPU_REGS_PDPTE2])) {
+                       ret = EINVAL;
+                       goto exit;
+               }
+
+               if (vmwrite(VMCS_GUEST_PDPTE3,
+                   vrs->vrs_crs[VCPU_REGS_PDPTE3])) {
+                       ret = EINVAL;
+                       goto exit;
+               }
+       }
 
        vrs->vrs_crs[VCPU_REGS_CR0] = cr0;
-       vrs->vrs_crs[VCPU_REGS_CR3] = cr3;
        vrs->vrs_crs[VCPU_REGS_CR4] = cr4;
 
        /*
@@ -2560,13 +2612,6 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
         */
        ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_ALL, 0, vrs);
 
-       /*
-        * Make sure LME is enabled in EFER if restricted guest mode is
-        * needed.
-        */
-       if (ug == 0)
-               msr_store[VCPU_REGS_EFER].vms_data |= EFER_LME;
-
        /*
         * Set up the MSR bitmap
         */
index 2294acb..b6fe6ec 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vmmvar.h,v 1.49 2017/11/29 00:32:52 mlarkin Exp $     */
+/*     $OpenBSD: vmmvar.h,v 1.50 2017/11/29 02:46:10 mlarkin Exp $     */
 /*
  * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
  *
@@ -384,7 +384,11 @@ struct vcpu_segment_info {
 #define VCPU_REGS_CR4  3
 #define VCPU_REGS_CR8  4
 #define VCPU_REGS_XCR0 5
-#define VCPU_REGS_NCRS (VCPU_REGS_XCR0 + 1)
+#define VCPU_REGS_PDPTE0 6
+#define VCPU_REGS_PDPTE1 7
+#define VCPU_REGS_PDPTE2 8
+#define VCPU_REGS_PDPTE3 9
+#define VCPU_REGS_NCRS (VCPU_REGS_PDPTE3 + 1)
 
 #define VCPU_REGS_CS           0
 #define VCPU_REGS_DS           1
index 640120a..6071578 100644 (file)
@@ -315,12 +315,16 @@ struct vcpu_segment_info {
 #define VCPU_REGS_EFLAGS       9
 #define VCPU_REGS_NGPRS                (VCPU_REGS_EFLAGS + 1)
 
-#define VCPU_REGS_CR0  0
-#define VCPU_REGS_CR2  1
-#define VCPU_REGS_CR3  2
-#define VCPU_REGS_CR4  3
-#define VCPU_REGS_CR8  4
-#define VCPU_REGS_NCRS (VCPU_REGS_CR8 + 1)
+#define VCPU_REGS_CR0          0
+#define VCPU_REGS_CR2          1
+#define VCPU_REGS_CR3          2
+#define VCPU_REGS_CR4          3
+#define VCPU_REGS_CR8          4
+#define VCPU_REGS_PDPTE0       5
+#define VCPU_REGS_PDPTE1       6
+#define VCPU_REGS_PDPTE2       7
+#define VCPU_REGS_PDPTE3       8
+#define VCPU_REGS_NCRS (VCPU_REGS_PDPTE3 + 1)
 
 #define VCPU_REGS_CS           0
 #define VCPU_REGS_DS           1
index 8313df0..88d7457 100644 (file)
@@ -1,5 +1,5 @@
 /*     $NetBSD: loadfile.h,v 1.1 1999/04/28 09:08:50 christos Exp $     */
-/*     $OpenBSD: loadfile.h,v 1.9 2017/06/07 14:41:53 mlarkin Exp $     */
+/*     $OpenBSD: loadfile.h,v 1.10 2017/11/29 02:46:10 mlarkin Exp $    */
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -71,7 +71,7 @@
 #define PML4_PAGE 0x11000
 #define PML3_PAGE 0x12000
 #define PML2_PAGE 0x13000
-#define NPTE_PG (PAGE_SIZE / sizeof(pt_entry_t))
+#define NPTE_PG (PAGE_SIZE / sizeof(uint64_t))
 
 int loadfile_elf(FILE *, struct vm_create_params *,
     struct vcpu_reg_state *, uint32_t, uint32_t);
index 02b0ab8..586e491 100644 (file)
@@ -1,5 +1,5 @@
 /* $NetBSD: loadfile.c,v 1.10 2000/12/03 02:53:04 tsutsui Exp $ */
-/* $OpenBSD: loadfile_elf.c,v 1.28 2017/11/27 03:19:00 mlarkin Exp $ */
+/* $OpenBSD: loadfile_elf.c,v 1.29 2017/11/29 02:46:10 mlarkin Exp $ */
 
 /*-
  * Copyright (c) 1997 The NetBSD Foundation, Inc.
@@ -226,33 +226,24 @@ push_gdt(void)
 static void
 push_pt(void)
 {
-       pt_entry_t ptes[NPTE_PG];
-       uint64_t i;
+       uint64_t ptes[NPTE_PG], i;
 
-#ifdef __i386__
-       memset(ptes, 0, sizeof(ptes));
-       for (i = 0 ; i < NPTE_PG; i++) {
-               ptes[i] = PG_V | PG_PS | (NBPD * i);
-       }
-       write_mem(PML4_PAGE, ptes, PAGE_SIZE);
-#else
-       /* PML3 [0] - first 1GB */
+       /* PDPTE0 - first 1GB */
        memset(ptes, 0, sizeof(ptes));
        ptes[0] = PG_V | PML3_PAGE;
        write_mem(PML4_PAGE, ptes, PAGE_SIZE);
 
-       /* PML3 [0] - first 1GB */
+       /* PDE0 - first 1GB */
        memset(ptes, 0, sizeof(ptes));
        ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
        write_mem(PML3_PAGE, ptes, PAGE_SIZE);
 
-       /* PML2 [0..511] - first 1GB (in 2MB pages) */
+       /* First 1GB (in 2MB pages) */
        memset(ptes, 0, sizeof(ptes));
        for (i = 0 ; i < NPTE_PG; i++) {
-               ptes[i] = PG_V | PG_RW | PG_u | PG_PS | (NBPD_L2 * i);
+               ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i);
        }
        write_mem(PML2_PAGE, ptes, PAGE_SIZE);
-#endif
 }
 
 /*
index 51532f3..05292be 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vm.c,v 1.29 2017/11/28 23:58:30 mlarkin Exp $ */
+/*     $OpenBSD: vm.c,v 1.30 2017/11/29 02:46:10 mlarkin Exp $ */
 
 /*
  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
@@ -31,6 +31,7 @@
 
 #include <machine/param.h>
 #include <machine/psl.h>
+#include <machine/pte.h>
 #include <machine/specialreg.h>
 #include <machine/vmmvar.h>
 
@@ -133,6 +134,11 @@ static const struct vcpu_reg_state vcpu_init_flat32 = {
 #endif
        .vrs_crs[VCPU_REGS_CR0] = CR0_CD | CR0_NW | CR0_ET | CR0_PE | CR0_PG,
        .vrs_crs[VCPU_REGS_CR3] = PML4_PAGE,
+       .vrs_crs[VCPU_REGS_CR4] = CR4_PAE | CR4_PSE,
+       .vrs_crs[VCPU_REGS_PDPTE0] = PML3_PAGE | PG_V,
+       .vrs_crs[VCPU_REGS_PDPTE1] = 0ULL,
+       .vrs_crs[VCPU_REGS_PDPTE2] = 0ULL,
+       .vrs_crs[VCPU_REGS_PDPTE3] = 0ULL,
        .vrs_sregs[VCPU_REGS_CS] = { 0x8, 0xFFFFFFFF, 0xC09F, 0x0},
        .vrs_sregs[VCPU_REGS_DS] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
        .vrs_sregs[VCPU_REGS_ES] = { 0x10, 0xFFFFFFFF, 0xC093, 0x0},
@@ -143,7 +149,7 @@ static const struct vcpu_reg_state vcpu_init_flat32 = {
        .vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
        .vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
        .vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
-       .vrs_msrs[VCPU_REGS_EFER] = 0ULL,
+       .vrs_msrs[VCPU_REGS_EFER] = EFER_LME | EFER_LMA,
 #ifndef __i386__
        .vrs_msrs[VCPU_REGS_STAR] = 0ULL,
        .vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,