From c86bb406de1a7d9c03da29308568da5ba90607c4 Mon Sep 17 00:00:00 2001
From: mlarkin
Date: Thu, 27 Apr 2017 06:16:39 +0000
Subject: [PATCH] vmm(4): proper save/restore of FPU context during entry/exit.

tested by reyk, dcoppa, and a few others.

ok kettenis@ on the fpu bits
ok deraadt@ on the vmm bits
---
 sys/arch/amd64/amd64/fpu.c         |  33 +----
 sys/arch/amd64/amd64/vmm.c         | 202 +++++++++++++++++++++++++++--
 sys/arch/amd64/amd64/vmm_support.S |   8 +-
 sys/arch/amd64/include/cpufunc.h   |  12 +-
 sys/arch/amd64/include/fpu.h       |  33 ++++-
 sys/arch/amd64/include/vmmvar.h    |  13 +-
 6 files changed, 252 insertions(+), 49 deletions(-)

diff --git a/sys/arch/amd64/amd64/fpu.c b/sys/arch/amd64/amd64/fpu.c
index f6ef256421c..4b280fb6c9a 100644
--- a/sys/arch/amd64/amd64/fpu.c
+++ b/sys/arch/amd64/amd64/fpu.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: fpu.c,v 1.33 2016/04/21 22:08:27 mlarkin Exp $ */
+/* $OpenBSD: fpu.c,v 1.34 2017/04/27 06:16:39 mlarkin Exp $ */
 /* $NetBSD: fpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $ */
 
 /*-
@@ -74,42 +74,11 @@
  * state is saved.
  */
 
-#define fninit()	__asm("fninit")
-#define fwait()		__asm("fwait")
-#define fnclex()	__asm("fnclex")
-#define fxsave(addr)	__asm("fxsave %0" : "=m" (*addr))
-#define fxrstor(addr)	__asm("fxrstor %0" : : "m" (*addr))
-#define ldmxcsr(addr)	__asm("ldmxcsr %0" : : "m" (*addr))
-#define fldcw(addr)	__asm("fldcw %0" : : "m" (*addr))
-#define clts()		__asm("clts")
-#define stts()		lcr0(rcr0() | CR0_TS)
-
 /*
  * The mask of enabled XSAVE features.
  */
 uint64_t	xsave_mask;
 
-static inline void
-xsave(struct savefpu *addr, uint64_t mask)
-{
-	uint32_t lo, hi;
-
-	lo = mask;
-	hi = mask >> 32;
-	__asm volatile("xsave %0" : "=m" (*addr) : "a" (lo), "d" (hi) :
-	    "memory");
-}
-
-static inline void
-xrstor(struct savefpu *addr, uint64_t mask)
-{
-	uint32_t lo, hi;
-
-	lo = mask;
-	hi = mask >> 32;
-	__asm volatile("xrstor %0" : : "m" (*addr), "a" (lo), "d" (hi));
-}
-
 void fpudna(struct cpu_info *);
 static int x86fpflags_to_siginfo(u_int32_t);
 
diff --git a/sys/arch/amd64/amd64/vmm.c b/sys/arch/amd64/amd64/vmm.c
index 58371d8d499..1e55473c5c5 100644
--- a/sys/arch/amd64/amd64/vmm.c
+++ b/sys/arch/amd64/amd64/vmm.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmm.c,v 1.133 2017/04/26 09:53:28 mlarkin Exp $ */
+/* $OpenBSD: vmm.c,v 1.134 2017/04/27 06:16:39 mlarkin Exp $ */
 /*
  * Copyright (c) 2014 Mike Larkin
  *
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -31,6 +32,7 @@
 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -145,6 +147,7 @@ int vcpu_vmx_check_cap(struct vcpu *, uint32_t, uint32_t, int);
 int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *);
 int vmx_get_exit_info(uint64_t *, uint64_t *);
 int vmx_handle_exit(struct vcpu *);
+int vmx_handle_xsetbv(struct vcpu *);
 int vmm_handle_cpuid(struct vcpu *);
 int vmx_handle_rdmsr(struct vcpu *);
 int vmx_handle_wrmsr(struct vcpu *);
@@ -360,7 +363,7 @@ vmm_attach(struct device *parent, struct device *self, void *aux)
 	pool_init(&vm_pool, sizeof(struct vm), 0, IPL_NONE, PR_WAITOK,
 	    "vmpool", NULL);
-	pool_init(&vcpu_pool, sizeof(struct vcpu), 0, IPL_NONE, PR_WAITOK,
+	pool_init(&vcpu_pool, sizeof(struct vcpu), 64, IPL_NONE, PR_WAITOK,
 	    "vcpupl", NULL);
 
 	vmm_softc = sc;
@@ -2373,6 +2376,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
 	/* XXX CR0 shadow */
 	/* XXX CR4 shadow */
 
+	/* xcr0 power on default sets bit 0 (x87 state) */
+	vcpu->vc_gueststate.vg_xcr0 = XCR0_X87;
+
 	/* Flush the VMCS */
 	if (vmclear(&vcpu->vc_control_pa)) {
 		ret = EINVAL;
 		goto exit;
 	}
@@ -2498,7 +2504,7 @@ vcpu_init_vmx(struct vcpu *vcpu)
 	}
 
 	/* Host CR0 */
-	cr0 = rcr0();
+	cr0 = rcr0() & ~CR0_TS;
 	if (vmwrite(VMCS_HOST_IA32_CR0, cr0)) {
 		ret = EINVAL;
 		goto exit;
 	}
@@ -3353,6 +3359,42 @@ vcpu_must_stop(struct vcpu *vcpu)
 	return (0);
 }
 
+/*
+ * vmm_fpusave
+ *
+ * Modified version of fpusave_cpu from fpu.c that only saves the FPU context
+ * and does not call splipi/splx. Must be called with interrupts disabled.
+ */
+void
+vmm_fpusave(void)
+{
+	struct proc *p;
+	struct cpu_info *ci = curcpu();
+
+	p = ci->ci_fpcurproc;
+	if (p == NULL)
+		return;
+
+	if (ci->ci_fpsaving != 0)
+		panic("%s: recursive save!", __func__);
+
+	/*
+	 * Set ci->ci_fpsaving, so that any pending exception will be
+	 * thrown away. (It will be caught again if/when the FPU
+	 * state is restored.)
+	 */
+	ci->ci_fpsaving = 1;
+	if (xsave_mask)
+		xsave(&p->p_addr->u_pcb.pcb_savefpu, xsave_mask);
+	else
+		fxsave(&p->p_addr->u_pcb.pcb_savefpu);
+	ci->ci_fpsaving = 0;
+
+	p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
+
+	p->p_addr->u_pcb.pcb_fpcpu = NULL;
+	ci->ci_fpcurproc = NULL;
+}
+
 /*
  * vcpu_run_vmx
  *
@@ -3404,6 +3446,8 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 			break;
 		case VMX_EXIT_CPUID:
 			break;
+		case VMX_EXIT_XSETBV:
+			break;
 #ifdef VMM_DEBUG
 		case VMX_EXIT_TRIPLE_FAULT:
 			DPRINTF("%s: vm %d vcpu %d triple fault\n",
@@ -3528,10 +3572,76 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 
 		/* Start / resume the VCPU */
 		KERNEL_ASSERT_LOCKED();
+
+		/* Disable interrupts and save the current FPU state. */
+		disable_intr();
+		clts();
+		vmm_fpusave();
+
+		/* Initialize the guest FPU if not inited already */
+		if (!vcpu->vc_fpuinited) {
+			fninit();
+			bzero(&vcpu->vc_g_fpu.fp_fxsave,
+			    sizeof(vcpu->vc_g_fpu.fp_fxsave));
+			vcpu->vc_g_fpu.fp_fxsave.fx_fcw =
+			    __INITIAL_NPXCW__;
+			vcpu->vc_g_fpu.fp_fxsave.fx_mxcsr =
+			    __INITIAL_MXCSR__;
+			fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+
+			vcpu->vc_fpuinited = 1;
+		}
+
+		if (xsave_mask) {
+			/* Restore guest XCR0 and FPU context */
+			if (vcpu->vc_gueststate.vg_xcr0 & ~xsave_mask) {
+				DPRINTF("%s: guest attempted to set invalid "
+				    "bits in xcr0\n", __func__);
+				ret = EINVAL;
+				stts();
+				enable_intr();
+				break;
+			}
+
+			/* Restore guest %xcr0 */
+			xrstor(&vcpu->vc_g_fpu, xsave_mask);
+			xsetbv(0, vcpu->vc_gueststate.vg_xcr0);
+		} else
+			fxrstor(&vcpu->vc_g_fpu.fp_fxsave);
+
 		KERNEL_UNLOCK();
 		ret = vmx_enter_guest(&vcpu->vc_control_pa,
 		    &vcpu->vc_gueststate, resume);
 
+		/*
+		 * On exit, interrupts are disabled, and we are running with
+		 * the guest FPU state still possibly on the CPU. Save the FPU
+		 * state before re-enabling interrupts.
+		 */
+		if (xsave_mask) {
+			/* Save guest %xcr0 */
+			vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
+
+			/* Restore host %xcr0 */
+			xsetbv(0, xsave_mask);
+
+			/*
+			 * Save full copy of FPU state - guest content is
+			 * always a subset of host's save area (see xsetbv
+			 * exit handler)
+			 */
+			xsave(&vcpu->vc_g_fpu, xsave_mask);
+		} else
+			fxsave(&vcpu->vc_g_fpu);
+
+		/*
+		 * FPU state is invalid, set CR0_TS to force DNA trap on next
+		 * access.
+		 */
+		stts();
+
+		enable_intr();
+
 		exit_reason = VM_EXIT_NONE;
 		if (ret == 0) {
 			/*
@@ -3545,6 +3655,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
 				printf("%s: can't read guest rflags during "
 				    "exit\n", __func__);
 				ret = EINVAL;
+				KERNEL_LOCK();
 				break;
 			}
 		}
@@ -3826,6 +3937,10 @@ vmx_handle_exit(struct vcpu *vcpu)
 		ret = vmx_handle_wrmsr(vcpu);
 		update_rip = 1;
 		break;
+	case VMX_EXIT_XSETBV:
+		ret = vmx_handle_xsetbv(vcpu);
+		update_rip = 1;
+		break;
 	case VMX_EXIT_TRIPLE_FAULT:
 #ifdef VMM_DEBUG
 		DPRINTF("%s: vm %d vcpu %d triple fault\n", __func__,
@@ -4350,6 +4465,62 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
 	return (0);
 }
 
+/*
+ * vmx_handle_xsetbv
+ *
+ * Handler for xsetbv instructions. We allow the guest VM to set xcr0 values
+ * limited to the xsave_mask in use in the host.
+ *
+ * Parameters:
+ *  vcpu: vcpu structure containing instruction info causing the exit
+ *
+ * Return value:
+ *  0: The operation was successful
+ *  EINVAL: An error occurred
+ */
+int
+vmx_handle_xsetbv(struct vcpu *vcpu)
+{
+	uint64_t insn_length;
+	uint64_t *rax, *rdx, *rcx;
+
+	if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
+		printf("%s: can't obtain instruction length\n", __func__);
+		return (EINVAL);
+	}
+
+	/* All XSETBV instructions are 0x0F 0x01 0xD1 */
+	KASSERT(insn_length == 3);
+
+	rax = &vcpu->vc_gueststate.vg_rax;
+	rcx = &vcpu->vc_gueststate.vg_rcx;
+	rdx = &vcpu->vc_gueststate.vg_rdx;
+
+	if (*rcx != 0) {
+		DPRINTF("%s: guest specified invalid xcr register number "
+		    "%lld\n", __func__, *rcx);
+		/* XXX this should #GP(0) instead of killing the guest */
+		return (EINVAL);
+	}
+
+	/*
+	 * No bits in %edx are currently supported. Check this, and validate
+	 * against the host mask.
+	 */
+	if (*rdx != 0 || (*rax & ~xsave_mask)) {
+		DPRINTF("%s: guest specified invalid xcr0 content "
+		    "(0x%llx:0x%llx)\n", __func__, *rdx, *rax);
+		/* XXX this should #GP(0) instead of killing the guest */
+		return (EINVAL);
+	}
+
+	vcpu->vc_gueststate.vg_xcr0 = *rax;
+
+	vcpu->vc_gueststate.vg_rip += insn_length;
+
+	return (0);
+}
+
 /*
  * vmx_handle_wrmsr
  *
@@ -4413,6 +4584,7 @@ vmm_handle_cpuid(struct vcpu *vcpu)
 {
 	uint64_t insn_length;
 	uint64_t *rax, *rbx, *rcx, *rdx;
+	uint32_t eax, ebx, ecx, edx;
 
 	if (vmm_softc->mode == VMM_MODE_VMX ||
 	    vmm_softc->mode == VMM_MODE_EPT) {
@@ -4431,9 +4603,11 @@ vmm_handle_cpuid(struct vcpu *vcpu)
 	rcx = &vcpu->vc_gueststate.vg_rcx;
 	rdx = &vcpu->vc_gueststate.vg_rdx;
 
+	CPUID_LEAF(rax, 0, eax, ebx, ecx, edx);
+
 	switch (*rax) {
 	case 0x00:	/* Max level and vendor ID */
-		*rax = 0x07; /* cpuid_level */
+		*rax = 0x0d; /* cpuid_level */
 		*rbx = *((uint32_t *)&cpu_vendor);
 		*rdx = *((uint32_t *)&cpu_vendor + 1);
 		*rcx = *((uint32_t *)&cpu_vendor + 2);
@@ -4580,13 +4754,19 @@ vmm_handle_cpuid(struct vcpu *vcpu)
 		*rcx = 0;
 		*rdx = 0;
 		break;
-	case 0x0d:	/* Processor ext. state information (not supported) */
-		DPRINTF("%s: function 0x0d (ext. state info) not supported\n",
-		    __func__);
-		*rax = 0;
-		*rbx = 0;
-		*rcx = 0;
-		*rdx = 0;
+	case 0x0d:	/* Processor ext. state information */
+		if (*rcx == 0) {
+			*rax = xsave_mask;
+			*rbx = ebx;
+			*rcx = ecx;
+			*rdx = edx;
+		} else {
+			CPUID_LEAF(rax, *rcx, eax, ebx, ecx, edx);
+			*rax = eax;
+			*rbx = ebx;
+			*rcx = ecx;
+			*rdx = edx;
+		}
 		break;
 	case 0x0f:	/* QoS info (not supported) */
 		DPRINTF("%s: function 0x0f (QoS info) not supported\n",
diff --git a/sys/arch/amd64/amd64/vmm_support.S b/sys/arch/amd64/amd64/vmm_support.S
index 8fde032b981..56b3dbdfda8 100644
--- a/sys/arch/amd64/amd64/vmm_support.S
+++ b/sys/arch/amd64/amd64/vmm_support.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmm_support.S,v 1.7 2017/03/25 15:25:20 kettenis Exp $ */
+/* $OpenBSD: vmm_support.S,v 1.8 2017/04/27 06:16:39 mlarkin Exp $ */
 /*
  * Copyright (c) 2014 Mike Larkin
  *
@@ -17,6 +17,7 @@
 
 #include "assym.h"
 #include 
+#include 
 #include 
 
 /*
@@ -154,6 +155,9 @@ skip_init:
 	 */
 
 	pushfq
+	popq	%rax
+	andq	$(~PSL_I), %rax
+	pushq	%rax
 
 	/*
 	 * Save (possibly) lazy-switched selectors
@@ -354,7 +358,6 @@ restore_host:
 	 * first. This is to accommodate possibly lazy-switched
 	 * selectors from above
 	 */
-	cli
 	popq	%rdx
 	popq	%rax
 	movq	$MSR_KERNELGSBASE, %rcx
@@ -371,7 +374,6 @@ restore_host:
 	popq	%rax
 	movq	$MSR_FSBASE, %rcx
 	wrmsr
-	sti
 
 	popw	%ax
 	movw	%ax, %ss
diff --git a/sys/arch/amd64/include/cpufunc.h b/sys/arch/amd64/include/cpufunc.h
index f0599634079..bda18a00287 100644
--- a/sys/arch/amd64/include/cpufunc.h
+++ b/sys/arch/amd64/include/cpufunc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpufunc.h,v 1.13 2016/09/04 09:22:28 mpi Exp $ */
+/* $OpenBSD: cpufunc.h,v 1.14 2017/04/27 06:16:39 mlarkin Exp $ */
 /* $NetBSD: cpufunc.h,v 1.3 2003/05/08 10:27:43 fvdl Exp $ */
 
 /*-
@@ -335,6 +335,16 @@ xsetbv(uint32_t reg, uint64_t mask)
 	__asm volatile("xsetbv" :: "c" (reg), "a" (lo), "d" (hi) : "memory");
 }
 
+static __inline uint64_t
+xgetbv(uint32_t reg)
+{
+	uint32_t lo, hi;
+
+	__asm volatile("xgetbv" : "=a" (lo), "=d" (hi) : "c" (reg));
+
+	return (((uint64_t)hi << 32) | (uint64_t)lo);
+}
+
 /* Break into DDB/KGDB. */
 static __inline void
 breakpoint(void)
diff --git a/sys/arch/amd64/include/fpu.h b/sys/arch/amd64/include/fpu.h
index 9dc3bb2afe4..ff12f60ba69 100644
--- a/sys/arch/amd64/include/fpu.h
+++ b/sys/arch/amd64/include/fpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: fpu.h,v 1.11 2015/03/25 21:05:18 kettenis Exp $ */
+/* $OpenBSD: fpu.h,v 1.12 2017/04/27 06:16:39 mlarkin Exp $ */
 /* $NetBSD: fpu.h,v 1.1 2003/04/26 18:39:40 fvdl Exp $ */
 
 #ifndef _MACHINE_FPU_H_
@@ -71,6 +71,37 @@ void fpusave_cpu(struct cpu_info *, int);
 void fpu_kernel_enter(void);
 void fpu_kernel_exit(void);
 
+#define fninit()	__asm("fninit")
+#define fwait()		__asm("fwait")
+#define fnclex()	__asm("fnclex")
+#define fxsave(addr)	__asm("fxsave %0" : "=m" (*addr))
+#define fxrstor(addr)	__asm("fxrstor %0" : : "m" (*addr))
+#define ldmxcsr(addr)	__asm("ldmxcsr %0" : : "m" (*addr))
+#define fldcw(addr)	__asm("fldcw %0" : : "m" (*addr))
+#define clts()		__asm("clts")
+#define stts()		lcr0(rcr0() | CR0_TS)
+
+static inline void
+xsave(struct savefpu *addr, uint64_t mask)
+{
+	uint32_t lo, hi;
+
+	lo = mask;
+	hi = mask >> 32;
+	__asm volatile("xsave %0" : "=m" (*addr) : "a" (lo), "d" (hi) :
+	    "memory");
+}
+
+static inline void
+xrstor(struct savefpu *addr, uint64_t mask)
+{
+	uint32_t lo, hi;
+
+	lo = mask;
+	hi = mask >> 32;
+	__asm volatile("xrstor %0" : : "m" (*addr), "a" (lo), "d" (hi));
+}
+
 #endif
 
 #endif /* _MACHINE_FPU_H_ */
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index 7c657c9e160..30b42a97086 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vmmvar.h,v 1.32 2017/03/23 08:05:58 mlarkin Exp $ */
+/* $OpenBSD: vmmvar.h,v 1.33 2017/04/27 06:16:39 mlarkin Exp $ */
 /*
  * Copyright (c) 2014 Mike Larkin
  *
@@ -638,6 +638,7 @@ struct vmx_gueststate
 	uint64_t	vg_rip;			/* 0x80 */
 	uint32_t	vg_exit_reason;		/* 0x88 */
 	uint64_t	vg_rflags;		/* 0x90 */
+	uint64_t	vg_xcr0;		/* 0x98 */
 };
 
 /*
@@ -649,6 +650,12 @@ struct vm;
  * Virtual CPU
  */
 struct vcpu {
+	/*
+	 * Guest FPU state - this must remain as the first member of the struct
+	 * to ensure 64-byte alignment (set up during vcpu_pool init)
+	 */
+	struct savefpu vc_g_fpu;
+
 	/* VMCS / VMCB pointer */
 	vaddr_t vc_control_va;
 	uint64_t vc_control_pa;
@@ -674,6 +681,10 @@ struct vcpu {
 	uint16_t vc_intr;
 	uint8_t vc_irqready;
 
+	uint8_t vc_fpuinited;
+
+	uint64_t vc_h_xcr0;
+
 	/* VMX only */
 	uint64_t vc_vmx_basic;
 	uint64_t vc_vmx_entry_ctls;
-- 
2.20.1
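The new vmx_handle_xsetbv() accepts a guest XSETBV only when %rcx selects
XCR0 (zero), %rdx is zero, and the %rax bits are a subset of the host's
xsave_mask. Below is a minimal standalone userland sketch of that same
check, not taken from the diff; host_xsave_mask and xsetbv_allowed are
stand-in names, and the mask value (x87 + SSE + AVX) is only an example.
On real hardware the CPU would additionally raise #GP if bit 0 (x87 state)
were left clear.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's xsave_mask: x87 + SSE + AVX in this example. */
static const uint64_t host_xsave_mask = 0x7;

/* Mirrors the three checks in vmx_handle_xsetbv(); rcx/rdx/rax as the guest set them. */
static int
xsetbv_allowed(uint64_t rcx, uint64_t rdx, uint64_t rax)
{
	if (rcx != 0)
		return (0);	/* only xcr0 (register 0) exists today */
	if (rdx != 0)
		return (0);	/* no bits above 31 are supported */
	if (rax & ~host_xsave_mask)
		return (0);	/* must be a subset of the host mask */
	return (1);
}

int
main(void)
{
	printf("xcr0=0x3:  %s\n", xsetbv_allowed(0, 0, 0x3) ? "ok" : "reject");
	printf("xcr0=0xff: %s\n", xsetbv_allowed(0, 0, 0xff) ? "ok" : "reject");
	return (0);
}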
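Both the new xgetbv() inline in cpufunc.h and the CPUID leaf 0x0d handling
rely on the %edx:%eax split for 64-bit feature masks. The following
standalone userland sketch, also not part of the diff, reads XCR0 and the
CPUID-reported supported-feature mask; read_xcr0 and
cpuid_0d_supported_mask are stand-in names, and the program assumes an
amd64 CPU with OSXSAVE enabled.

#include <stdint.h>
#include <stdio.h>

static uint64_t
read_xcr0(void)
{
	uint32_t lo, hi;

	/* xgetbv: %ecx selects the XCR, result comes back in %edx:%eax */
	__asm volatile("xgetbv" : "=a" (lo), "=d" (hi) : "c" (0));
	return (((uint64_t)hi << 32) | lo);
}

static uint64_t
cpuid_0d_supported_mask(void)
{
	uint32_t eax, ebx, ecx, edx;

	/* CPUID leaf 0x0d, sub-leaf 0: supported XCR0 bits in %edx:%eax */
	__asm volatile("cpuid"
	    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
	    : "a" (0x0d), "c" (0));
	return (((uint64_t)edx << 32) | eax);
}

int
main(void)
{
	printf("xcr0               = 0x%llx\n",
	    (unsigned long long)read_xcr0());
	printf("cpuid 0x0d.0 mask  = 0x%llx\n",
	    (unsigned long long)cpuid_0d_supported_mask());
	return (0);
}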
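The pool_init() alignment bump to 64 and the requirement that vc_g_fpu stay
the first member of struct vcpu work together: xsave needs a 64-byte
aligned save area, so aligning the allocation aligns the embedded guest FPU
state. A standalone C11 sketch of that layout rule, not from the diff,
using stand-in types fake_savefpu/fake_vcpu and aligned_alloc() in place of
the kernel pool allocator:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_savefpu {			/* stand-in for struct savefpu */
	uint8_t fp_area[512];
};

struct fake_vcpu {			/* stand-in for struct vcpu */
	struct fake_savefpu vc_g_fpu;	/* must stay the first member */
	uint64_t vc_gueststate_xcr0;
};

int
main(void)
{
	/* Round the size up so it is a multiple of the 64-byte alignment. */
	size_t sz = (sizeof(struct fake_vcpu) + 63) & ~(size_t)63;
	struct fake_vcpu *vcpu = aligned_alloc(64, sz);

	assert(vcpu != NULL);
	assert(((uintptr_t)&vcpu->vc_g_fpu % 64) == 0);
	printf("vc_g_fpu at %p, 64-byte aligned\n", (void *)&vcpu->vc_g_fpu);
	free(vcpu);
	return (0);
}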