From: drahn Date: Thu, 20 May 2021 04:22:33 +0000 (+0000) Subject: Significant overhaul of the floating point save/restore code. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=3d760e7fddc58c8d08b1f1a5417be60eb6e00903;p=openbsd Significant overhaul of the floating point save/restore code. At this point the mechanism should closely resemble the powerpc64 save/restore points with one difference. (reload avoidance) The previous 'aggressive' fpu save code that was (mostly) implemented before and is present on arm32 and arm64. There is one piece from that other design that remains, if pcb->pcb_fpcpu == ci && ci->ci_fpuproc == p after sleep, this will automatically re-activate the FPU state without needing to reload it. To enable this, the pointer pair is not changed on FPU context save to indicate that the CPU still holds the valid content as long as both of those pointers are pointing to each other. Note that if another core steals the FPU conxtex (when we get to SMP) the pcb->pcb_fpcpu will be another cpu, and from that it will know to reload the FPU context. Also optimistically enabling this only makes sense on riscv64 because there is the notion of FPU on and clean. Other implimentations would need to 'fault on' the FPU enable, but could avoid the FPU context load if no other processor has run this FPU context and no other process has use FPU on this core. ok kettenis@ deraadt@ Prior to a couple of fixes. --- diff --git a/sys/arch/riscv64/riscv64/cpuswitch.S b/sys/arch/riscv64/riscv64/cpuswitch.S index 004042f8c5c..c396659aa73 100644 --- a/sys/arch/riscv64/riscv64/cpuswitch.S +++ b/sys/arch/riscv64/riscv64/cpuswitch.S @@ -1,4 +1,4 @@ -/* $OpenBSD: cpuswitch.S,v 1.2 2021/05/12 01:20:52 jsg Exp $ */ +/* $OpenBSD: cpuswitch.S,v 1.3 2021/05/20 04:22:33 drahn Exp $ */ /* * Copyright (c) 2015 Dale Rahn @@ -25,7 +25,7 @@ * a0 'struct proc *' of the old context * a1 'struct proc *' of the new context */ -ENTRY(cpu_switchto) +ENTRY(cpu_switchto_asm) // check if old context needs to be saved beqz a0, 1f @@ -93,7 +93,7 @@ ENTRY(cpu_switchto) addi sp, sp, SWITCHFRAME_SIZEOF RETGUARD_CHECK(cpu_switchto, a7) ret -END(cpu_switch) +END(cpu_switch_asm) ENTRY(proc_trampoline) diff --git a/sys/arch/riscv64/riscv64/exception.S b/sys/arch/riscv64/riscv64/exception.S index 719f9ab9c8b..bc92e22f9ba 100644 --- a/sys/arch/riscv64/riscv64/exception.S +++ b/sys/arch/riscv64/riscv64/exception.S @@ -1,4 +1,4 @@ -/* $OpenBSD: exception.S,v 1.4 2021/05/13 06:44:11 kettenis Exp $ */ +/* $OpenBSD: exception.S,v 1.5 2021/05/20 04:22:33 drahn Exp $ */ /*- * Copyright (c) 2015-2018 Ruslan Bukin @@ -108,6 +108,8 @@ li t0, SSTATUS_SUM csrc sstatus, t0 .endif + li t0, SSTATUS_FS_MASK + csrc sstatus, t0 csrr t0, stval sd t0, (TF_STVAL)(sp) csrr t0, scause diff --git a/sys/arch/riscv64/riscv64/fpu.c b/sys/arch/riscv64/riscv64/fpu.c index 7c502b237b6..974b972dbbb 100644 --- a/sys/arch/riscv64/riscv64/fpu.c +++ b/sys/arch/riscv64/riscv64/fpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fpu.c,v 1.7 2021/05/12 01:20:52 jsg Exp $ */ +/* $OpenBSD: fpu.c,v 1.8 2021/05/20 04:22:33 drahn Exp $ */ /* * Copyright (c) 2020 Dale Rahn @@ -83,6 +83,7 @@ fpu_save(struct proc *p, struct trapframe *frame) /* fallthru */ } + fpu_enable_clean(); __asm volatile("frcsr %0" : "=r"(fcsr)); fp->fp_fcsr = fcsr; @@ -129,7 +130,7 @@ fpu_save(struct proc *p, struct trapframe *frame) */ p->p_addr->u_pcb.pcb_tf->tf_sstatus &= ~SSTATUS_FS_MASK; - void fpu_enable_disable(); + fpu_disable(); } void @@ -145,6 +146,10 @@ fpu_load(struct proc *p) * Verify that context is not already loaded */ if (pcb->pcb_fpcpu == ci && ci->ci_fpuproc == p) { + /* fpu state loaded, enable it */ + if ((pcb->pcb_tf->tf_sstatus & SSTATUS_FS_MASK) == + SSTATUS_FS_OFF) + pcb->pcb_tf->tf_sstatus |= SSTATUS_FS_CLEAN; return; } //printf("FPU load requested %p %p \n", ci, p); @@ -199,6 +204,7 @@ fpu_load(struct proc *p) */ pcb->pcb_fpcpu = ci; ci->ci_fpuproc = p; + p->p_addr->u_pcb.pcb_tf->tf_sstatus |= SSTATUS_FS_CLEAN; - void fpu_enable_disable(); + fpu_disable(); } diff --git a/sys/arch/riscv64/riscv64/machdep.c b/sys/arch/riscv64/riscv64/machdep.c index e54942ee9da..0e24c9a3210 100644 --- a/sys/arch/riscv64/riscv64/machdep.c +++ b/sys/arch/riscv64/riscv64/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.18 2021/05/19 21:23:20 kettenis Exp $ */ +/* $OpenBSD: machdep.c,v 1.19 2021/05/20 04:22:33 drahn Exp $ */ /* * Copyright (c) 2014 Patrick Wildt @@ -282,6 +282,37 @@ cpu_startup(void) } } +/* + * Move parts of cpu_switchto into C, too difficult in asm + */ + +void cpu_switchto_asm(struct proc *, struct proc *); + +void +cpu_switchto(struct proc *old, struct proc *new) +{ + struct cpu_info *ci = curcpu(); + struct trapframe *tf; + struct pcb *pcb; + + /* old may be NULL, do not save context */ + if (old != NULL) { + tf = old->p_addr->u_pcb.pcb_tf; + if ((tf->tf_sstatus & SSTATUS_FS_MASK) == SSTATUS_FS_DIRTY) { + fpu_save(old, tf); + } + } + + cpu_switchto_asm(old, new); + + pcb = ci->ci_curpcb; + tf = new->p_addr->u_pcb.pcb_tf; + if (pcb->pcb_fpcpu == ci && ci->ci_fpuproc == new) { + /* If fpu state is already loaded, allow it to be used */ + tf->tf_sstatus |= SSTATUS_FS_CLEAN; + } +} + /* * machine dependent system variables. */ diff --git a/sys/arch/riscv64/riscv64/process_machdep.c b/sys/arch/riscv64/riscv64/process_machdep.c index ce0539f4ba7..dd0bc8d3e87 100644 --- a/sys/arch/riscv64/riscv64/process_machdep.c +++ b/sys/arch/riscv64/riscv64/process_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: process_machdep.c,v 1.4 2021/05/14 06:48:52 jsg Exp $ */ +/* $OpenBSD: process_machdep.c,v 1.5 2021/05/20 04:22:33 drahn Exp $ */ /* * Copyright (c) 2014 Patrick Wildt @@ -73,6 +73,13 @@ process_read_regs(struct proc *p, struct reg *regs) int process_read_fpregs(struct proc *p, struct fpreg *regs) { + struct trapframe *tf = p->p_addr->u_pcb.pcb_tf; + + if ((tf->tf_sstatus & SSTATUS_FS_MASK) == SSTATUS_FS_DIRTY) { + tf->tf_sstatus &= ~SSTATUS_FS_MASK; /* disable fpu */ + fpu_save(p, tf); + } + if (p->p_addr->u_pcb.pcb_flags & PCB_FPU) memcpy(regs, &p->p_addr->u_pcb.pcb_fpstate, sizeof(*regs)); else @@ -96,8 +103,10 @@ process_write_regs(struct proc *p, struct reg *regs) tf->tf_gp = regs->r_gp; tf->tf_tp = regs->r_tp; //XXX tf->tf_sepc = regs->r_sepc; + //p->p_addr->u_pcb.pcb_tcb = (void *)regs->r_tp;//XXX why? freebsd just copied r_tp to tf_tp - //XXX should we add r_sepc and sstatus also? + + return(0); } @@ -105,9 +114,15 @@ process_write_regs(struct proc *p, struct reg *regs) int process_write_fpregs(struct proc *p, struct fpreg *regs) { + struct cpu_info *ci = curcpu(); + struct trapframe *tf = p->p_addr->u_pcb.pcb_tf; + p->p_addr->u_pcb.pcb_flags |= PCB_FPU; memcpy(&p->p_addr->u_pcb.pcb_fpstate, regs, sizeof(p->p_addr->u_pcb.pcb_fpstate)); + + tf->tf_sstatus &= ~SSTATUS_FS_MASK; /* disable fpu */ + fpu_discard(p); return(0); } #endif diff --git a/sys/arch/riscv64/riscv64/sig_machdep.c b/sys/arch/riscv64/riscv64/sig_machdep.c index c4fe890e554..cd804c3c997 100644 --- a/sys/arch/riscv64/riscv64/sig_machdep.c +++ b/sys/arch/riscv64/riscv64/sig_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sig_machdep.c,v 1.4 2021/05/14 06:48:52 jsg Exp $ */ +/* $OpenBSD: sig_machdep.c,v 1.5 2021/05/20 04:22:33 drahn Exp $ */ /* * Copyright (c) 1990 The Regents of the University of California. @@ -117,6 +117,7 @@ sendsig(sig_t catcher, int sig, sigset_t mask, const siginfo_t *ksip) struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp = p->p_p->ps_sigacts; + struct fpreg *fpreg; siginfo_t *sip = NULL; int i; @@ -157,8 +158,14 @@ sendsig(sig_t catcher, int sig, sigset_t mask, const siginfo_t *ksip) /* Save signal mask. */ frame.sf_sc.sc_mask = mask; - /* XXX Save floating point context */ - /* XXX! */ + if (p->p_addr->u_pcb.pcb_flags & PCB_FPU) { + fpu_save(p, tf); + fpreg = &p->p_addr->u_pcb.pcb_fpstate; + for (i=0; i < 32; i++) { + frame.sf_sc.sc_f[i] = fpreg->fp_f[i]; + } + frame.sf_sc.sc_fcsr = fpreg->fp_fcsr; + } if (psp->ps_siginfo & sigmask(sig)) { sip = &fp->sf_si; @@ -211,6 +218,7 @@ sys_sigreturn(struct proc *p, void *v, register_t *retval) } */ *uap = v; struct sigcontext ksc, *scp = SCARG(uap, sigcntxp); struct trapframe *tf; + struct fpreg *fpreg; int i; if (PROC_PC(p) != p->p_p->ps_sigcoderet) { @@ -231,19 +239,6 @@ sys_sigreturn(struct proc *p, void *v, register_t *retval) (void)copyout(&ksc.sc_cookie, (caddr_t)scp + offsetof(struct sigcontext, sc_cookie), sizeof (ksc.sc_cookie)); - /* - * Make sure the processor mode has not been tampered with and - * interrupts have not been disabled. - */ -#if 0 - /* XXX include sanity check */ - if ((ksc.sc_spsr & PSR_M_MASK) != PSR_M_EL0t || - (ksc.sc_spsr & (PSR_I | PSR_F)) != 0) - return (EINVAL); -#endif - - /* XXX Restore floating point context */ - /* Restore register context. */ tf = process_frame(p); for (i=0; i < 7; i++) @@ -257,6 +252,18 @@ sys_sigreturn(struct proc *p, void *v, register_t *retval) tf->tf_tp = ksc.sc_tp; tf->tf_sepc = ksc.sc_sepc; + if (p->p_addr->u_pcb.pcb_flags & PCB_FPU) { + fpreg = &p->p_addr->u_pcb.pcb_fpstate; + for (i=0; i < 32; i++) { + fpreg->fp_f[i] = ksc.sc_f[i]; + } + fpreg->fp_fcsr = ksc.sc_fcsr; + + /* force disable and discard FPU contents */ + tf->tf_sstatus &= ~SSTATUS_FS_MASK; /* disable fpu */ + fpu_discard(p); + } + //dumpframe ("after", tf, 0); /* Restore signal mask. */ diff --git a/sys/arch/riscv64/riscv64/syscall.c b/sys/arch/riscv64/riscv64/syscall.c index abd37b60631..8fb24ba5c2b 100644 --- a/sys/arch/riscv64/riscv64/syscall.c +++ b/sys/arch/riscv64/riscv64/syscall.c @@ -1,4 +1,4 @@ -/* $OpenBSD: syscall.c,v 1.8 2021/05/16 03:29:35 jsg Exp $ */ +/* $OpenBSD: syscall.c,v 1.9 2021/05/20 04:22:33 drahn Exp $ */ /* * Copyright (c) 2020 Brian Bamsch @@ -48,9 +48,6 @@ svc_handler(trapframe_t *frame) uvmexp.syscalls++; - /* Before enabling interrupts, save FPU state */ - fpu_save(p, frame); - /* Re-enable interrupts if they were enabled previously */ if (__predict_true(frame->tf_scause & EXCP_INTR)) intr_enable(); diff --git a/sys/arch/riscv64/riscv64/trap.c b/sys/arch/riscv64/riscv64/trap.c index 9845eedf73d..3b57620f878 100644 --- a/sys/arch/riscv64/riscv64/trap.c +++ b/sys/arch/riscv64/riscv64/trap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: trap.c,v 1.12 2021/05/15 20:20:35 deraadt Exp $ */ +/* $OpenBSD: trap.c,v 1.13 2021/05/20 04:22:33 drahn Exp $ */ /* * Copyright (c) 2020 Shivam Waghela @@ -125,19 +125,11 @@ do_trap_user(struct trapframe *frame) KASSERTMSG((csr_read(sstatus) & (SSTATUS_SUM)) == 0, "Came from U mode with SUM enabled"); - /* Save fpu context before (possibly) calling interrupt handler. - * Could end up context switching in interrupt handler. - */ - fpu_save(p, frame); - exception = (frame->tf_scause & EXCP_MASK); if (frame->tf_scause & EXCP_INTR) { /* Interrupt */ riscv_cpu_intr(frame); - frame->tf_sstatus &= ~SSTATUS_FS_MASK; - if (pcb->pcb_fpcpu == curcpu() && curcpu()->ci_fpuproc == p) { - frame->tf_sstatus |= SSTATUS_FS_CLEAN; - } + return; } @@ -166,7 +158,6 @@ do_trap_user(struct trapframe *frame) case EXCP_ILLEGAL_INSTRUCTION: if ((frame->tf_sstatus & SSTATUS_FS_MASK) == SSTATUS_FS_OFF) { fpu_load(p); - frame->tf_sstatus &= ~SSTATUS_FS_MASK; break; } printf("ILL at %lx scause %lx stval %lx\n", frame->tf_sepc, @@ -185,16 +176,6 @@ do_trap_user(struct trapframe *frame) exception, frame->tf_stval); } - /* now that we will not context switch again, - * see if we should enable FPU - */ - frame->tf_sstatus &= ~SSTATUS_FS_MASK; - if (pcb->pcb_fpcpu == curcpu() && curcpu()->ci_fpuproc == p) { - frame->tf_sstatus |= SSTATUS_FS_CLEAN; - //printf ("FPU enabled userland %p %p\n", - // pcb->pcb_fpcpu, curcpu()->ci_fpuproc); - } - userret(p); } diff --git a/sys/arch/riscv64/riscv64/vm_machdep.c b/sys/arch/riscv64/riscv64/vm_machdep.c index 043530bb7fb..331727c7100 100644 --- a/sys/arch/riscv64/riscv64/vm_machdep.c +++ b/sys/arch/riscv64/riscv64/vm_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_machdep.c,v 1.5 2021/05/16 06:20:29 jsg Exp $ */ +/* $OpenBSD: vm_machdep.c,v 1.6 2021/05/20 04:22:33 drahn Exp $ */ /*- * Copyright (c) 1995 Charles M. Hannum. All rights reserved. @@ -61,7 +61,10 @@ cpu_fork(struct proc *p1, struct proc *p2, void *stack, void *tcb, struct trapframe *tf; struct switchframe *sf; - // Does any flushing need to be done if process was running? + /* If the FPU is enabled, allow fpu_save to store data if needed */ + if (pcb->pcb_flags & PCB_FPU) { + fpu_save(p1, p1->p_addr->u_pcb.pcb_tf); + } /* Copy the pcb. */ *pcb = p1->p_addr->u_pcb;