From f2269f651e776f446d799063d003a395850ff3c3 Mon Sep 17 00:00:00 2001 From: kettenis Date: Wed, 30 Jun 2021 22:20:56 +0000 Subject: [PATCH] Simplify the way we track the FPU state, using powerpc64 as a model. The new code still uses the clean/dirty state that the hardware reports to optimize saving/restoring the FPU register, but no longer attempts to keep the FPU registers alive across a context switch. Fixes panics seen on MP kernels. ok drahn@ --- sys/arch/riscv64/include/cpu.h | 3 +- sys/arch/riscv64/include/pcb.h | 3 +- sys/arch/riscv64/riscv64/fpu.c | 110 ++++++------------------- sys/arch/riscv64/riscv64/genassym.cf | 3 +- sys/arch/riscv64/riscv64/machdep.c | 42 ++++------ sys/arch/riscv64/riscv64/sig_machdep.c | 24 +++--- sys/arch/riscv64/riscv64/vm_machdep.c | 17 ++-- 7 files changed, 62 insertions(+), 140 deletions(-) diff --git a/sys/arch/riscv64/include/cpu.h b/sys/arch/riscv64/include/cpu.h index 4730598a0c7..306a4b071b1 100644 --- a/sys/arch/riscv64/include/cpu.h +++ b/sys/arch/riscv64/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.8 2021/06/29 21:27:52 kettenis Exp $ */ +/* $OpenBSD: cpu.h,v 1.9 2021/06/30 22:20:56 kettenis Exp $ */ /* * Copyright (c) 2019 Mike Larkin @@ -84,7 +84,6 @@ struct cpu_info { struct proc *ci_curproc; struct pmap *ci_curpm; - struct proc *ci_fpuproc; u_int32_t ci_randseed; struct pcb *ci_curpcb; diff --git a/sys/arch/riscv64/include/pcb.h b/sys/arch/riscv64/include/pcb.h index 1ebe9b2877a..430066ce3de 100644 --- a/sys/arch/riscv64/include/pcb.h +++ b/sys/arch/riscv64/include/pcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pcb.h,v 1.2 2021/05/12 01:20:52 jsg Exp $ */ +/* $OpenBSD: pcb.h,v 1.3 2021/06/30 22:20:56 kettenis Exp $ */ /* * Copyright (c) 2016 Dale Rahn @@ -39,6 +39,5 @@ struct pcb { caddr_t pcb_onfault; // On fault handler struct fpreg pcb_fpstate; // Floating Point state */ - struct cpu_info *pcb_fpcpu; }; #endif /* _MACHINE_PCB_H_ */ diff --git a/sys/arch/riscv64/riscv64/fpu.c b/sys/arch/riscv64/riscv64/fpu.c index 38663f3b4c6..0b2963d2e9a 100644 --- a/sys/arch/riscv64/riscv64/fpu.c +++ b/sys/arch/riscv64/riscv64/fpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fpu.c,v 1.10 2021/06/29 21:27:53 kettenis Exp $ */ +/* $OpenBSD: fpu.c,v 1.11 2021/06/30 22:20:56 kettenis Exp $ */ /* * Copyright (c) 2020 Dale Rahn @@ -22,24 +22,6 @@ #include #include "machine/asm.h" -void -fpu_clear(struct fpreg *fp) -{ - /* rounding mode set to 0, should be RND_NEAREST */ - bzero(fp, sizeof (*fp)); -} - -void -fpu_discard(struct proc *p) -{ - struct cpu_info *ci = curcpu(); - - if (curpcb->pcb_fpcpu == ci && ci->ci_fpuproc == p) { - ci->ci_fpuproc = NULL; - curpcb->pcb_fpcpu = NULL; - } -} - void fpu_disable(void) { @@ -54,44 +36,21 @@ fpu_enable_clean(void) } void -fpu_save(struct proc *p, struct trapframe *frame) +fpu_save(struct proc *p, struct trapframe *tf) { - struct cpu_info *ci = curcpu(); - struct pcb *pcb = &p->p_addr->u_pcb; - struct fpreg *fp = &p->p_addr->u_pcb.pcb_fpstate; - register void *ptr = fp->fp_f; - uint64_t fcsr; - - if (ci->ci_fpuproc != p) { - return; - } + struct pcb *pcb = &p->p_addr->u_pcb; + struct fpreg *fp = &pcb->pcb_fpstate; - if (pcb->pcb_fpcpu == NULL || ci->ci_fpuproc == NULL || - !(pcb->pcb_fpcpu == ci && ci->ci_fpuproc == p)) { - /* disable fpu */ - panic("FPU enabled but curproc and curcpu do not agree %p %p", - pcb->pcb_fpcpu, ci->ci_fpuproc); - } - - - switch (p->p_addr->u_pcb.pcb_tf->tf_sstatus & SSTATUS_FS_MASK) { - case SSTATUS_FS_OFF: - /* Fallthru */ - case SSTATUS_FS_CLEAN: - p->p_addr->u_pcb.pcb_tf->tf_sstatus &= ~SSTATUS_FS_MASK; + if ((tf->tf_sstatus & SSTATUS_FS_MASK) == SSTATUS_FS_OFF || + (tf->tf_sstatus & SSTATUS_FS_MASK) == SSTATUS_FS_CLEAN) return; - case SSTATUS_FS_DIRTY: - default: - ; - /* fallthru */ - } fpu_enable_clean(); - __asm volatile("frcsr %0" : "=r"(fcsr)); - fp->fp_fcsr = fcsr; - #define STFx(x) \ - __asm volatile ("fsd f" __STRING(x) ", %1(%0)": :"r"(ptr), "i"(x * 8)) + __asm volatile("frcsr %0" : "=r"(fp->fp_fcsr)); + +#define STFx(x) \ + __asm volatile ("fsd f" #x ", %1(%0)" : : "r"(fp->fp_f), "i"(x * 8)) STFx(0); STFx(1); @@ -126,46 +85,32 @@ fpu_save(struct proc *p, struct trapframe *frame) STFx(30); STFx(31); - /* - * pcb->pcb_fpcpu and ci->ci_fpuproc are still valid - * until some other fpu context steals either the cpu - * context or another cpu steals the fpu context. - */ + fpu_disable(); + /* mark FPU as clean */ p->p_addr->u_pcb.pcb_tf->tf_sstatus &= ~SSTATUS_FS_MASK; - fpu_disable(); + p->p_addr->u_pcb.pcb_tf->tf_sstatus |= SSTATUS_FS_CLEAN; } void fpu_load(struct proc *p) { - struct cpu_info *ci = curcpu(); - struct pcb *pcb = &p->p_addr->u_pcb; - - struct fpreg *fp = &p->p_addr->u_pcb.pcb_fpstate; - register void *ptr = fp->fp_f; + struct pcb *pcb = &p->p_addr->u_pcb; + struct fpreg *fp = &pcb->pcb_fpstate; KASSERT((pcb->pcb_tf->tf_sstatus & SSTATUS_FS_MASK) == SSTATUS_FS_OFF); - /* - * Verify that context is not already loaded - */ - if (pcb->pcb_fpcpu == ci && ci->ci_fpuproc == p) { - /* fpu state loaded, enable it */ - pcb->pcb_tf->tf_sstatus |= SSTATUS_FS_CLEAN; - return; - } - //printf("FPU load requested %p %p \n", ci, p); - if ((pcb->pcb_flags & PCB_FPU) == 0) { - fpu_clear(fp); + memset(fp, 0, sizeof(*fp)); pcb->pcb_flags |= PCB_FPU; } + fpu_enable_clean(); - __asm volatile("fscsr %0" : : "r"(fp->fp_fcsr)); - #define RDFx(x) \ - __asm volatile ("fld f" __STRING(x) ", %1(%0)": :"r"(ptr), "i"(x * 8)) + __asm volatile("fscsr %0" : : "r"(fp->fp_fcsr)); + +#define RDFx(x) \ + __asm volatile ("fld f" #x ", %1(%0)" : : "r"(fp->fp_f), "i"(x * 8)) RDFx(0); RDFx(1); @@ -200,14 +145,9 @@ fpu_load(struct proc *p) RDFx(30); RDFx(31); - /* - * pcb->pcb_fpcpu and ci->ci_fpuproc are activated here - * to indicate that the fpu context is correctly loaded on - * this cpu. XXX block interrupts for these saves ? - */ - pcb->pcb_fpcpu = ci; - ci->ci_fpuproc = p; - p->p_addr->u_pcb.pcb_tf->tf_sstatus |= SSTATUS_FS_CLEAN; - fpu_disable(); + + /* mark FPU as clean */ + p->p_addr->u_pcb.pcb_tf->tf_sstatus &= ~SSTATUS_FS_MASK; + p->p_addr->u_pcb.pcb_tf->tf_sstatus |= SSTATUS_FS_CLEAN; } diff --git a/sys/arch/riscv64/riscv64/genassym.cf b/sys/arch/riscv64/riscv64/genassym.cf index ab6c8bd538d..07a170d78fc 100644 --- a/sys/arch/riscv64/riscv64/genassym.cf +++ b/sys/arch/riscv64/riscv64/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.4 2021/06/29 21:27:53 kettenis Exp $ +# $OpenBSD: genassym.cf,v 1.5 2021/06/30 22:20:56 kettenis Exp $ # # Copyright (c) 2020 Brian Bamsch # All rights reserved. @@ -62,7 +62,6 @@ member pcb_tf member pcb_sp member pcb_onfault member pcb_fpstate -member pcb_fpcpu struct cpu_info member ci_dev diff --git a/sys/arch/riscv64/riscv64/machdep.c b/sys/arch/riscv64/riscv64/machdep.c index 654c9762259..0327513dd54 100644 --- a/sys/arch/riscv64/riscv64/machdep.c +++ b/sys/arch/riscv64/riscv64/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.22 2021/06/21 15:05:51 kettenis Exp $ */ +/* $OpenBSD: machdep.c,v 1.23 2021/06/30 22:20:56 kettenis Exp $ */ /* * Copyright (c) 2014 Patrick Wildt @@ -294,30 +294,19 @@ void cpu_switchto_asm(struct proc *, struct proc *); void cpu_switchto(struct proc *old, struct proc *new) { - struct cpu_info *ci = curcpu(); - struct trapframe *tf; - struct pcb *pcb; - - /* old may be NULL, do not save context */ - if (old != NULL) { - tf = old->p_addr->u_pcb.pcb_tf; - if ((tf->tf_sstatus & SSTATUS_FS_MASK) == SSTATUS_FS_DIRTY) { + if (old) { + struct pcb *pcb = &old->p_addr->u_pcb; + struct trapframe *tf = pcb->pcb_tf; + + if (pcb->pcb_flags & PCB_FPU) fpu_save(old, tf); - } + + /* drop FPU state */ tf->tf_sstatus &= ~SSTATUS_FS_MASK; + tf->tf_sstatus |= SSTATUS_FS_OFF; } cpu_switchto_asm(old, new); - - pcb = ci->ci_curpcb; - tf = new->p_addr->u_pcb.pcb_tf; -#if 0 - /* XXX this optimization is subtly broken */ - if (pcb->pcb_fpcpu == ci && ci->ci_fpuproc == new) { - /* If fpu state is already loaded, allow it to be used */ - tf->tf_sstatus |= SSTATUS_FS_CLEAN; - } -#endif } /* @@ -420,16 +409,15 @@ void setregs(struct proc *p, struct exec_package *pack, u_long stack, register_t *retval) { - struct trapframe *tf; + struct trapframe *tf = p->p_addr->u_pcb.pcb_tf; + struct pcb *pcb = &p->p_addr->u_pcb; /* If we were using the FPU, forget about it. */ - if (p->p_addr->u_pcb.pcb_fpcpu != NULL) - fpu_discard(p); - p->p_addr->u_pcb.pcb_flags &= ~PCB_FPU; - - tf = p->p_addr->u_pcb.pcb_tf; + pcb->pcb_flags &= ~PCB_FPU; + tf->tf_sstatus &= ~SSTATUS_FS_MASK; + tf->tf_sstatus |= SSTATUS_FS_OFF; - memset (tf,0, sizeof(*tf)); + memset(tf, 0, sizeof(*tf)); tf->tf_a[0] = stack; // XXX Inherited from FreeBSD. Why? tf->tf_sp = STACKALIGN(stack); tf->tf_ra = pack->ep_entry; diff --git a/sys/arch/riscv64/riscv64/sig_machdep.c b/sys/arch/riscv64/riscv64/sig_machdep.c index e865a9eff1c..cb6f1153db5 100644 --- a/sys/arch/riscv64/riscv64/sig_machdep.c +++ b/sys/arch/riscv64/riscv64/sig_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sig_machdep.c,v 1.7 2021/06/29 21:27:53 kettenis Exp $ */ +/* $OpenBSD: sig_machdep.c,v 1.8 2021/06/30 22:20:56 kettenis Exp $ */ /* * Copyright (c) 1990 The Regents of the University of California. @@ -137,6 +137,10 @@ sendsig(sig_t catcher, int sig, sigset_t mask, const siginfo_t *ksip) /* make the stack aligned */ fp = (struct sigframe *)STACKALIGN(fp); + /* Save FPU state to PCB if necessary. */ + if (p->p_addr->u_pcb.pcb_flags & PCB_FPU) + fpu_save(p, tf); + /* Build stack frame for signal trampoline. */ bzero(&frame, sizeof(frame)); frame.sf_signum = sig; @@ -156,13 +160,11 @@ sendsig(sig_t catcher, int sig, sigset_t mask, const siginfo_t *ksip) /* Save signal mask. */ frame.sf_sc.sc_mask = mask; - if (p->p_addr->u_pcb.pcb_flags & PCB_FPU && - (tf->tf_sstatus & SSTATUS_FS_MASK) == SSTATUS_FS_DIRTY) { - fpu_save(p, tf); + /* Copy the saved FPU state into the frame if necessary. */ + if (p->p_addr->u_pcb.pcb_flags & PCB_FPU) { fpreg = &p->p_addr->u_pcb.pcb_fpstate; - for (i=0; i < 32; i++) { + for (i = 0; i < 32; i++) frame.sf_sc.sc_f[i] = fpreg->fp_f[i]; - } frame.sf_sc.sc_fcsr = fpreg->fp_fcsr; } @@ -251,16 +253,16 @@ sys_sigreturn(struct proc *p, void *v, register_t *retval) tf->tf_tp = ksc.sc_tp; tf->tf_sepc = ksc.sc_sepc; + /* Write saved FPU state back to PCB if necessary. */ if (p->p_addr->u_pcb.pcb_flags & PCB_FPU) { fpreg = &p->p_addr->u_pcb.pcb_fpstate; - for (i=0; i < 32; i++) { + for (i = 0; i < 32; i++) fpreg->fp_f[i] = ksc.sc_f[i]; - } fpreg->fp_fcsr = ksc.sc_fcsr; - /* force disable and discard FPU contents */ - tf->tf_sstatus &= ~SSTATUS_FS_MASK; /* disable fpu */ - fpu_discard(p); + /* drop FPU state */ + tf->tf_sstatus &= ~SSTATUS_FS_MASK; + tf->tf_sstatus |= SSTATUS_FS_OFF; } /* Restore signal mask. */ diff --git a/sys/arch/riscv64/riscv64/vm_machdep.c b/sys/arch/riscv64/riscv64/vm_machdep.c index 331727c7100..419455ef335 100644 --- a/sys/arch/riscv64/riscv64/vm_machdep.c +++ b/sys/arch/riscv64/riscv64/vm_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_machdep.c,v 1.6 2021/05/20 04:22:33 drahn Exp $ */ +/* $OpenBSD: vm_machdep.c,v 1.7 2021/06/30 22:20:56 kettenis Exp $ */ /*- * Copyright (c) 1995 Charles M. Hannum. All rights reserved. @@ -57,18 +57,17 @@ void cpu_fork(struct proc *p1, struct proc *p2, void *stack, void *tcb, void (*func)(void *), void *arg) { - struct pcb *pcb = (struct pcb *)&p2->p_addr->u_pcb; + struct pcb *pcb = &p2->p_addr->u_pcb; + struct pcb *pcb1 = &p1->p_addr->u_pcb; struct trapframe *tf; struct switchframe *sf; - /* If the FPU is enabled, allow fpu_save to store data if needed */ - if (pcb->pcb_flags & PCB_FPU) { - fpu_save(p1, p1->p_addr->u_pcb.pcb_tf); - } + /* Save FPU state to PCB if necessary. */ + if (pcb1->pcb_flags & PCB_FPU) + fpu_save(p1, pcb1->pcb_tf); /* Copy the pcb. */ *pcb = p1->p_addr->u_pcb; - pcb->pcb_fpcpu = NULL; pmap_activate(p2); @@ -108,10 +107,6 @@ cpu_fork(struct proc *p1, struct proc *p2, void *stack, void *tcb, void cpu_exit(struct proc *p) { - /* If we were using the FPU, forget about it. */ - if (p->p_addr->u_pcb.pcb_fpcpu != NULL) - fpu_discard(p); - pmap_deactivate(p); sched_exit(p); } -- 2.20.1