From b409d72e9db206cbce0672755443e20531101875 Mon Sep 17 00:00:00 2001 From: Ville Juven Date: Mon, 15 May 2023 17:56:15 +0300 Subject: [PATCH] riscv/fpu: Restore correct lazy-FPU functionality - Save the FPU registers into the tcb so they don't get lost if the stack frame for xcp.regs moves (as it does) - Handle interger and FPU register save/load separately - Integer registers are saved/loaded always, like before - FPU registers are only saved during a context switch: - Save ONLY if FPU is dirty - Restore always if FPU has been used (not in FSTATE_OFF, FSTATE_INIT) - Remove all lazy-FPU related logic from the macros, it is not needed --- arch/risc-v/include/irq.h | 82 +++++++------ arch/risc-v/src/common/riscv_cpupause.c | 4 +- .../src/common/riscv_exception_common.S | 4 - arch/risc-v/src/common/riscv_fork.c | 24 ++-- arch/risc-v/src/common/riscv_fork.h | 1 + arch/risc-v/src/common/riscv_fpu.S | 113 +++++++++++++++--- arch/risc-v/src/common/riscv_internal.h | 16 +++ arch/risc-v/src/common/riscv_macros.S | 30 ----- arch/risc-v/src/common/riscv_swint.c | 6 +- .../src/common/supervisor/riscv_syscall.S | 4 - 10 files changed, 176 insertions(+), 108 deletions(-) diff --git a/arch/risc-v/include/irq.h b/arch/risc-v/include/irq.h index a566492e89..5941d7057b 100644 --- a/arch/risc-v/include/irq.h +++ b/arch/risc-v/include/irq.h @@ -204,50 +204,52 @@ #endif #ifdef CONFIG_ARCH_FPU -# define REG_F0_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 0) -# define REG_F1_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 1) -# define REG_F2_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 2) -# define REG_F3_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 3) -# define REG_F4_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 4) -# define REG_F5_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 5) -# define REG_F6_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 6) -# define REG_F7_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 7) -# define REG_F8_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 8) -# define REG_F9_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 9) -# define REG_F10_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 10) -# define REG_F11_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 11) -# define REG_F12_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 12) -# define REG_F13_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 13) -# define REG_F14_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 14) -# define REG_F15_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 15) -# define REG_F16_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 16) -# define REG_F17_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 17) -# define REG_F18_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 18) -# define REG_F19_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 19) -# define REG_F20_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 20) -# define REG_F21_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 21) -# define REG_F22_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 22) -# define REG_F23_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 23) -# define REG_F24_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 24) -# define REG_F25_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 25) -# define REG_F26_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 26) -# define REG_F27_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 27) -# define REG_F28_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 28) -# define REG_F29_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 29) -# define REG_F30_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 30) -# define REG_F31_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 31) -# define REG_FCSR_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 32) +# define REG_F0_NDX (FPU_REG_SIZE * 0) +# define REG_F1_NDX (FPU_REG_SIZE * 1) +# define REG_F2_NDX (FPU_REG_SIZE * 2) +# define REG_F3_NDX (FPU_REG_SIZE * 3) +# define REG_F4_NDX (FPU_REG_SIZE * 4) +# define REG_F5_NDX (FPU_REG_SIZE * 5) +# define REG_F6_NDX (FPU_REG_SIZE * 6) +# define REG_F7_NDX (FPU_REG_SIZE * 7) +# define REG_F8_NDX (FPU_REG_SIZE * 8) +# define REG_F9_NDX (FPU_REG_SIZE * 9) +# define REG_F10_NDX (FPU_REG_SIZE * 10) +# define REG_F11_NDX (FPU_REG_SIZE * 11) +# define REG_F12_NDX (FPU_REG_SIZE * 12) +# define REG_F13_NDX (FPU_REG_SIZE * 13) +# define REG_F14_NDX (FPU_REG_SIZE * 14) +# define REG_F15_NDX (FPU_REG_SIZE * 15) +# define REG_F16_NDX (FPU_REG_SIZE * 16) +# define REG_F17_NDX (FPU_REG_SIZE * 17) +# define REG_F18_NDX (FPU_REG_SIZE * 18) +# define REG_F19_NDX (FPU_REG_SIZE * 19) +# define REG_F20_NDX (FPU_REG_SIZE * 20) +# define REG_F21_NDX (FPU_REG_SIZE * 21) +# define REG_F22_NDX (FPU_REG_SIZE * 22) +# define REG_F23_NDX (FPU_REG_SIZE * 23) +# define REG_F24_NDX (FPU_REG_SIZE * 24) +# define REG_F25_NDX (FPU_REG_SIZE * 25) +# define REG_F26_NDX (FPU_REG_SIZE * 26) +# define REG_F27_NDX (FPU_REG_SIZE * 27) +# define REG_F28_NDX (FPU_REG_SIZE * 28) +# define REG_F29_NDX (FPU_REG_SIZE * 29) +# define REG_F30_NDX (FPU_REG_SIZE * 30) +# define REG_F31_NDX (FPU_REG_SIZE * 31) +# define REG_FCSR_NDX (FPU_REG_SIZE * 32) # define FPU_XCPT_REGS (FPU_REG_SIZE * 33) -# define FPU_REG_FULL_SIZE (INT_REG_SIZE * FPU_REG_SIZE) +# define FPU_XCPT_SIZE (INT_REG_SIZE * FPU_XCPT_REGS) #else /* !CONFIG_ARCH_FPU */ # define FPU_XCPT_REGS (0) -# define FPU_REG_FULL_SIZE (0) +# define FPU_XCPT_SIZE (0) #endif /* CONFIG_ARCH_FPU */ #define XCPTCONTEXT_REGS (INT_XCPT_REGS + FPU_XCPT_REGS) -#define XCPTCONTEXT_SIZE (INT_REG_SIZE * XCPTCONTEXT_REGS) +/* Save only integer regs. FPU is handled separately */ + +#define XCPTCONTEXT_SIZE (INT_XCPT_SIZE) /* In assembly language, values have to be referenced as byte address * offsets. But in C, it is more convenient to reference registers as @@ -562,9 +564,15 @@ struct xcptcontext #endif #endif - /* Register save area */ + /* Integer register save area */ uintptr_t *regs; + + /* FPU register save area */ + +#ifdef CONFIG_ARCH_FPU + uintptr_t fregs[FPU_XCPT_REGS]; +#endif }; #endif /* __ASSEMBLY__ */ diff --git a/arch/risc-v/src/common/riscv_cpupause.c b/arch/risc-v/src/common/riscv_cpupause.c index 7d30dc5543..87412f7ad4 100644 --- a/arch/risc-v/src/common/riscv_cpupause.c +++ b/arch/risc-v/src/common/riscv_cpupause.c @@ -129,7 +129,7 @@ int up_cpu_paused(int cpu) * of the assigned task list for this CPU. */ - riscv_savestate(tcb->xcp.regs); + riscv_savecontext(tcb); /* Wait for the spinlock to be released */ @@ -161,7 +161,7 @@ int up_cpu_paused(int cpu) * will be made when the interrupt returns. */ - riscv_restorestate(tcb->xcp.regs); + riscv_restorecontext(tcb); spin_unlock(&g_cpu_wait[cpu]); spin_unlock(&g_cpu_resumed[cpu]); diff --git a/arch/risc-v/src/common/riscv_exception_common.S b/arch/risc-v/src/common/riscv_exception_common.S index 1f1180d46b..9d78b65c09 100644 --- a/arch/risc-v/src/common/riscv_exception_common.S +++ b/arch/risc-v/src/common/riscv_exception_common.S @@ -113,8 +113,6 @@ exception_common: csrr s0, CSR_EPC REGSTORE s0, REG_EPC(sp) /* exception PC */ - riscv_savefpu sp - /* Setup arg0(exception cause), arg1(context) */ csrr a0, CSR_CAUSE /* exception cause */ @@ -148,8 +146,6 @@ exception_common: mv sp, a0 - riscv_loadfpu sp - REGLOAD s0, REG_EPC(sp) /* restore sepc */ csrw CSR_EPC, s0 diff --git a/arch/risc-v/src/common/riscv_fork.c b/arch/risc-v/src/common/riscv_fork.c index 53ac2f3740..b1d96598e0 100644 --- a/arch/risc-v/src/common/riscv_fork.c +++ b/arch/risc-v/src/common/riscv_fork.c @@ -228,18 +228,18 @@ pid_t riscv_fork(const struct fork_s *context) child->cmn.xcp.regs[REG_GP] = newsp; /* Global pointer */ #endif #ifdef CONFIG_ARCH_FPU - child->cmn.xcp.regs[REG_FS0] = context->fs0; /* Saved register fs1 */ - child->cmn.xcp.regs[REG_FS1] = context->fs1; /* Saved register fs1 */ - child->cmn.xcp.regs[REG_FS2] = context->fs2; /* Saved register fs2 */ - child->cmn.xcp.regs[REG_FS3] = context->fs3; /* Saved register fs3 */ - child->cmn.xcp.regs[REG_FS4] = context->fs4; /* Saved register fs4 */ - child->cmn.xcp.regs[REG_FS5] = context->fs5; /* Saved register fs5 */ - child->cmn.xcp.regs[REG_FS6] = context->fs6; /* Saved register fs6 */ - child->cmn.xcp.regs[REG_FS7] = context->fs7; /* Saved register fs7 */ - child->cmn.xcp.regs[REG_FS8] = context->fs8; /* Saved register fs8 */ - child->cmn.xcp.regs[REG_FS9] = context->fs9; /* Saved register fs9 */ - child->cmn.xcp.regs[REG_FS10] = context->fs10; /* Saved register fs10 */ - child->cmn.xcp.regs[REG_FS11] = context->fs11; /* Saved register fs11 */ + child->cmn.xcp.fregs[REG_FS0] = context->fs0; /* Saved register fs1 */ + child->cmn.xcp.fregs[REG_FS1] = context->fs1; /* Saved register fs1 */ + child->cmn.xcp.fregs[REG_FS2] = context->fs2; /* Saved register fs2 */ + child->cmn.xcp.fregs[REG_FS3] = context->fs3; /* Saved register fs3 */ + child->cmn.xcp.fregs[REG_FS4] = context->fs4; /* Saved register fs4 */ + child->cmn.xcp.fregs[REG_FS5] = context->fs5; /* Saved register fs5 */ + child->cmn.xcp.fregs[REG_FS6] = context->fs6; /* Saved register fs6 */ + child->cmn.xcp.fregs[REG_FS7] = context->fs7; /* Saved register fs7 */ + child->cmn.xcp.fregs[REG_FS8] = context->fs8; /* Saved register fs8 */ + child->cmn.xcp.fregs[REG_FS9] = context->fs9; /* Saved register fs9 */ + child->cmn.xcp.fregs[REG_FS10] = context->fs10; /* Saved register fs10 */ + child->cmn.xcp.fregs[REG_FS11] = context->fs11; /* Saved register fs11 */ #endif #ifdef CONFIG_LIB_SYSCALL diff --git a/arch/risc-v/src/common/riscv_fork.h b/arch/risc-v/src/common/riscv_fork.h index 32cadba8d8..5fbd207af7 100644 --- a/arch/risc-v/src/common/riscv_fork.h +++ b/arch/risc-v/src/common/riscv_fork.h @@ -90,6 +90,7 @@ #endif #ifdef CONFIG_ARCH_FPU +# define FPU_REG_FULL_SIZE (INT_REG_SIZE * FPU_REG_SIZE) # define FORK_FS0_OFFSET (FORK_INT_SIZE + 0*FPU_REG_FULL_SIZE) # define FORK_FS1_OFFSET (FORK_INT_SIZE + 1*FPU_REG_FULL_SIZE) # define FORK_FS2_OFFSET (FORK_INT_SIZE + 2*FPU_REG_FULL_SIZE) diff --git a/arch/risc-v/src/common/riscv_fpu.S b/arch/risc-v/src/common/riscv_fpu.S index 84989ccbf3..9d5feb9390 100644 --- a/arch/risc-v/src/common/riscv_fpu.S +++ b/arch/risc-v/src/common/riscv_fpu.S @@ -1,4 +1,4 @@ -/************************************************************************************ +/**************************************************************************** * arch/risc-v/src/common/riscv_fpu.S * * Licensed to the Apache Software Foundation (ASF) under one or more @@ -16,11 +16,11 @@ * License for the specific language governing permissions and limitations * under the License. * - ************************************************************************************/ + ****************************************************************************/ -/************************************************************************************ +/**************************************************************************** * Included Files - ************************************************************************************/ + ****************************************************************************/ #include @@ -29,20 +29,24 @@ #include #include +#include "riscv_macros.S" + #ifdef CONFIG_ARCH_FPU -/************************************************************************************ +/**************************************************************************** * Public Symbols - ************************************************************************************/ + ****************************************************************************/ .globl riscv_fpuconfig + .globl riscv_savefpu + .globl riscv_restorefpu .file "riscv_fpu.S" -/************************************************************************************ +/**************************************************************************** * Public Functions - ************************************************************************************/ + ****************************************************************************/ -/************************************************************************************ +/**************************************************************************** * Name: riscv_fpuconfig * * Description: @@ -57,18 +61,95 @@ * Returned Value: * This function does not return anything explicitly. * - ************************************************************************************/ + ****************************************************************************/ .type riscv_fpuconfig, function riscv_fpuconfig: - li a0, MSTATUS_FS_INIT - csrs CSR_STATUS, a0 + li a0, MSTATUS_FS_INIT + csrs CSR_STATUS, a0 - fsflags zero - fsrm zero + fsflags zero + fsrm zero - fence.i - ret + fence.i + ret + +/**************************************************************************** + * Name: riscv_savefpu + * + * Description: + * Given the pointer to a register save area (in A0), save the state of the + * floating point registers. + * + * C Function Prototype: + * void riscv_savefpu(uintptr_t *regs, uintptr_t *fregs); + * + * Input Parameters: + * regs - A pointer to the integer registers that contain the status + * fregs - A pointer to the register save area in which to save the + * floating point registers + * + * Returned Value: + * None + * + ****************************************************************************/ + + .type riscv_savefpu, function + +riscv_savefpu: + + REGLOAD t0, REG_INT_CTX(a0) + li t1, MSTATUS_FS + and t2, t0, t1 + li t1, MSTATUS_FS_DIRTY + bne t2, t1, 1f + li t1, ~MSTATUS_FS + and t0, t0, t1 + li t1, MSTATUS_FS_CLEAN + or t0, t0, t1 + REGSTORE t0, REG_INT_CTX(a0) + + riscv_savefpu a1 + +1: + ret + +/**************************************************************************** + * Name: riscv_restorefpu + * + * Description: + * Given the pointer to a register save area (in A0), restore the state of + * the floating point registers. + * + * C Function Prototype: + * void riscv_restorefpu(uintptr_t *regs, uintptr_t *fregs); + * + * Input Parameters: + * regs - A pointer to the integer registers that contain the status + * fregs - A pointer to the register save area containing the floating + * point registers. + * + * Returned Value: + * This function does not return anything explicitly. However, it is + * called from interrupt level assembly logic that assumes that r0 is + * preserved. + * + ****************************************************************************/ + + .type riscv_restorefpu, function + +riscv_restorefpu: + + REGLOAD t0, REG_INT_CTX(a0) + li t1, MSTATUS_FS + and t2, t0, t1 + li t1, MSTATUS_FS_INIT + ble t2, t1, 1f + + riscv_loadfpu a1 + +1: + ret #endif /* CONFIG_ARCH_FPU */ diff --git a/arch/risc-v/src/common/riscv_internal.h b/arch/risc-v/src/common/riscv_internal.h index 6b385818cb..836d7354f5 100644 --- a/arch/risc-v/src/common/riscv_internal.h +++ b/arch/risc-v/src/common/riscv_internal.h @@ -204,8 +204,12 @@ void riscv_exception_attach(void); #ifdef CONFIG_ARCH_FPU void riscv_fpuconfig(void); +void riscv_savefpu(uintptr_t *regs, uintptr_t *fregs); +void riscv_restorefpu(uintptr_t *regs, uintptr_t *fregs); #else # define riscv_fpuconfig() +# define riscv_savefpu(regs, fregs) +# define riscv_restorefpu(regs, fregs) #endif /* Save / restore context of task */ @@ -213,11 +217,23 @@ void riscv_fpuconfig(void); static inline void riscv_savecontext(struct tcb_s *tcb) { tcb->xcp.regs = (uintptr_t *)CURRENT_REGS; + +#ifdef CONFIG_ARCH_FPU + /* Save current process FPU state to TCB */ + + riscv_savefpu(tcb->xcp.regs, tcb->xcp.fregs); +#endif } static inline void riscv_restorecontext(struct tcb_s *tcb) { CURRENT_REGS = (uintptr_t *)tcb->xcp.regs; + +#ifdef CONFIG_ARCH_FPU + /* Restore FPU state for next process */ + + riscv_restorefpu(tcb->xcp.regs, tcb->xcp.fregs); +#endif } /* RISC-V PMP Config ********************************************************/ diff --git a/arch/risc-v/src/common/riscv_macros.S b/arch/risc-v/src/common/riscv_macros.S index 83626b05b0..5f9bd12f65 100644 --- a/arch/risc-v/src/common/riscv_macros.S +++ b/arch/risc-v/src/common/riscv_macros.S @@ -100,13 +100,6 @@ .macro riscv_savefpu in -#ifdef CONFIG_ARCH_FPU - REGLOAD t0, REG_INT_CTX(\in) - li t1, MSTATUS_FS - and t0, t0, t1 - li t1, MSTATUS_FS_INIT - ble t0, t1, 1f - /* Store all floating point registers */ FSTORE f0, REG_F0(\in) @@ -145,14 +138,6 @@ frcsr t0 REGSTORE t0, REG_FCSR(\in) -1: - - /* Restore what we have just destroyed (t0, t1) */ - - REGLOAD t0, REG_T0(\in) - REGLOAD t1, REG_T1(\in) -#endif - .endm /**************************************************************************** @@ -216,13 +201,6 @@ .macro riscv_loadfpu out -#ifdef CONFIG_ARCH_FPU - REGLOAD t0, REG_INT_CTX(\out) - li t1, MSTATUS_FS - and t0, t0, t1 - li t1, MSTATUS_FS_INIT - ble t0, t1, 1f - /* Load all floating point registers */ FLOAD f0, REG_F0(\out) @@ -263,14 +241,6 @@ REGLOAD t0, REG_FCSR(\out) fscsr t0 -1: - - /* Restore what we have just destroyed (t0, t1) */ - - REGLOAD t0, REG_T0(\out) - REGLOAD t1, REG_T1(\out) -#endif - .endm /**************************************************************************** diff --git a/arch/risc-v/src/common/riscv_swint.c b/arch/risc-v/src/common/riscv_swint.c index dc2c11f00e..0a262fe700 100644 --- a/arch/risc-v/src/common/riscv_swint.c +++ b/arch/risc-v/src/common/riscv_swint.c @@ -153,7 +153,7 @@ int riscv_swint(int irq, void *context, void *arg) struct tcb_s *next = (struct tcb_s *)regs[REG_A1]; DEBUGASSERT(regs[REG_A1] != 0); - CURRENT_REGS = (uintptr_t *)next->xcp.regs; + riscv_restorecontext(next); } break; @@ -180,8 +180,8 @@ int riscv_swint(int irq, void *context, void *arg) struct tcb_s *next = (struct tcb_s *)regs[REG_A2]; DEBUGASSERT(regs[REG_A1] != 0 && regs[REG_A2] != 0); - prev->xcp.regs = (uintptr_t *)CURRENT_REGS; - CURRENT_REGS = (uintptr_t *)next->xcp.regs; + riscv_savecontext(prev); + riscv_restorecontext(next); } break; diff --git a/arch/risc-v/src/common/supervisor/riscv_syscall.S b/arch/risc-v/src/common/supervisor/riscv_syscall.S index 18804a49f1..3e4515fea7 100644 --- a/arch/risc-v/src/common/supervisor/riscv_syscall.S +++ b/arch/risc-v/src/common/supervisor/riscv_syscall.S @@ -109,8 +109,6 @@ sys_call6: addi s0, sp, XCPTCONTEXT_SIZE REGSTORE s0, REG_SP(sp) /* original SP */ - riscv_savefpu sp - mv a0, sp /* a0 = context */ #if CONFIG_ARCH_INTERRUPTSTACK > 15 @@ -127,8 +125,6 @@ sys_call6: mv sp, a0 /* use sp, as a0 gets wiped */ - riscv_loadfpu sp - REGLOAD s0, REG_EPC(sp) /* restore epc */ csrw CSR_EPC, s0