riscv/fpu: Restore correct lazy-FPU functionality

- Save the FPU registers into the tcb so they don't get lost if the stack
  frame for xcp.regs moves (as it does)
- Handle interger and FPU register save/load separately
- Integer registers are saved/loaded always, like before
- FPU registers are only saved during a context switch:
  - Save ONLY if FPU is dirty
  - Restore always if FPU has been used (not in FSTATE_OFF, FSTATE_INIT)
- Remove all lazy-FPU related logic from the macros, it is not needed
This commit is contained in:
Ville Juven 2023-05-15 17:56:15 +03:00 committed by Xiang Xiao
parent 3b5c0c885f
commit b409d72e9d
10 changed files with 176 additions and 108 deletions

View File

@ -204,50 +204,52 @@
#endif #endif
#ifdef CONFIG_ARCH_FPU #ifdef CONFIG_ARCH_FPU
# define REG_F0_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 0) # define REG_F0_NDX (FPU_REG_SIZE * 0)
# define REG_F1_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 1) # define REG_F1_NDX (FPU_REG_SIZE * 1)
# define REG_F2_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 2) # define REG_F2_NDX (FPU_REG_SIZE * 2)
# define REG_F3_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 3) # define REG_F3_NDX (FPU_REG_SIZE * 3)
# define REG_F4_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 4) # define REG_F4_NDX (FPU_REG_SIZE * 4)
# define REG_F5_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 5) # define REG_F5_NDX (FPU_REG_SIZE * 5)
# define REG_F6_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 6) # define REG_F6_NDX (FPU_REG_SIZE * 6)
# define REG_F7_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 7) # define REG_F7_NDX (FPU_REG_SIZE * 7)
# define REG_F8_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 8) # define REG_F8_NDX (FPU_REG_SIZE * 8)
# define REG_F9_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 9) # define REG_F9_NDX (FPU_REG_SIZE * 9)
# define REG_F10_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 10) # define REG_F10_NDX (FPU_REG_SIZE * 10)
# define REG_F11_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 11) # define REG_F11_NDX (FPU_REG_SIZE * 11)
# define REG_F12_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 12) # define REG_F12_NDX (FPU_REG_SIZE * 12)
# define REG_F13_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 13) # define REG_F13_NDX (FPU_REG_SIZE * 13)
# define REG_F14_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 14) # define REG_F14_NDX (FPU_REG_SIZE * 14)
# define REG_F15_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 15) # define REG_F15_NDX (FPU_REG_SIZE * 15)
# define REG_F16_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 16) # define REG_F16_NDX (FPU_REG_SIZE * 16)
# define REG_F17_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 17) # define REG_F17_NDX (FPU_REG_SIZE * 17)
# define REG_F18_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 18) # define REG_F18_NDX (FPU_REG_SIZE * 18)
# define REG_F19_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 19) # define REG_F19_NDX (FPU_REG_SIZE * 19)
# define REG_F20_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 20) # define REG_F20_NDX (FPU_REG_SIZE * 20)
# define REG_F21_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 21) # define REG_F21_NDX (FPU_REG_SIZE * 21)
# define REG_F22_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 22) # define REG_F22_NDX (FPU_REG_SIZE * 22)
# define REG_F23_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 23) # define REG_F23_NDX (FPU_REG_SIZE * 23)
# define REG_F24_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 24) # define REG_F24_NDX (FPU_REG_SIZE * 24)
# define REG_F25_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 25) # define REG_F25_NDX (FPU_REG_SIZE * 25)
# define REG_F26_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 26) # define REG_F26_NDX (FPU_REG_SIZE * 26)
# define REG_F27_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 27) # define REG_F27_NDX (FPU_REG_SIZE * 27)
# define REG_F28_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 28) # define REG_F28_NDX (FPU_REG_SIZE * 28)
# define REG_F29_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 29) # define REG_F29_NDX (FPU_REG_SIZE * 29)
# define REG_F30_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 30) # define REG_F30_NDX (FPU_REG_SIZE * 30)
# define REG_F31_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 31) # define REG_F31_NDX (FPU_REG_SIZE * 31)
# define REG_FCSR_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 32) # define REG_FCSR_NDX (FPU_REG_SIZE * 32)
# define FPU_XCPT_REGS (FPU_REG_SIZE * 33) # define FPU_XCPT_REGS (FPU_REG_SIZE * 33)
# define FPU_REG_FULL_SIZE (INT_REG_SIZE * FPU_REG_SIZE) # define FPU_XCPT_SIZE (INT_REG_SIZE * FPU_XCPT_REGS)
#else /* !CONFIG_ARCH_FPU */ #else /* !CONFIG_ARCH_FPU */
# define FPU_XCPT_REGS (0) # define FPU_XCPT_REGS (0)
# define FPU_REG_FULL_SIZE (0) # define FPU_XCPT_SIZE (0)
#endif /* CONFIG_ARCH_FPU */ #endif /* CONFIG_ARCH_FPU */
#define XCPTCONTEXT_REGS (INT_XCPT_REGS + FPU_XCPT_REGS) #define XCPTCONTEXT_REGS (INT_XCPT_REGS + FPU_XCPT_REGS)
#define XCPTCONTEXT_SIZE (INT_REG_SIZE * XCPTCONTEXT_REGS) /* Save only integer regs. FPU is handled separately */
#define XCPTCONTEXT_SIZE (INT_XCPT_SIZE)
/* In assembly language, values have to be referenced as byte address /* In assembly language, values have to be referenced as byte address
* offsets. But in C, it is more convenient to reference registers as * offsets. But in C, it is more convenient to reference registers as
@ -562,9 +564,15 @@ struct xcptcontext
#endif #endif
#endif #endif
/* Register save area */ /* Integer register save area */
uintptr_t *regs; uintptr_t *regs;
/* FPU register save area */
#ifdef CONFIG_ARCH_FPU
uintptr_t fregs[FPU_XCPT_REGS];
#endif
}; };
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */

View File

@ -129,7 +129,7 @@ int up_cpu_paused(int cpu)
* of the assigned task list for this CPU. * of the assigned task list for this CPU.
*/ */
riscv_savestate(tcb->xcp.regs); riscv_savecontext(tcb);
/* Wait for the spinlock to be released */ /* Wait for the spinlock to be released */
@ -161,7 +161,7 @@ int up_cpu_paused(int cpu)
* will be made when the interrupt returns. * will be made when the interrupt returns.
*/ */
riscv_restorestate(tcb->xcp.regs); riscv_restorecontext(tcb);
spin_unlock(&g_cpu_wait[cpu]); spin_unlock(&g_cpu_wait[cpu]);
spin_unlock(&g_cpu_resumed[cpu]); spin_unlock(&g_cpu_resumed[cpu]);

View File

@ -113,8 +113,6 @@ exception_common:
csrr s0, CSR_EPC csrr s0, CSR_EPC
REGSTORE s0, REG_EPC(sp) /* exception PC */ REGSTORE s0, REG_EPC(sp) /* exception PC */
riscv_savefpu sp
/* Setup arg0(exception cause), arg1(context) */ /* Setup arg0(exception cause), arg1(context) */
csrr a0, CSR_CAUSE /* exception cause */ csrr a0, CSR_CAUSE /* exception cause */
@ -148,8 +146,6 @@ exception_common:
mv sp, a0 mv sp, a0
riscv_loadfpu sp
REGLOAD s0, REG_EPC(sp) /* restore sepc */ REGLOAD s0, REG_EPC(sp) /* restore sepc */
csrw CSR_EPC, s0 csrw CSR_EPC, s0

View File

@ -228,18 +228,18 @@ pid_t riscv_fork(const struct fork_s *context)
child->cmn.xcp.regs[REG_GP] = newsp; /* Global pointer */ child->cmn.xcp.regs[REG_GP] = newsp; /* Global pointer */
#endif #endif
#ifdef CONFIG_ARCH_FPU #ifdef CONFIG_ARCH_FPU
child->cmn.xcp.regs[REG_FS0] = context->fs0; /* Saved register fs1 */ child->cmn.xcp.fregs[REG_FS0] = context->fs0; /* Saved register fs1 */
child->cmn.xcp.regs[REG_FS1] = context->fs1; /* Saved register fs1 */ child->cmn.xcp.fregs[REG_FS1] = context->fs1; /* Saved register fs1 */
child->cmn.xcp.regs[REG_FS2] = context->fs2; /* Saved register fs2 */ child->cmn.xcp.fregs[REG_FS2] = context->fs2; /* Saved register fs2 */
child->cmn.xcp.regs[REG_FS3] = context->fs3; /* Saved register fs3 */ child->cmn.xcp.fregs[REG_FS3] = context->fs3; /* Saved register fs3 */
child->cmn.xcp.regs[REG_FS4] = context->fs4; /* Saved register fs4 */ child->cmn.xcp.fregs[REG_FS4] = context->fs4; /* Saved register fs4 */
child->cmn.xcp.regs[REG_FS5] = context->fs5; /* Saved register fs5 */ child->cmn.xcp.fregs[REG_FS5] = context->fs5; /* Saved register fs5 */
child->cmn.xcp.regs[REG_FS6] = context->fs6; /* Saved register fs6 */ child->cmn.xcp.fregs[REG_FS6] = context->fs6; /* Saved register fs6 */
child->cmn.xcp.regs[REG_FS7] = context->fs7; /* Saved register fs7 */ child->cmn.xcp.fregs[REG_FS7] = context->fs7; /* Saved register fs7 */
child->cmn.xcp.regs[REG_FS8] = context->fs8; /* Saved register fs8 */ child->cmn.xcp.fregs[REG_FS8] = context->fs8; /* Saved register fs8 */
child->cmn.xcp.regs[REG_FS9] = context->fs9; /* Saved register fs9 */ child->cmn.xcp.fregs[REG_FS9] = context->fs9; /* Saved register fs9 */
child->cmn.xcp.regs[REG_FS10] = context->fs10; /* Saved register fs10 */ child->cmn.xcp.fregs[REG_FS10] = context->fs10; /* Saved register fs10 */
child->cmn.xcp.regs[REG_FS11] = context->fs11; /* Saved register fs11 */ child->cmn.xcp.fregs[REG_FS11] = context->fs11; /* Saved register fs11 */
#endif #endif
#ifdef CONFIG_LIB_SYSCALL #ifdef CONFIG_LIB_SYSCALL

View File

@ -90,6 +90,7 @@
#endif #endif
#ifdef CONFIG_ARCH_FPU #ifdef CONFIG_ARCH_FPU
# define FPU_REG_FULL_SIZE (INT_REG_SIZE * FPU_REG_SIZE)
# define FORK_FS0_OFFSET (FORK_INT_SIZE + 0*FPU_REG_FULL_SIZE) # define FORK_FS0_OFFSET (FORK_INT_SIZE + 0*FPU_REG_FULL_SIZE)
# define FORK_FS1_OFFSET (FORK_INT_SIZE + 1*FPU_REG_FULL_SIZE) # define FORK_FS1_OFFSET (FORK_INT_SIZE + 1*FPU_REG_FULL_SIZE)
# define FORK_FS2_OFFSET (FORK_INT_SIZE + 2*FPU_REG_FULL_SIZE) # define FORK_FS2_OFFSET (FORK_INT_SIZE + 2*FPU_REG_FULL_SIZE)

View File

@ -1,4 +1,4 @@
/************************************************************************************ /****************************************************************************
* arch/risc-v/src/common/riscv_fpu.S * arch/risc-v/src/common/riscv_fpu.S
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -16,11 +16,11 @@
* License for the specific language governing permissions and limitations * License for the specific language governing permissions and limitations
* under the License. * under the License.
* *
************************************************************************************/ ****************************************************************************/
/************************************************************************************ /****************************************************************************
* Included Files * Included Files
************************************************************************************/ ****************************************************************************/
#include <nuttx/config.h> #include <nuttx/config.h>
@ -29,20 +29,24 @@
#include <arch/irq.h> #include <arch/irq.h>
#include <arch/mode.h> #include <arch/mode.h>
#include "riscv_macros.S"
#ifdef CONFIG_ARCH_FPU #ifdef CONFIG_ARCH_FPU
/************************************************************************************ /****************************************************************************
* Public Symbols * Public Symbols
************************************************************************************/ ****************************************************************************/
.globl riscv_fpuconfig .globl riscv_fpuconfig
.globl riscv_savefpu
.globl riscv_restorefpu
.file "riscv_fpu.S" .file "riscv_fpu.S"
/************************************************************************************ /****************************************************************************
* Public Functions * Public Functions
************************************************************************************/ ****************************************************************************/
/************************************************************************************ /****************************************************************************
* Name: riscv_fpuconfig * Name: riscv_fpuconfig
* *
* Description: * Description:
@ -57,7 +61,7 @@
* Returned Value: * Returned Value:
* This function does not return anything explicitly. * This function does not return anything explicitly.
* *
************************************************************************************/ ****************************************************************************/
.type riscv_fpuconfig, function .type riscv_fpuconfig, function
@ -71,4 +75,81 @@ riscv_fpuconfig:
fence.i fence.i
ret ret
/****************************************************************************
* Name: riscv_savefpu
*
* Description:
* Given the pointer to a register save area (in A0), save the state of the
* floating point registers.
*
* C Function Prototype:
* void riscv_savefpu(uintptr_t *regs, uintptr_t *fregs);
*
* Input Parameters:
* regs - A pointer to the integer registers that contain the status
* fregs - A pointer to the register save area in which to save the
* floating point registers
*
* Returned Value:
* None
*
****************************************************************************/
.type riscv_savefpu, function
riscv_savefpu:
REGLOAD t0, REG_INT_CTX(a0)
li t1, MSTATUS_FS
and t2, t0, t1
li t1, MSTATUS_FS_DIRTY
bne t2, t1, 1f
li t1, ~MSTATUS_FS
and t0, t0, t1
li t1, MSTATUS_FS_CLEAN
or t0, t0, t1
REGSTORE t0, REG_INT_CTX(a0)
riscv_savefpu a1
1:
ret
/****************************************************************************
* Name: riscv_restorefpu
*
* Description:
* Given the pointer to a register save area (in A0), restore the state of
* the floating point registers.
*
* C Function Prototype:
* void riscv_restorefpu(uintptr_t *regs, uintptr_t *fregs);
*
* Input Parameters:
* regs - A pointer to the integer registers that contain the status
* fregs - A pointer to the register save area containing the floating
* point registers.
*
* Returned Value:
* This function does not return anything explicitly. However, it is
* called from interrupt level assembly logic that assumes that r0 is
* preserved.
*
****************************************************************************/
.type riscv_restorefpu, function
riscv_restorefpu:
REGLOAD t0, REG_INT_CTX(a0)
li t1, MSTATUS_FS
and t2, t0, t1
li t1, MSTATUS_FS_INIT
ble t2, t1, 1f
riscv_loadfpu a1
1:
ret
#endif /* CONFIG_ARCH_FPU */ #endif /* CONFIG_ARCH_FPU */

View File

@ -204,8 +204,12 @@ void riscv_exception_attach(void);
#ifdef CONFIG_ARCH_FPU #ifdef CONFIG_ARCH_FPU
void riscv_fpuconfig(void); void riscv_fpuconfig(void);
void riscv_savefpu(uintptr_t *regs, uintptr_t *fregs);
void riscv_restorefpu(uintptr_t *regs, uintptr_t *fregs);
#else #else
# define riscv_fpuconfig() # define riscv_fpuconfig()
# define riscv_savefpu(regs, fregs)
# define riscv_restorefpu(regs, fregs)
#endif #endif
/* Save / restore context of task */ /* Save / restore context of task */
@ -213,11 +217,23 @@ void riscv_fpuconfig(void);
static inline void riscv_savecontext(struct tcb_s *tcb) static inline void riscv_savecontext(struct tcb_s *tcb)
{ {
tcb->xcp.regs = (uintptr_t *)CURRENT_REGS; tcb->xcp.regs = (uintptr_t *)CURRENT_REGS;
#ifdef CONFIG_ARCH_FPU
/* Save current process FPU state to TCB */
riscv_savefpu(tcb->xcp.regs, tcb->xcp.fregs);
#endif
} }
static inline void riscv_restorecontext(struct tcb_s *tcb) static inline void riscv_restorecontext(struct tcb_s *tcb)
{ {
CURRENT_REGS = (uintptr_t *)tcb->xcp.regs; CURRENT_REGS = (uintptr_t *)tcb->xcp.regs;
#ifdef CONFIG_ARCH_FPU
/* Restore FPU state for next process */
riscv_restorefpu(tcb->xcp.regs, tcb->xcp.fregs);
#endif
} }
/* RISC-V PMP Config ********************************************************/ /* RISC-V PMP Config ********************************************************/

View File

@ -100,13 +100,6 @@
.macro riscv_savefpu in .macro riscv_savefpu in
#ifdef CONFIG_ARCH_FPU
REGLOAD t0, REG_INT_CTX(\in)
li t1, MSTATUS_FS
and t0, t0, t1
li t1, MSTATUS_FS_INIT
ble t0, t1, 1f
/* Store all floating point registers */ /* Store all floating point registers */
FSTORE f0, REG_F0(\in) FSTORE f0, REG_F0(\in)
@ -145,14 +138,6 @@
frcsr t0 frcsr t0
REGSTORE t0, REG_FCSR(\in) REGSTORE t0, REG_FCSR(\in)
1:
/* Restore what we have just destroyed (t0, t1) */
REGLOAD t0, REG_T0(\in)
REGLOAD t1, REG_T1(\in)
#endif
.endm .endm
/**************************************************************************** /****************************************************************************
@ -216,13 +201,6 @@
.macro riscv_loadfpu out .macro riscv_loadfpu out
#ifdef CONFIG_ARCH_FPU
REGLOAD t0, REG_INT_CTX(\out)
li t1, MSTATUS_FS
and t0, t0, t1
li t1, MSTATUS_FS_INIT
ble t0, t1, 1f
/* Load all floating point registers */ /* Load all floating point registers */
FLOAD f0, REG_F0(\out) FLOAD f0, REG_F0(\out)
@ -263,14 +241,6 @@
REGLOAD t0, REG_FCSR(\out) REGLOAD t0, REG_FCSR(\out)
fscsr t0 fscsr t0
1:
/* Restore what we have just destroyed (t0, t1) */
REGLOAD t0, REG_T0(\out)
REGLOAD t1, REG_T1(\out)
#endif
.endm .endm
/**************************************************************************** /****************************************************************************

View File

@ -153,7 +153,7 @@ int riscv_swint(int irq, void *context, void *arg)
struct tcb_s *next = (struct tcb_s *)regs[REG_A1]; struct tcb_s *next = (struct tcb_s *)regs[REG_A1];
DEBUGASSERT(regs[REG_A1] != 0); DEBUGASSERT(regs[REG_A1] != 0);
CURRENT_REGS = (uintptr_t *)next->xcp.regs; riscv_restorecontext(next);
} }
break; break;
@ -180,8 +180,8 @@ int riscv_swint(int irq, void *context, void *arg)
struct tcb_s *next = (struct tcb_s *)regs[REG_A2]; struct tcb_s *next = (struct tcb_s *)regs[REG_A2];
DEBUGASSERT(regs[REG_A1] != 0 && regs[REG_A2] != 0); DEBUGASSERT(regs[REG_A1] != 0 && regs[REG_A2] != 0);
prev->xcp.regs = (uintptr_t *)CURRENT_REGS; riscv_savecontext(prev);
CURRENT_REGS = (uintptr_t *)next->xcp.regs; riscv_restorecontext(next);
} }
break; break;

View File

@ -109,8 +109,6 @@ sys_call6:
addi s0, sp, XCPTCONTEXT_SIZE addi s0, sp, XCPTCONTEXT_SIZE
REGSTORE s0, REG_SP(sp) /* original SP */ REGSTORE s0, REG_SP(sp) /* original SP */
riscv_savefpu sp
mv a0, sp /* a0 = context */ mv a0, sp /* a0 = context */
#if CONFIG_ARCH_INTERRUPTSTACK > 15 #if CONFIG_ARCH_INTERRUPTSTACK > 15
@ -127,8 +125,6 @@ sys_call6:
mv sp, a0 /* use sp, as a0 gets wiped */ mv sp, a0 /* use sp, as a0 gets wiped */
riscv_loadfpu sp
REGLOAD s0, REG_EPC(sp) /* restore epc */ REGLOAD s0, REG_EPC(sp) /* restore epc */
csrw CSR_EPC, s0 csrw CSR_EPC, s0