riscv/fpu: Restore correct lazy-FPU functionality

- Save the FPU registers into the tcb so they don't get lost if the stack
  frame for xcp.regs moves (as it does)
- Handle interger and FPU register save/load separately
- Integer registers are saved/loaded always, like before
- FPU registers are only saved during a context switch:
  - Save ONLY if FPU is dirty
  - Restore always if FPU has been used (not in FSTATE_OFF, FSTATE_INIT)
- Remove all lazy-FPU related logic from the macros, it is not needed
This commit is contained in:
Ville Juven 2023-05-15 17:56:15 +03:00 committed by Xiang Xiao
parent 040eb3c990
commit 35c27b5a9a
9 changed files with 174 additions and 106 deletions

View File

@ -204,50 +204,52 @@
#endif
#ifdef CONFIG_ARCH_FPU
# define REG_F0_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 0)
# define REG_F1_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 1)
# define REG_F2_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 2)
# define REG_F3_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 3)
# define REG_F4_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 4)
# define REG_F5_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 5)
# define REG_F6_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 6)
# define REG_F7_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 7)
# define REG_F8_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 8)
# define REG_F9_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 9)
# define REG_F10_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 10)
# define REG_F11_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 11)
# define REG_F12_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 12)
# define REG_F13_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 13)
# define REG_F14_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 14)
# define REG_F15_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 15)
# define REG_F16_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 16)
# define REG_F17_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 17)
# define REG_F18_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 18)
# define REG_F19_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 19)
# define REG_F20_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 20)
# define REG_F21_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 21)
# define REG_F22_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 22)
# define REG_F23_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 23)
# define REG_F24_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 24)
# define REG_F25_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 25)
# define REG_F26_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 26)
# define REG_F27_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 27)
# define REG_F28_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 28)
# define REG_F29_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 29)
# define REG_F30_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 30)
# define REG_F31_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 31)
# define REG_FCSR_NDX (INT_XCPT_REGS + FPU_REG_SIZE * 32)
# define REG_F0_NDX (FPU_REG_SIZE * 0)
# define REG_F1_NDX (FPU_REG_SIZE * 1)
# define REG_F2_NDX (FPU_REG_SIZE * 2)
# define REG_F3_NDX (FPU_REG_SIZE * 3)
# define REG_F4_NDX (FPU_REG_SIZE * 4)
# define REG_F5_NDX (FPU_REG_SIZE * 5)
# define REG_F6_NDX (FPU_REG_SIZE * 6)
# define REG_F7_NDX (FPU_REG_SIZE * 7)
# define REG_F8_NDX (FPU_REG_SIZE * 8)
# define REG_F9_NDX (FPU_REG_SIZE * 9)
# define REG_F10_NDX (FPU_REG_SIZE * 10)
# define REG_F11_NDX (FPU_REG_SIZE * 11)
# define REG_F12_NDX (FPU_REG_SIZE * 12)
# define REG_F13_NDX (FPU_REG_SIZE * 13)
# define REG_F14_NDX (FPU_REG_SIZE * 14)
# define REG_F15_NDX (FPU_REG_SIZE * 15)
# define REG_F16_NDX (FPU_REG_SIZE * 16)
# define REG_F17_NDX (FPU_REG_SIZE * 17)
# define REG_F18_NDX (FPU_REG_SIZE * 18)
# define REG_F19_NDX (FPU_REG_SIZE * 19)
# define REG_F20_NDX (FPU_REG_SIZE * 20)
# define REG_F21_NDX (FPU_REG_SIZE * 21)
# define REG_F22_NDX (FPU_REG_SIZE * 22)
# define REG_F23_NDX (FPU_REG_SIZE * 23)
# define REG_F24_NDX (FPU_REG_SIZE * 24)
# define REG_F25_NDX (FPU_REG_SIZE * 25)
# define REG_F26_NDX (FPU_REG_SIZE * 26)
# define REG_F27_NDX (FPU_REG_SIZE * 27)
# define REG_F28_NDX (FPU_REG_SIZE * 28)
# define REG_F29_NDX (FPU_REG_SIZE * 29)
# define REG_F30_NDX (FPU_REG_SIZE * 30)
# define REG_F31_NDX (FPU_REG_SIZE * 31)
# define REG_FCSR_NDX (FPU_REG_SIZE * 32)
# define FPU_XCPT_REGS (FPU_REG_SIZE * 33)
# define FPU_REG_FULL_SIZE (INT_REG_SIZE * FPU_REG_SIZE)
# define FPU_XCPT_SIZE (INT_REG_SIZE * FPU_XCPT_REGS)
#else /* !CONFIG_ARCH_FPU */
# define FPU_XCPT_REGS (0)
# define FPU_REG_FULL_SIZE (0)
# define FPU_XCPT_SIZE (0)
#endif /* CONFIG_ARCH_FPU */
#define XCPTCONTEXT_REGS (INT_XCPT_REGS + FPU_XCPT_REGS)
#define XCPTCONTEXT_SIZE (INT_REG_SIZE * XCPTCONTEXT_REGS)
/* Save only integer regs. FPU is handled separately */
#define XCPTCONTEXT_SIZE (INT_XCPT_SIZE)
/* In assembly language, values have to be referenced as byte address
* offsets. But in C, it is more convenient to reference registers as
@ -562,9 +564,15 @@ struct xcptcontext
#endif
#endif
/* Register save area */
/* Integer register save area */
uintptr_t *regs;
/* FPU register save area */
#ifdef CONFIG_ARCH_FPU
uintptr_t fregs[FPU_XCPT_SIZE];
#endif
};
#endif /* __ASSEMBLY__ */

View File

@ -113,8 +113,6 @@ exception_common:
csrr s0, CSR_EPC
REGSTORE s0, REG_EPC(sp) /* exception PC */
riscv_savefpu sp
/* Setup arg0(exception cause), arg1(context) */
csrr a0, CSR_CAUSE /* exception cause */
@ -148,8 +146,6 @@ exception_common:
mv sp, a0
riscv_loadfpu sp
REGLOAD s0, REG_EPC(sp) /* restore sepc */
csrw CSR_EPC, s0

View File

@ -1,4 +1,4 @@
/************************************************************************************
/****************************************************************************
* arch/risc-v/src/common/riscv_fpu.S
*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -16,11 +16,11 @@
* License for the specific language governing permissions and limitations
* under the License.
*
************************************************************************************/
****************************************************************************/
/************************************************************************************
/****************************************************************************
* Included Files
************************************************************************************/
****************************************************************************/
#include <nuttx/config.h>
@ -29,20 +29,24 @@
#include <arch/irq.h>
#include <arch/mode.h>
#include "riscv_macros.S"
#ifdef CONFIG_ARCH_FPU
/************************************************************************************
/****************************************************************************
* Public Symbols
************************************************************************************/
****************************************************************************/
.globl riscv_fpuconfig
.globl riscv_savefpu
.globl riscv_restorefpu
.file "riscv_fpu.S"
/************************************************************************************
/****************************************************************************
* Public Functions
************************************************************************************/
****************************************************************************/
/************************************************************************************
/****************************************************************************
* Name: riscv_fpuconfig
*
* Description:
@ -57,7 +61,7 @@
* Returned Value:
* This function does not return anything explicitly.
*
************************************************************************************/
****************************************************************************/
.type riscv_fpuconfig, function
@ -71,4 +75,81 @@ riscv_fpuconfig:
fence.i
ret
/****************************************************************************
* Name: riscv_savefpu
*
* Description:
* Given the pointer to a register save area (in A0), save the state of the
* floating point registers.
*
* C Function Prototype:
* void riscv_savefpu(uintptr_t *regs, uintptr_t *fregs);
*
* Input Parameters:
* regs - A pointer to the integer registers that contain the status
* fregs - A pointer to the register save area in which to save the
* floating point registers
*
* Returned Value:
* None
*
****************************************************************************/
.type riscv_savefpu, function
riscv_savefpu:
REGLOAD t0, REG_INT_CTX(a0)
li t1, MSTATUS_FS
and t2, t0, t1
li t1, MSTATUS_FS_DIRTY
bne t2, t1, 1f
li t1, ~MSTATUS_FS
and t0, t0, t1
li t1, MSTATUS_FS_CLEAN
or t0, t0, t1
REGSTORE t0, REG_INT_CTX(a0)
riscv_savefpu a1
1:
ret
/****************************************************************************
* Name: riscv_restorefpu
*
* Description:
* Given the pointer to a register save area (in A0), restore the state of
* the floating point registers.
*
* C Function Prototype:
* void riscv_restorefpu(uintptr_t *regs, uintptr_t *fregs);
*
* Input Parameters:
* regs - A pointer to the integer registers that contain the status
* fregs - A pointer to the register save area containing the floating
* point registers.
*
* Returned Value:
* This function does not return anything explicitly. However, it is
* called from interrupt level assembly logic that assumes that r0 is
* preserved.
*
****************************************************************************/
.type riscv_restorefpu, function
riscv_restorefpu:
REGLOAD t0, REG_INT_CTX(a0)
li t1, MSTATUS_FS
and t2, t0, t1
li t1, MSTATUS_FS_INIT
ble t2, t1, 1f
riscv_loadfpu a1
1:
ret
#endif /* CONFIG_ARCH_FPU */

View File

@ -204,8 +204,12 @@ void riscv_exception_attach(void);
#ifdef CONFIG_ARCH_FPU
void riscv_fpuconfig(void);
void riscv_savefpu(uintptr_t *regs, uintptr_t *fregs);
void riscv_restorefpu(uintptr_t *regs, uintptr_t *fregs);
#else
# define riscv_fpuconfig()
# define riscv_savefpu(regs, fregs)
# define riscv_restorefpu(regs, fregs)
#endif
/* Save / restore context of task */
@ -213,11 +217,23 @@ void riscv_fpuconfig(void);
static inline void riscv_savecontext(struct tcb_s *tcb)
{
tcb->xcp.regs = (uintptr_t *)CURRENT_REGS;
#ifdef CONFIG_ARCH_FPU
/* Save current process FPU state to TCB */
riscv_savefpu(tcb->xcp.regs, tcb->xcp.fregs);
#endif
}
static inline void riscv_restorecontext(struct tcb_s *tcb)
{
CURRENT_REGS = (uintptr_t *)tcb->xcp.regs;
#ifdef CONFIG_ARCH_FPU
/* Restore FPU after the new address environment is instantiated */
riscv_restorefpu(tcb->xcp.regs, tcb->xcp.fregs);
#endif
}
/* RISC-V PMP Config ********************************************************/

View File

@ -100,13 +100,6 @@
.macro riscv_savefpu in
#ifdef CONFIG_ARCH_FPU
REGLOAD t0, REG_INT_CTX(\in)
li t1, MSTATUS_FS
and t0, t0, t1
li t1, MSTATUS_FS_INIT
ble t0, t1, 1f
/* Store all floating point registers */
FSTORE f0, REG_F0(\in)
@ -145,14 +138,6 @@
frcsr t0
REGSTORE t0, REG_FCSR(\in)
1:
/* Restore what we have just destroyed (t0, t1) */
REGLOAD t0, REG_T0(\in)
REGLOAD t1, REG_T1(\in)
#endif
.endm
/****************************************************************************
@ -216,13 +201,6 @@
.macro riscv_loadfpu out
#ifdef CONFIG_ARCH_FPU
REGLOAD t0, REG_INT_CTX(\out)
li t1, MSTATUS_FS
and t0, t0, t1
li t1, MSTATUS_FS_INIT
ble t0, t1, 1f
/* Load all floating point registers */
FLOAD f0, REG_F0(\out)
@ -263,14 +241,6 @@
REGLOAD t0, REG_FCSR(\out)
fscsr t0
1:
/* Restore what we have just destroyed (t0, t1) */
REGLOAD t0, REG_T0(\out)
REGLOAD t1, REG_T1(\out)
#endif
.endm
/****************************************************************************

View File

@ -153,7 +153,7 @@ int riscv_swint(int irq, void *context, void *arg)
struct tcb_s *next = (struct tcb_s *)regs[REG_A1];
DEBUGASSERT(regs[REG_A1] != 0);
CURRENT_REGS = (uintptr_t *)next->xcp.regs;
riscv_restorecontext(next);
}
break;
@ -180,8 +180,8 @@ int riscv_swint(int irq, void *context, void *arg)
struct tcb_s *next = (struct tcb_s *)regs[REG_A2];
DEBUGASSERT(regs[REG_A1] != 0 && regs[REG_A2] != 0);
prev->xcp.regs = (uintptr_t *)CURRENT_REGS;
CURRENT_REGS = (uintptr_t *)next->xcp.regs;
riscv_savecontext(prev);
riscv_restorecontext(next);
}
break;

View File

@ -227,18 +227,18 @@ pid_t up_vfork(const struct vfork_s *context)
child->cmn.xcp.regs[REG_GP] = newsp; /* Global pointer */
#endif
#ifdef CONFIG_ARCH_FPU
child->cmn.xcp.regs[REG_FS0] = context->fs0; /* Saved register fs1 */
child->cmn.xcp.regs[REG_FS1] = context->fs1; /* Saved register fs1 */
child->cmn.xcp.regs[REG_FS2] = context->fs2; /* Saved register fs2 */
child->cmn.xcp.regs[REG_FS3] = context->fs3; /* Saved register fs3 */
child->cmn.xcp.regs[REG_FS4] = context->fs4; /* Saved register fs4 */
child->cmn.xcp.regs[REG_FS5] = context->fs5; /* Saved register fs5 */
child->cmn.xcp.regs[REG_FS6] = context->fs6; /* Saved register fs6 */
child->cmn.xcp.regs[REG_FS7] = context->fs7; /* Saved register fs7 */
child->cmn.xcp.regs[REG_FS8] = context->fs8; /* Saved register fs8 */
child->cmn.xcp.regs[REG_FS9] = context->fs9; /* Saved register fs9 */
child->cmn.xcp.regs[REG_FS10] = context->fs10; /* Saved register fs10 */
child->cmn.xcp.regs[REG_FS11] = context->fs11; /* Saved register fs11 */
child->cmn.xcp.fregs[REG_FS0] = context->fs0; /* Saved register fs1 */
child->cmn.xcp.fregs[REG_FS1] = context->fs1; /* Saved register fs1 */
child->cmn.xcp.fregs[REG_FS2] = context->fs2; /* Saved register fs2 */
child->cmn.xcp.fregs[REG_FS3] = context->fs3; /* Saved register fs3 */
child->cmn.xcp.fregs[REG_FS4] = context->fs4; /* Saved register fs4 */
child->cmn.xcp.fregs[REG_FS5] = context->fs5; /* Saved register fs5 */
child->cmn.xcp.fregs[REG_FS6] = context->fs6; /* Saved register fs6 */
child->cmn.xcp.fregs[REG_FS7] = context->fs7; /* Saved register fs7 */
child->cmn.xcp.fregs[REG_FS8] = context->fs8; /* Saved register fs8 */
child->cmn.xcp.fregs[REG_FS9] = context->fs9; /* Saved register fs9 */
child->cmn.xcp.fregs[REG_FS10] = context->fs10; /* Saved register fs10 */
child->cmn.xcp.fregs[REG_FS11] = context->fs11; /* Saved register fs11 */
#endif
#ifdef CONFIG_LIB_SYSCALL

View File

@ -90,6 +90,7 @@
#endif
#ifdef CONFIG_ARCH_FPU
# define FPU_REG_FULL_SIZE (INT_REG_SIZE * FPU_REG_SIZE)
# define VFORK_FS0_OFFSET (VFORK_INT_SIZE + 0*FPU_REG_FULL_SIZE)
# define VFORK_FS1_OFFSET (VFORK_INT_SIZE + 1*FPU_REG_FULL_SIZE)
# define VFORK_FS2_OFFSET (VFORK_INT_SIZE + 2*FPU_REG_FULL_SIZE)

View File

@ -109,8 +109,6 @@ sys_call6:
addi s0, sp, XCPTCONTEXT_SIZE
REGSTORE s0, REG_SP(sp) /* original SP */
riscv_savefpu sp
mv a0, sp /* a0 = context */
#if CONFIG_ARCH_INTERRUPTSTACK > 15
@ -127,8 +125,6 @@ sys_call6:
mv sp, a0 /* use sp, as a0 gets wiped */
riscv_loadfpu sp
REGLOAD s0, REG_EPC(sp) /* restore epc */
csrw CSR_EPC, s0