xtensa_context.S: Use Zephyr's version of spilling the window register file.

Signed-off-by: Abdelatif Guettouche <abdelatif.guettouche@espressif.com>
This commit is contained in:
Abdelatif Guettouche 2022-03-11 19:36:56 +01:00 committed by Xiang Xiao
parent 2445de173d
commit 5305f76b1d
4 changed files with 115 additions and 14 deletions

View File

@ -106,13 +106,10 @@
#endif
#ifndef __XTENSA_CALL0_ABI__
/* Temporary space for saving stuff during window spill.
* REVISIT: I don't think that we need so many temporaries.
*/
/* Temporary space for saving stuff during window spill. */
# define REG_TMP0 (_REG_WINDOW_TMPS + 0)
# define REG_TMP1 (_REG_WINDOW_TMPS + 1)
# define _REG_OVLY_START (_REG_WINDOW_TMPS + 2)
# define _REG_OVLY_START (_REG_WINDOW_TMPS + 1)
#else
# define _REG_OVLY_START _REG_WINDOW_TMPS
#endif

View File

@ -0,0 +1,76 @@
/****************************************************************************
* arch/xtensa/src/common/xtensa_asm_utils.h
*
* Copyright (c) 2017, Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*
****************************************************************************/
#ifndef __ARCH_XTENSA_SRC_COMMON_XTENSA_ASM_UTILS_H
#define __ARCH_XTENSA_SRC_COMMON_XTENSA_ASM_UTILS_H
/****************************************************************************
* Assembly Language Macros
****************************************************************************/
/****************************************************************************
*
* Name: SPILL_ALL_WINDOWS
*
* Spills all windowed registers (i.e. registers not visible as
* A0-A15) to their ABI-defined spill regions on the stack.
*
* Unlike the Xtensa HAL implementation, this code requires window
* overflow exceptions to be enabled in PS (EXCM cleared, WOE set), and
* relies on repeated hardware exception handling to do the register
* spills. The trick is to do a
* noop write to the high registers, which the hardware will trap
* (into an overflow exception) in the case where those registers are
* already used by an existing call frame. Then it rotates the window
* and repeats until all but the A0-A3 registers of the original frame
* are guaranteed to be spilled, eventually rotating back around into
* the original frame. Advantages:
*
* - Vastly smaller code size
*
* - More easily maintained if changes are needed to window over/underflow
* exception handling.
*
* - Requires no scratch registers to do its work, so can be used safely in
* any context.
*
* - If the WOE bit is not enabled (for example, in code written for
* the CALL0 ABI), this becomes a silent noop and operates compatibly.
*
* - Hilariously it's ACTUALLY FASTER than the HAL routine. And not
* just a little bit, it's MUCH faster. With a mostly full register
* file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill
* registers with this vs. 279 (!) to do it with
* xthal_spill_windows().
****************************************************************************/
.macro SPILL_ALL_WINDOWS
/* Takes no arguments.  Each step below is a pair:
 *
 *   and aN, aN, aN  - a no-op write to a high register; if that register
 *                     still belongs to an older call frame the hardware
 *                     raises a window overflow exception, whose handler
 *                     spills that frame to the stack.
 *   rotw K          - rotate the register window forward so the next
 *                     step probes the next group of registers.
 *
 * The instruction sequence must stay exactly as written: the rotw counts
 * are chosen per register-file size so that they add up to a full trip
 * around the file.
 */
#if XCHAL_NUM_AREGS == 64
/* 64 physical registers: rotw counts sum to 3 + 3 + 3 + 3 + 4 = 16.
 * NOTE(review): presumably rotw counts in units of 4 registers, so
 * 16 == 64 / 4 brings the window back to the original frame - confirm
 * against the ISA manual.
 */
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 4
#elif XCHAL_NUM_AREGS == 32
/* 32 physical registers: rotw counts sum to 3 + 3 + 2 = 8 (== 32 / 4,
 * same assumption about rotw units as in the 64-register case).
 */
and a12, a12, a12
rotw 3
and a12, a12, a12
rotw 3
/* NOTE(review): this last probe touches a4 instead of a12, presumably so
 * that it does not reach into the registers of the original frame, which
 * must not be spilled here - confirm.
 */
and a4, a4, a4
rotw 2
#else
/* Only 32- and 64-entry register files are handled; fail the build for
 * any other configuration rather than emit an incomplete spill sequence.
 */
#error Unrecognized XCHAL_NUM_AREGS
#endif
.endm
#endif /* __ARCH_XTENSA_SRC_COMMON_XTENSA_ASM_UTILS_H */

View File

@ -68,6 +68,7 @@
#include "syscall.h"
#include "xtensa_swi.h"
#include "xtensa_asm_utils.h"
/****************************************************************************
* Public Functions
@ -160,16 +161,43 @@ _xtensa_context_save:
#error Overlay support is not implemented
#endif
s32i a0, sp, (4 * REG_TMP0) /* Save return address */
s32i sp, sp, (4 * REG_TMP1) /* Save current stack pointer */
wsr sp, EXCSAVE_1 /* Preserve register save area */
/* SPILL_ALL_WINDOWS macro requires window overflow exceptions to be enabled,
* i.e. PS.EXCM cleared and PS.WOE set.
* Since we are going to clear PS.EXCM, we also need to increase INTLEVEL
* at least to XCHAL_EXCM_LEVEL. This matches the value of the effective
* INTLEVEL at entry (CINTLEVEL = max(PS.INTLEVEL, XCHAL_EXCM_LEVEL) when
* PS.EXCM is set).
* Since WindowOverflow exceptions will trigger inside SPILL_ALL_WINDOWS,
* we need to save/restore EPC1 as well.
* NOTE: Even though a4-a15 are saved into the exception frame, we should not
* clobber them until after SPILL_ALL_WINDOWS. This is because these registers
* may contain live windows belonging to previous frames in the call stack.
* These frames will be spilled by SPILL_ALL_WINDOWS, and if the register was
* used as a temporary by this code, the temporary value would get stored
* onto the stack, instead of the real value.
*/
l32i sp, sp, (4 * REG_A1) /* Restore the interruptee's SP */
call0 _xtensa_window_spill /* Preserves only a4-a5, a8-a9, a12-a13 */
s32i a0, sp, (4 * REG_TMP0) /* Save return address */
rsr a2, PS /* To be restored after SPILL_ALL_WINDOWS */
movi a0, PS_INTLEVEL_MASK
and a3, a2, a0 /* Get the current INTLEVEL */
bgeui a3, XCHAL_EXCM_LEVEL, 1f /* Calculate max(INTLEVEL, XCHAL_EXCM_LEVEL) */
movi a3, XCHAL_EXCM_LEVEL
1:
movi a0, PS_UM | PS_WOE /* Clear EXCM, enable window overflow, set new INTLEVEL */
or a3, a3, a0
wsr a3, ps
rsync
rsr a0, EPC1 /* To be restored after SPILL_ALL_WINDOWS */
addi sp, sp, XCPTCONTEXT_SIZE /* Go back to spill register region */
SPILL_ALL_WINDOWS /* Place the live register windows there */
addi sp, sp, -XCPTCONTEXT_SIZE /* Return the current stack pointer and proceed with context save*/
wsr a2, PS /* Restore PS to the value at entry */
wsr a0, EPC1 /* Restore EPC1 to the value at entry */
rsync
l32i a0, sp, (4 * REG_TMP0) /* Restore return address */
rsr sp, EXCSAVE_1 /* Save interruptee's a0 */
l32i a0, sp, (4 * REG_TMP0) /* Save return address */
l32i sp, sp, (4 * REG_TMP1) /* Save current stack pointer */
#endif
ret

View File

@ -352,7 +352,7 @@ _xtensa_level1_handler:
l32i a0, a2, (4 * REG_PC) /* Retrieve interruptee's PC */
wsr a0, EPC_1
l32i a0, a2, (4 * REG_A0) /* Retrieve interruptee's A0 */
l32i sp, a2, (4 * REG_A1) /* Retrieve interrupt stack frame */
l32i sp, a2, (4 * REG_A1) /* Retrieve interrupt stack frame */
l32i a2, a2, (4 * REG_A2) /* Retrieve interruptee's A2 */
rsync /* Ensure PS and EPC written */