diff --git a/arch/xtensa/include/irq.h b/arch/xtensa/include/irq.h index 9c00ba6a00..d21539b351 100644 --- a/arch/xtensa/include/irq.h +++ b/arch/xtensa/include/irq.h @@ -106,13 +106,10 @@ #endif #ifndef __XTENSA_CALL0_ABI__ - /* Temporary space for saving stuff during window spill. - * REVISIT: I don't think that we need so many temporaries. - */ + /* Temporary space for saving stuff during window spill. */ # define REG_TMP0 (_REG_WINDOW_TMPS + 0) -# define REG_TMP1 (_REG_WINDOW_TMPS + 1) -# define _REG_OVLY_START (_REG_WINDOW_TMPS + 2) +# define _REG_OVLY_START (_REG_WINDOW_TMPS + 1) #else # define _REG_OVLY_START _REG_WINDOW_TMPS #endif diff --git a/arch/xtensa/src/common/xtensa_asm_utils.h b/arch/xtensa/src/common/xtensa_asm_utils.h new file mode 100644 index 0000000000..8dec082a0d --- /dev/null +++ b/arch/xtensa/src/common/xtensa_asm_utils.h @@ -0,0 +1,76 @@ +/**************************************************************************** + * arch/xtensa/src/common/xtensa_asm_utils.h + * + * Copyright (c) 2017, Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + * + ****************************************************************************/ + +#ifndef __ARCH_XTENSA_SRC_COMMON_XTENSA_ASM_UTILS_H +#define __ARCH_XTENSA_SRC_COMMON_XTENSA_ASM_UTILS_H + +/**************************************************************************** + * Assembly Language Macros + ****************************************************************************/ + +/**************************************************************************** + * + * Name: SPILL_ALL_WINDOWS + * + * Spills all windowed registers (i.e. registers not visible as + * A0-A15) to their ABI-defined spill regions on the stack. + * + * Unlike the Xtensa HAL implementation, this code requires that the + * EXCM and WOE bit be enabled in PS, and relies on repeated hardware + * exception handling to do the register spills. 
The trick is to do a + * noop write to the high registers, which the hardware will trap + * (into an overflow exception) in the case where those registers are + * already used by an existing call frame. Then it rotates the window + * and repeats until all but the A0-A3 registers of the original frame + * are guaranteed to be spilled, eventually rotating back around into + * the original frame. Advantages: + * + * - Vastly smaller code size + * + * - More easily maintained if changes are needed to window over/underflow + * exception handling. + * + * - Requires no scratch registers to do its work, so can be used safely in + * any context. + * + * - If the WOE bit is not enabled (for example, in code written for + * the CALL0 ABI), this becomes a silent noop and operates compatibly. + * + * - Hilariously it's ACTUALLY FASTER than the HAL routine. And not + * just a little bit, it's MUCH faster. With a mostly full register + * file on an LX6 core (ESP-32) I'm measuring 145 cycles to spill + * registers with this vs. 279 (!) to do it with + * xthal_spill_windows(). 
+ ****************************************************************************/ + +.macro SPILL_ALL_WINDOWS +#if XCHAL_NUM_AREGS == 64 + and a12, a12, a12 /* No-op write to a high register; traps (window overflow) if it is in use by another frame */ + rotw 3 /* Rotate the register window onward */ + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 4 /* 3+3+3+3+4 = 16 steps of 4 registers = 64: back in the original frame */ +#elif XCHAL_NUM_AREGS == 32 + and a12, a12, a12 /* No-op write to a high register; traps (window overflow) if it is in use by another frame */ + rotw 3 /* Rotate the register window onward */ + and a12, a12, a12 + rotw 3 + and a4, a4, a4 + rotw 2 /* 3+3+2 = 8 steps of 4 registers = 32: back in the original frame */ +#else +#error Unrecognized XCHAL_NUM_AREGS +#endif +.endm + +#endif /* __ARCH_XTENSA_SRC_COMMON_XTENSA_ASM_UTILS_H */ diff --git a/arch/xtensa/src/common/xtensa_context.S b/arch/xtensa/src/common/xtensa_context.S index 845e0c5dc8..2d2d9d920f 100644 --- a/arch/xtensa/src/common/xtensa_context.S +++ b/arch/xtensa/src/common/xtensa_context.S @@ -68,6 +68,7 @@ #include "syscall.h" #include "xtensa_swi.h" +#include "xtensa_asm_utils.h" /**************************************************************************** * Public Functions @@ -160,16 +161,43 @@ _xtensa_context_save: #error Overlay support is not implemented #endif - s32i a0, sp, (4 * REG_TMP0) /* Save return address */ - s32i sp, sp, (4 * REG_TMP1) /* Save current stack pointer */ - wsr sp, EXCSAVE_1 /* Preserve register save area */ + /* SPILL_ALL_WINDOWS macro requires window overflow exceptions to be enabled, + * i.e. PS.EXCM cleared and PS.WOE set. + * Since we are going to clear PS.EXCM, we also need to increase INTLEVEL + * at least to XCHAL_EXCM_LEVEL. This matches the value of effective INTLEVEL + * at entry (CINTLEVEL=max(PS.INTLEVEL, XCHAL_EXCM_LEVEL)) when PS.EXCM is set. + * Since WindowOverflow exceptions will trigger inside SPILL_ALL_WINDOWS, + * we need to save/restore EPC1 as well. + * NOTE: Even though a4-a15 are saved into the exception frame, we should not + * clobber them until after SPILL_ALL_WINDOWS. This is because these registers + * may contain live windows belonging to previous frames in the call stack. 
+ * These frames will be spilled by SPILL_ALL_WINDOWS, and if the register was + * used as a temporary by this code, the temporary value would get stored + * onto the stack, instead of the real value. + */ - l32i sp, sp, (4 * REG_A1) /* Restore the interruptee's SP */ - call0 _xtensa_window_spill /* Preserves only a4-a5, a8-a9, a12-a13 */ + s32i a0, sp, (4 * REG_TMP0) /* Save return address */ + rsr a2, PS /* To be restored after SPILL_ALL_WINDOWS */ + movi a0, PS_INTLEVEL_MASK /* a0 is usable as scratch: the return address is saved in REG_TMP0 */ + and a3, a2, a0 /* Get the current INTLEVEL */ + bgeui a3, XCHAL_EXCM_LEVEL, 1f /* Calculate max(INTLEVEL, XCHAL_EXCM_LEVEL) */ + movi a3, XCHAL_EXCM_LEVEL /* INTLEVEL was lower: use XCHAL_EXCM_LEVEL instead */ +1: + movi a0, PS_UM | PS_WOE /* Clear EXCM, enable window overflow, set new INTLEVEL */ + or a3, a3, a0 /* Combine the flags with the INTLEVEL computed above */ + wsr a3, ps /* Install the temporary PS used while spilling */ + rsync + rsr a0, EPC1 /* To be restored after SPILL_ALL_WINDOWS */ + + addi sp, sp, XCPTCONTEXT_SIZE /* Go back to spill register region */ + SPILL_ALL_WINDOWS /* Place the live register windows there */ + addi sp, sp, -XCPTCONTEXT_SIZE /* Return the current stack pointer and proceed with context save */ + + wsr a2, PS /* Restore PS to the value at entry */ + wsr a0, EPC1 /* Restore EPC1 to the value at entry */ + rsync + l32i a0, sp, (4 * REG_TMP0) /* Restore return address */ + - rsr sp, EXCSAVE_1 /* Save interruptee's a0 */ - l32i a0, sp, (4 * REG_TMP0) /* Save return address */ - l32i sp, sp, (4 * REG_TMP1) /* Save current stack pointer */ #endif ret diff --git a/arch/xtensa/src/common/xtensa_int_handlers.S b/arch/xtensa/src/common/xtensa_int_handlers.S index 2bdb9978ce..e2b71dd46f 100644 --- a/arch/xtensa/src/common/xtensa_int_handlers.S +++ b/arch/xtensa/src/common/xtensa_int_handlers.S @@ -352,7 +352,7 @@ _xtensa_level1_handler: l32i a0, a2, (4 * REG_PC) /* Retrieve interruptee's PC */ wsr a0, EPC_1 l32i a0, a2, (4 * REG_A0) /* Retrieve interruptee's A0 */ - l32i sp, a2, (4 * REG_A1) /* Retrieve interrupt stack frame */ + l32i sp, a2, (4 * REG_A1) /* Retrieve interrupt stack frame */ l32i a2, a2, (4 * REG_A2) /* 
Retrieve interruptee's A2 */ rsync /* Ensure PS and EPC written */