arch/stack_color: correct the end address of stack color
The different optimization of compilers will cause ambiguity in
obtaining sp through up_getsp() in arm_stack_color(), if compile
with clang and enable the optimization flag (-Ofast), up_getsp()
call will be earlier than push {r0-r9,lr}, the end address of color
stack will overlap with saved registers.
Compile line:
clang --target=arm-none-eabi -c "-Ofast" -fno-builtin -march=armv8.1-m.main+mve.fp+fp.dp \
-mtune=cortex-m55 -mthumb -mfpu=fpv5-d16 -mfloat-abi=hard -D__NuttX__ -common/arm_checkstack.c -o arm_checkstack.o
Assembler code:
llvm-objdump -aS arm_checkstack.o
------------------------------------
|00000000 <arm_stack_color>:
|; start = INT32_ALIGN_UP((uintptr_t)stackbase);
| 0: c2 1c adds r2, r0, #3
| 2: 22 f0 03 02 bic r2, r2, #3
|; end = nbytes ? INT32_ALIGN_DOWN((uintptr_t)stackbase + nbytes) :
| 6: 19 b1 cbz r1, 0x10 <arm_stack_color+0x10> @ imm = #6
| 8: 08 44 add r0, r1
| a: 20 f0 03 00 bic r0, r0, #3
| e: 00 e0 b 0x12 <arm_stack_color+0x12> @ imm = #0
|; __asm__
| 10: 68 46 mov r0, sp <--- fetch the sp before push {r7 lr}
| 12: 80 b5 push {r7, lr} <--- sp changed
|; nwords = (end - start) >> 2;
| 14: 80 1a subs r0, r0, r2
| 16: 80 08 lsrs r0, r0, #2
|; }
| 18: 08 bf it eq
| 1a: 80 bd popeq {r7, pc}
| 1c: 4b f6 ef 63 movw r3, #48879
| 20: cd f6 ad 63 movt r3, #57005
| 24: a0 ee 10 3b vdup.32 q0, r3
|; while (nwords-- > 0)
| 28: 20 f0 01 e0 dlstp.32 lr, r0
|; *ptr++ = STACK_COLOR; <--- overwrite
| 2c: a2 ec 04 1f vstrw.32 q0, [r2], #16
| 30: 1f f0 05 c0 letp lr, 0x2c <arm_stack_color+0x2c> @ imm = #-8
|; }
| 34: 80 bd pop {r7, pc}
------------------------------------
Signed-off-by: chao.an <anchao@xiaomi.com>