From e2df52390ace192e8fe632ecb83c2fa44373259a Mon Sep 17 00:00:00 2001 From: ligd Date: Wed, 10 Aug 2022 17:22:48 +0800 Subject: [PATCH] SMP: fix crash when switch to new task which is still running Situation: Assume we have 2 cpus, and busy run task0. CPU0 CPU1 task0 -> task1 task2 -> task0 1. remove task0 form runninglist 2. take task1 as new tcb 3. add task0 to blocklist 4. clear spinlock 4.1 remove task2 form runninglist 4.2 take task0 as new tcb 4.3 add task2 to blocklist 4.4 use svc ISR swith to task0 4.5 crash 5. use svc ISR swith to task1 Fix: Move clear spinlock to the end of svc ISR Signed-off-by: ligd --- arch/arm/src/arm/arm_syscall.c | 9 ++- arch/arm/src/armv6-m/arm_doirq.c | 11 +++- arch/arm/src/armv7-a/arm_doirq.c | 9 ++- arch/arm/src/armv7-a/arm_syscall.c | 8 ++- arch/arm/src/armv7-m/arm_doirq.c | 11 +++- arch/arm/src/armv7-r/arm_doirq.c | 9 ++- arch/arm/src/armv7-r/arm_syscall.c | 8 ++- arch/arm/src/armv8-m/arm_doirq.c | 11 +++- arch/arm64/src/common/arm64_doirq.c | 6 +- arch/arm64/src/common/arm64_syscall.c | 7 +++ arch/ceva/src/common/up_doirq.c | 6 +- arch/risc-v/src/common/riscv_doirq.c | 19 ++++-- .../common/supervisor/riscv_perform_syscall.c | 16 ++++- arch/sim/src/sim/up_blocktask.c | 4 ++ arch/sim/src/sim/up_exit.c | 4 ++ arch/sim/src/sim/up_releasepending.c | 4 ++ arch/sim/src/sim/up_reprioritizertr.c | 4 ++ arch/sim/src/sim/up_smpsignal.c | 4 ++ arch/sim/src/sim/up_unblocktask.c | 8 +++ arch/xtensa/src/common/xtensa_irqdispatch.c | 9 ++- include/nuttx/irq.h | 20 +++++++ sched/irq/irq_csection.c | 60 ++++++++++++++++++- sched/sched/sched_resumescheduler.c | 40 ------------- 23 files changed, 224 insertions(+), 63 deletions(-) diff --git a/arch/arm/src/arm/arm_syscall.c b/arch/arm/src/arm/arm_syscall.c index d12cf751fc..81828e0bfc 100644 --- a/arch/arm/src/arm/arm_syscall.c +++ b/arch/arm/src/arm/arm_syscall.c @@ -149,11 +149,18 @@ uint32_t *arm_syscall(uint32_t *regs) } #endif + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + /* Set CURRENT_REGS to NULL to indicate that we are no longer in an * interrupt handler. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; /* Return the last value of curent_regs. This supports context switches diff --git a/arch/arm/src/armv6-m/arm_doirq.c b/arch/arm/src/armv6-m/arm_doirq.c index ce1967c62f..977976f043 100644 --- a/arch/arm/src/armv6-m/arm_doirq.c +++ b/arch/arm/src/armv6-m/arm_doirq.c @@ -77,9 +77,16 @@ uint32_t *arm_doirq(int irq, uint32_t *regs) if (regs == NULL) { - /* Update the return regs and restore the CURRENT_REGS to NULL. */ + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + + /* Update the CURRENT_REGS to NULL. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; } #endif diff --git a/arch/arm/src/armv7-a/arm_doirq.c b/arch/arm/src/armv7-a/arm_doirq.c index e8c7e1c66c..17b5759e22 100644 --- a/arch/arm/src/armv7-a/arm_doirq.c +++ b/arch/arm/src/armv7-a/arm_doirq.c @@ -89,11 +89,18 @@ uint32_t *arm_doirq(int irq, uint32_t *regs) } #endif + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + /* Set CURRENT_REGS to NULL to indicate that we are no longer in an * interrupt handler. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; #endif diff --git a/arch/arm/src/armv7-a/arm_syscall.c b/arch/arm/src/armv7-a/arm_syscall.c index b621d6044f..454f230131 100644 --- a/arch/arm/src/armv7-a/arm_syscall.c +++ b/arch/arm/src/armv7-a/arm_syscall.c @@ -555,7 +555,13 @@ uint32_t *arm_syscall(uint32_t *regs) } #endif - regs = (uint32_t *)CURRENT_REGS; + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } /* Report what happened */ diff --git a/arch/arm/src/armv7-m/arm_doirq.c b/arch/arm/src/armv7-m/arm_doirq.c index 1aeda39faa..358aefad65 100644 --- a/arch/arm/src/armv7-m/arm_doirq.c +++ b/arch/arm/src/armv7-m/arm_doirq.c @@ -77,9 +77,16 @@ uint32_t *arm_doirq(int irq, uint32_t *regs) if (regs == NULL) { - /* Update the return regs and restore the CURRENT_REGS to NULL. */ + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + + /* Update the CURRENT_REGS to NULL. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; } #endif diff --git a/arch/arm/src/armv7-r/arm_doirq.c b/arch/arm/src/armv7-r/arm_doirq.c index 6d59fc035d..d601badb67 100644 --- a/arch/arm/src/armv7-r/arm_doirq.c +++ b/arch/arm/src/armv7-r/arm_doirq.c @@ -60,11 +60,18 @@ uint32_t *arm_doirq(int irq, uint32_t *regs) irq_dispatch(irq, regs); + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + /* Set CURRENT_REGS to NULL to indicate that we are no longer in an * interrupt handler. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; board_autoled_off(LED_INIRQ); diff --git a/arch/arm/src/armv7-r/arm_syscall.c b/arch/arm/src/armv7-r/arm_syscall.c index 8283583ca8..720c3e8377 100644 --- a/arch/arm/src/armv7-r/arm_syscall.c +++ b/arch/arm/src/armv7-r/arm_syscall.c @@ -533,7 +533,13 @@ uint32_t *arm_syscall(uint32_t *regs) break; } - regs = (uint32_t *)CURRENT_REGS; + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } /* Report what happened */ diff --git a/arch/arm/src/armv8-m/arm_doirq.c b/arch/arm/src/armv8-m/arm_doirq.c index 5cc87802c9..0670c76aef 100644 --- a/arch/arm/src/armv8-m/arm_doirq.c +++ b/arch/arm/src/armv8-m/arm_doirq.c @@ -77,9 +77,16 @@ uint32_t *arm_doirq(int irq, uint32_t *regs) if (regs == NULL) { - /* Update the return regs and restore the CURRENT_REGS to NULL. */ + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + + /* Update the CURRENT_REGS to NULL. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; } #endif diff --git a/arch/arm64/src/common/arm64_doirq.c b/arch/arm64/src/common/arm64_doirq.c index 15bc7fd84d..0bf46af4a4 100644 --- a/arch/arm64/src/common/arm64_doirq.c +++ b/arch/arm64/src/common/arm64_doirq.c @@ -106,13 +106,17 @@ uint64_t *arm64_doirq(int irq, uint64_t * regs) group_addrenv(NULL); #endif + + /* Restore the cpu lock */ + + restore_critical_section(); + regs = (uint64_t *)CURRENT_REGS; } /* Set CURRENT_REGS to NULL to indicate that we are no longer in an * interrupt handler. */ - regs = (uint64_t *)CURRENT_REGS; CURRENT_REGS = NULL; return regs; diff --git a/arch/arm64/src/common/arm64_syscall.c b/arch/arm64/src/common/arm64_syscall.c index 15c7eef632..29944e23bf 100644 --- a/arch/arm64/src/common/arm64_syscall.c +++ b/arch/arm64/src/common/arm64_syscall.c @@ -202,6 +202,13 @@ uint64_t *arm64_syscall_switch(uint64_t * regs) break; } + /* Restore the cpu lock */ + + if ((uint64_t *)f_regs != ret_regs) + { + restore_critical_section(); + } + return ret_regs; } diff --git a/arch/ceva/src/common/up_doirq.c b/arch/ceva/src/common/up_doirq.c index 1bab68b323..f2ff756254 100644 --- a/arch/ceva/src/common/up_doirq.c +++ b/arch/ceva/src/common/up_doirq.c @@ -76,7 +76,11 @@ uint32_t *up_doirq(int irq, uint32_t *regs) * a context switch occurred during interrupt processing. */ - regs = CURRENT_REGS; + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = CURRENT_REGS; + } /* Restore the previous value of CURRENT_REGS. NULL would indicate * that we are no longer in an interrupt handler. diff --git a/arch/risc-v/src/common/riscv_doirq.c b/arch/risc-v/src/common/riscv_doirq.c index 2792ef46d3..4dad4c6887 100644 --- a/arch/risc-v/src/common/riscv_doirq.c +++ b/arch/risc-v/src/common/riscv_doirq.c @@ -102,13 +102,20 @@ uintptr_t *riscv_doirq(int irq, uintptr_t *regs) } #endif - /* If a context switch occurred while processing the interrupt then - * CURRENT_REGS may have change value. If we return any value different - * from the input regs, then the lower level will know that a context - * switch occurred during interrupt processing. - */ + if (regs != CURRENT_REGS) + { + /* Restore the cpu lock */ - regs = (uintptr_t *)CURRENT_REGS; + restore_critical_section(); + + /* If a context switch occurred while processing the interrupt then + * CURRENT_REGS may have change value. If we return any value + * different from the input regs, then the lower level will know + * that a context switch occurred during interrupt processing. + */ + + regs = (uintptr_t *)CURRENT_REGS; + } /* Set CURRENT_REGS to NULL to indicate that we are no longer in an * interrupt handler. diff --git a/arch/risc-v/src/common/supervisor/riscv_perform_syscall.c b/arch/risc-v/src/common/supervisor/riscv_perform_syscall.c index af32ec2cef..89dfd6ca5e 100644 --- a/arch/risc-v/src/common/supervisor/riscv_perform_syscall.c +++ b/arch/risc-v/src/common/supervisor/riscv_perform_syscall.c @@ -56,9 +56,21 @@ void *riscv_perform_syscall(uintptr_t *regs) } #endif - /* Set new context */ + if (regs != CURRENT_REGS) + { + /* Restore the cpu lock */ + + restore_critical_section(); + + /* If a context switch occurred while processing the interrupt then + * CURRENT_REGS may have change value. If we return any value + * different from the input regs, then the lower level will know + * that a context switch occurred during interrupt processing. + */ + + regs = (uintptr_t *)CURRENT_REGS; + } - regs = (uintptr_t *)CURRENT_REGS; CURRENT_REGS = NULL; return regs; diff --git a/arch/sim/src/sim/up_blocktask.c b/arch/sim/src/sim/up_blocktask.c index 5463099c7a..779d283cce 100644 --- a/arch/sim/src/sim/up_blocktask.c +++ b/arch/sim/src/sim/up_blocktask.c @@ -124,6 +124,10 @@ void up_block_task(struct tcb_s *tcb, tstate_t task_state) nxsched_resume_scheduler(rtcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts */ longjmp(rtcb->xcp.regs, 1); diff --git a/arch/sim/src/sim/up_exit.c b/arch/sim/src/sim/up_exit.c index 3ea743a151..d3e6f4fec5 100644 --- a/arch/sim/src/sim/up_exit.c +++ b/arch/sim/src/sim/up_exit.c @@ -76,6 +76,10 @@ void up_exit(int status) nxsched_resume_scheduler(tcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts */ longjmp(tcb->xcp.regs, 1); diff --git a/arch/sim/src/sim/up_releasepending.c b/arch/sim/src/sim/up_releasepending.c index 1e1352d514..1b664e62f0 100644 --- a/arch/sim/src/sim/up_releasepending.c +++ b/arch/sim/src/sim/up_releasepending.c @@ -93,6 +93,10 @@ void up_release_pending(void) nxsched_resume_scheduler(rtcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts */ longjmp(rtcb->xcp.regs, 1); diff --git a/arch/sim/src/sim/up_reprioritizertr.c b/arch/sim/src/sim/up_reprioritizertr.c index ca8858a0be..db2f143688 100644 --- a/arch/sim/src/sim/up_reprioritizertr.c +++ b/arch/sim/src/sim/up_reprioritizertr.c @@ -146,6 +146,10 @@ void up_reprioritize_rtr(struct tcb_s *tcb, uint8_t priority) nxsched_resume_scheduler(rtcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts */ longjmp(rtcb->xcp.regs, 1); diff --git a/arch/sim/src/sim/up_smpsignal.c b/arch/sim/src/sim/up_smpsignal.c index facb1ae690..54ba4aa8a3 100644 --- a/arch/sim/src/sim/up_smpsignal.c +++ b/arch/sim/src/sim/up_smpsignal.c @@ -199,6 +199,10 @@ int up_cpu_paused(int cpu) nxsched_resume_scheduler(tcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts. Any necessary address environment changes * will be made when the interrupt returns. */ diff --git a/arch/sim/src/sim/up_unblocktask.c b/arch/sim/src/sim/up_unblocktask.c index 7f5474e23c..3f1bd1929e 100644 --- a/arch/sim/src/sim/up_unblocktask.c +++ b/arch/sim/src/sim/up_unblocktask.c @@ -101,6 +101,10 @@ void up_unblock_task(struct tcb_s *tcb) nxsched_resume_scheduler(rtcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts */ up_restorestate(rtcb->xcp.regs); @@ -125,6 +129,10 @@ void up_unblock_task(struct tcb_s *tcb) nxsched_resume_scheduler(rtcb); + /* Restore the cpu lock */ + + restore_critical_section(); + /* Then switch contexts */ longjmp(rtcb->xcp.regs, 1); diff --git a/arch/xtensa/src/common/xtensa_irqdispatch.c b/arch/xtensa/src/common/xtensa_irqdispatch.c index 7c5e26b43b..4b4b4b90db 100644 --- a/arch/xtensa/src/common/xtensa_irqdispatch.c +++ b/arch/xtensa/src/common/xtensa_irqdispatch.c @@ -85,11 +85,18 @@ uint32_t *xtensa_irq_dispatch(int irq, uint32_t *regs) } #endif + /* Restore the cpu lock */ + + if (regs != CURRENT_REGS) + { + restore_critical_section(); + regs = (uint32_t *)CURRENT_REGS; + } + /* Set CURRENT_REGS to NULL to indicate that we are no longer in an * interrupt handler. */ - regs = (uint32_t *)CURRENT_REGS; CURRENT_REGS = NULL; #endif diff --git a/include/nuttx/irq.h b/include/nuttx/irq.h index ba43f42385..da44518239 100644 --- a/include/nuttx/irq.h +++ b/include/nuttx/irq.h @@ -219,6 +219,26 @@ void leave_critical_section(irqstate_t flags); # define leave_critical_section(f) up_irq_restore(f) #endif +/**************************************************************************** + * Name: restore_critical_section + * + * Description: + * Restore the critical_section + * + * Input Parameters: + * None + * + * Returned Value: + * None + * + ****************************************************************************/ + +#ifdef CONFIG_SMP +void restore_critical_section(void); +#else +# define restore_critical_section() +#endif + #undef EXTERN #ifdef __cplusplus } diff --git a/sched/irq/irq_csection.c b/sched/irq/irq_csection.c index e33f441cfe..f0b6fa001c 100644 --- a/sched/irq/irq_csection.c +++ b/sched/irq/irq_csection.c @@ -634,7 +634,7 @@ void leave_critical_section(irqstate_t flags) * holder of the IRQ lock. * * Input Parameters: - * rtcb - Points to the blocked TCB that is ready-to-run + * cpu - Points to which cpu * * Returned Value: * true - IRQs are locked by a different CPU. @@ -697,4 +697,62 @@ bool irq_cpu_locked(int cpu) } #endif +/**************************************************************************** + * Name: restore_critical_section + * + * Description: + * Restore the critical_section + * + * Input Parameters: + * None + * + * Returned Value: + * None + * + ****************************************************************************/ + +#ifdef CONFIG_SMP +void restore_critical_section(void) +{ + /* NOTE: The following logic for adjusting global IRQ controls were + * derived from nxsched_add_readytorun() and sched_removedreadytorun() + * Here, we only handles clearing logic to defer unlocking IRQ lock + * followed by context switching. + */ + + FAR struct tcb_s *tcb = this_task(); + int me = this_cpu(); + + /* Adjust global IRQ controls. If irqcount is greater than zero, + * then this task/this CPU holds the IRQ lock + */ + + if (tcb->irqcount > 0) + { + /* Do notihing here + * NOTE: spin_setbit() is done in nxsched_add_readytorun() + * and nxsched_remove_readytorun() + */ + } + + /* No.. This CPU will be relinquishing the lock. But this works + * differently if we are performing a context switch from an + * interrupt handler and the interrupt handler has established + * a critical section. We can detect this case when + * g_cpu_nestcount[me] > 0. + */ + + else if (g_cpu_nestcount[me] <= 0) + { + /* Release our hold on the IRQ lock. */ + + if ((g_cpu_irqset & (1 << me)) != 0) + { + spin_clrbit(&g_cpu_irqset, me, &g_cpu_irqsetlock, + &g_cpu_irqlock); + } + } +} +#endif /* CONFIG_SMP */ + #endif /* CONFIG_IRQCOUNT */ diff --git a/sched/sched/sched_resumescheduler.c b/sched/sched/sched_resumescheduler.c index 4ff4e96613..77d6bc92d2 100644 --- a/sched/sched/sched_resumescheduler.c +++ b/sched/sched/sched_resumescheduler.c @@ -74,46 +74,6 @@ void nxsched_resume_scheduler(FAR struct tcb_s *tcb) #ifdef CONFIG_SCHED_INSTRUMENTATION sched_note_resume(tcb); #endif - -#ifdef CONFIG_SMP - /* NOTE: The following logic for adjusting global IRQ controls were - * derived from nxsched_add_readytorun() and sched_removedreadytorun() - * Here, we only handles clearing logic to defer unlocking IRQ lock - * followed by context switching. - */ - - int me = this_cpu(); - - /* Adjust global IRQ controls. If irqcount is greater than zero, - * then this task/this CPU holds the IRQ lock - */ - - if (tcb->irqcount > 0) - { - /* Do nothing here - * NOTE: spin_setbit() is done in nxsched_add_readytorun() - * and nxsched_remove_readytorun() - */ - } - - /* No.. This CPU will be relinquishing the lock. But this works - * differently if we are performing a context switch from an - * interrupt handler and the interrupt handler has established - * a critical section. We can detect this case when - * g_cpu_nestcount[me] > 0. - */ - - else if (g_cpu_nestcount[me] <= 0) - { - /* Release our hold on the IRQ lock. */ - - if ((g_cpu_irqset & (1 << me)) != 0) - { - spin_clrbit(&g_cpu_irqset, me, &g_cpu_irqsetlock, - &g_cpu_irqlock); - } - } -#endif /* CONFIG_SMP */ } #endif /* CONFIG_RR_INTERVAL > 0 || CONFIG_SCHED_RESUMESCHEDULER */