SMP: fix crash when switching to a new task that is still running

Situation:

Assume we have 2 CPUs, and task0 is busy running.

CPU0                                CPU1
task0 -> task1                      task2 -> task0
1. remove task0 from runninglist
2. take task1 as new tcb
3. add task0 to blocklist
4. clear spinlock
                                    4.1 remove task2 from runninglist
                                    4.2 take task0 as new tcb
                                    4.3 add task2 to blocklist
                                    4.4 use svc ISR to switch to task0
                                    4.5 crash
5. use svc ISR to switch to task1

Once CPU0 clears the spinlock at step 4, CPU1 can pick up task0 at
step 4.2 and resume it at step 4.4 before CPU0 has saved task0's
context (that only happens at step 5), so task0 restarts from stale
registers and crashes.

Fix:
Move the spinlock clearing to the end of the svc ISR (a user-space
sketch of the race follows the commit metadata below).

Signed-off-by: ligd <liguiding1@xiaomi.com>
ligd 2022-08-10 17:22:48 +08:00 committed by Masayuki Ishikawa
parent 615bc9f0e2
commit e2df52390a
23 changed files with 224 additions and 63 deletions
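The window can be shown outside the kernel. Below is a minimal
user-space analogy of the race (illustration only, not code from this
commit: the task flags, the FIXED_ORDER toggle, the delay, and the
thread names are all invented). "CPU0" publishes task0 as blocked
before task0's context is saved; with FIXED_ORDER set to 0 (the
pre-fix ordering) "CPU1" may resume task0 from unsaved state, which is
the crash at step 4.5. Keeping the lock held until the save is done,
as the commit does by deferring the unlock to the end of the svc ISR,
closes the window.

/* race.c - user-space sketch of the SMP context-switch race */

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define FIXED_ORDER 1  /* 0: unlock before saving (crash window open)
                        * 1: unlock after saving (this commit's order) */

static pthread_spinlock_t g_cpu_lock;  /* stands in for the cpu spinlock */
static bool g_task0_blocked;           /* task0 moved to the block list  */
static bool g_task0_saved;             /* task0's context has been saved */

static void *cpu0(void *arg)           /* switches task0 -> task1 */
{
  (void)arg;
  pthread_spin_lock(&g_cpu_lock);
  g_task0_blocked = true;              /* steps 1-3 */
#if !FIXED_ORDER
  pthread_spin_unlock(&g_cpu_lock);    /* step 4: unlock too early */
#endif
  usleep(1000);                        /* svc ISR latency before step 5 */
  g_task0_saved = true;                /* step 5: context finally saved */
#if FIXED_ORDER
  pthread_spin_unlock(&g_cpu_lock);    /* fix: unlock at end of svc ISR */
#endif
  return NULL;
}

static void *cpu1(void *arg)           /* switches task2 -> task0 */
{
  (void)arg;
  for (;;)
    {
      pthread_spin_lock(&g_cpu_lock);
      if (g_task0_blocked)             /* steps 4.1-4.3: task0 is free */
        {
          /* Step 4.4: resuming task0 is only safe if its context was
           * saved first; a false assertion here is the 4.5 crash.
           */

          assert(g_task0_saved);
          pthread_spin_unlock(&g_cpu_lock);
          return NULL;
        }

      pthread_spin_unlock(&g_cpu_lock);
    }
}

int main(void)
{
  pthread_t t0;
  pthread_t t1;

  pthread_spin_init(&g_cpu_lock, PTHREAD_PROCESS_PRIVATE);
  pthread_create(&t0, NULL, cpu0, NULL);
  pthread_create(&t1, NULL, cpu1, NULL);
  pthread_join(t0, NULL);
  pthread_join(t1, NULL);
  puts("ok: task0 resumed only after its context was saved");
  return 0;
}

Built with gcc race.c -pthread, the assertion can only fire with
FIXED_ORDER set to 0; with the fixed ordering, CPU1 cannot observe
task0 as blocked until task0's context is saved.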


@@ -149,11 +149,18 @@ uint32_t *arm_syscall(uint32_t *regs)
     }
 #endif
 
+  /* Restore the cpu lock */
+
+  if (regs != CURRENT_REGS)
+    {
+      restore_critical_section();
+      regs = (uint32_t *)CURRENT_REGS;
+    }
+
   /* Set CURRENT_REGS to NULL to indicate that we are no longer in an
    * interrupt handler.
    */
 
-  regs = (uint32_t *)CURRENT_REGS;
   CURRENT_REGS = NULL;
 
   /* Return the last value of curent_regs.  This supports context switches


@@ -77,9 +77,16 @@ uint32_t *arm_doirq(int irq, uint32_t *regs)
   if (regs == NULL)
     {
-      /* Update the return regs and restore the CURRENT_REGS to NULL. */
+      /* Restore the cpu lock */
+
+      if (regs != CURRENT_REGS)
+        {
+          restore_critical_section();
+          regs = (uint32_t *)CURRENT_REGS;
+        }
+
+      /* Update the CURRENT_REGS to NULL. */
 
       regs = (uint32_t *)CURRENT_REGS;
       CURRENT_REGS = NULL;
     }
 #endif


@@ -89,11 +89,18 @@ uint32_t *arm_doirq(int irq, uint32_t *regs)
     }
 #endif
 
+  /* Restore the cpu lock */
+
+  if (regs != CURRENT_REGS)
+    {
+      restore_critical_section();
+      regs = (uint32_t *)CURRENT_REGS;
+    }
+
   /* Set CURRENT_REGS to NULL to indicate that we are no longer in an
    * interrupt handler.
    */
 
-  regs = (uint32_t *)CURRENT_REGS;
   CURRENT_REGS = NULL;
 #endif


@@ -555,7 +555,13 @@ uint32_t *arm_syscall(uint32_t *regs)
     }
 #endif
 
-  regs = (uint32_t *)CURRENT_REGS;
+  /* Restore the cpu lock */
+
+  if (regs != CURRENT_REGS)
+    {
+      restore_critical_section();
+      regs = (uint32_t *)CURRENT_REGS;
+    }
 
   /* Report what happened */


@@ -77,9 +77,16 @@ uint32_t *arm_doirq(int irq, uint32_t *regs)
   if (regs == NULL)
     {
-      /* Update the return regs and restore the CURRENT_REGS to NULL. */
+      /* Restore the cpu lock */
+
+      if (regs != CURRENT_REGS)
+        {
+          restore_critical_section();
+          regs = (uint32_t *)CURRENT_REGS;
+        }
+
+      /* Update the CURRENT_REGS to NULL. */
 
       regs = (uint32_t *)CURRENT_REGS;
       CURRENT_REGS = NULL;
     }
 #endif


@@ -60,11 +60,18 @@ uint32_t *arm_doirq(int irq, uint32_t *regs)
   irq_dispatch(irq, regs);
 
+  /* Restore the cpu lock */
+
+  if (regs != CURRENT_REGS)
+    {
+      restore_critical_section();
+      regs = (uint32_t *)CURRENT_REGS;
+    }
+
   /* Set CURRENT_REGS to NULL to indicate that we are no longer in an
    * interrupt handler.
    */
 
-  regs = (uint32_t *)CURRENT_REGS;
   CURRENT_REGS = NULL;
 
   board_autoled_off(LED_INIRQ);


@@ -533,7 +533,13 @@ uint32_t *arm_syscall(uint32_t *regs)
         break;
     }
 
-  regs = (uint32_t *)CURRENT_REGS;
+  /* Restore the cpu lock */
+
+  if (regs != CURRENT_REGS)
+    {
+      restore_critical_section();
+      regs = (uint32_t *)CURRENT_REGS;
+    }
 
   /* Report what happened */


@@ -77,9 +77,16 @@ uint32_t *arm_doirq(int irq, uint32_t *regs)
   if (regs == NULL)
     {
-      /* Update the return regs and restore the CURRENT_REGS to NULL. */
+      /* Restore the cpu lock */
+
+      if (regs != CURRENT_REGS)
+        {
+          restore_critical_section();
+          regs = (uint32_t *)CURRENT_REGS;
+        }
+
+      /* Update the CURRENT_REGS to NULL. */
 
       regs = (uint32_t *)CURRENT_REGS;
       CURRENT_REGS = NULL;
     }
 #endif


@@ -106,13 +106,17 @@ uint64_t *arm64_doirq(int irq, uint64_t * regs)
       group_addrenv(NULL);
 #endif
+      /* Restore the cpu lock */
+
+      restore_critical_section();
+      regs = (uint64_t *)CURRENT_REGS;
     }
 
   /* Set CURRENT_REGS to NULL to indicate that we are no longer in an
    * interrupt handler.
    */
 
   regs = (uint64_t *)CURRENT_REGS;
   CURRENT_REGS = NULL;
 
   return regs;


@@ -202,6 +202,13 @@ uint64_t *arm64_syscall_switch(uint64_t * regs)
         break;
     }
 
+  /* Restore the cpu lock */
+
+  if ((uint64_t *)f_regs != ret_regs)
+    {
+      restore_critical_section();
+    }
+
   return ret_regs;
 }


@@ -76,7 +76,11 @@ uint32_t *up_doirq(int irq, uint32_t *regs)
    * a context switch occurred during interrupt processing.
    */
 
-  regs = CURRENT_REGS;
+  if (regs != CURRENT_REGS)
+    {
+      restore_critical_section();
+      regs = CURRENT_REGS;
+    }
 
   /* Restore the previous value of CURRENT_REGS.  NULL would indicate
    * that we are no longer in an interrupt handler.


@@ -102,13 +102,20 @@ uintptr_t *riscv_doirq(int irq, uintptr_t *regs)
     }
 #endif
 
-  /* If a context switch occurred while processing the interrupt then
-   * CURRENT_REGS may have change value.  If we return any value different
-   * from the input regs, then the lower level will know that a context
-   * switch occurred during interrupt processing.
-   */
-
-  regs = (uintptr_t *)CURRENT_REGS;
+  if (regs != CURRENT_REGS)
+    {
+      /* Restore the cpu lock */
+
+      restore_critical_section();
+
+      /* If a context switch occurred while processing the interrupt then
+       * CURRENT_REGS may have change value.  If we return any value
+       * different from the input regs, then the lower level will know
+       * that a context switch occurred during interrupt processing.
+       */
+
+      regs = (uintptr_t *)CURRENT_REGS;
+    }
 
   /* Set CURRENT_REGS to NULL to indicate that we are no longer in an
    * interrupt handler.


@@ -56,9 +56,21 @@ void *riscv_perform_syscall(uintptr_t *regs)
     }
 #endif
 
-  /* Set new context */
+  if (regs != CURRENT_REGS)
+    {
+      /* Restore the cpu lock */
+
+      restore_critical_section();
+
+      /* If a context switch occurred while processing the interrupt then
+       * CURRENT_REGS may have change value.  If we return any value
+       * different from the input regs, then the lower level will know
+       * that a context switch occurred during interrupt processing.
+       */
+
+      regs = (uintptr_t *)CURRENT_REGS;
+    }
 
   regs = (uintptr_t *)CURRENT_REGS;
   CURRENT_REGS = NULL;
 
   return regs;


@@ -124,6 +124,10 @@ void up_block_task(struct tcb_s *tcb, tstate_t task_state)
   nxsched_resume_scheduler(rtcb);
 
+  /* Restore the cpu lock */
+
+  restore_critical_section();
+
   /* Then switch contexts */
 
   longjmp(rtcb->xcp.regs, 1);


@@ -76,6 +76,10 @@ void up_exit(int status)
   nxsched_resume_scheduler(tcb);
 
+  /* Restore the cpu lock */
+
+  restore_critical_section();
+
   /* Then switch contexts */
 
   longjmp(tcb->xcp.regs, 1);


@@ -93,6 +93,10 @@ void up_release_pending(void)
   nxsched_resume_scheduler(rtcb);
 
+  /* Restore the cpu lock */
+
+  restore_critical_section();
+
   /* Then switch contexts */
 
   longjmp(rtcb->xcp.regs, 1);


@@ -146,6 +146,10 @@ void up_reprioritize_rtr(struct tcb_s *tcb, uint8_t priority)
   nxsched_resume_scheduler(rtcb);
 
+  /* Restore the cpu lock */
+
+  restore_critical_section();
+
   /* Then switch contexts */
 
   longjmp(rtcb->xcp.regs, 1);


@@ -199,6 +199,10 @@ int up_cpu_paused(int cpu)
   nxsched_resume_scheduler(tcb);
 
+  /* Restore the cpu lock */
+
+  restore_critical_section();
+
   /* Then switch contexts.  Any necessary address environment changes
    * will be made when the interrupt returns.
    */


@@ -101,6 +101,10 @@ void up_unblock_task(struct tcb_s *tcb)
   nxsched_resume_scheduler(rtcb);
 
+  /* Restore the cpu lock */
+
+  restore_critical_section();
+
   /* Then switch contexts */
 
   up_restorestate(rtcb->xcp.regs);
@@ -125,6 +129,10 @@ void up_unblock_task(struct tcb_s *tcb)
   nxsched_resume_scheduler(rtcb);
 
+  /* Restore the cpu lock */
+
+  restore_critical_section();
+
   /* Then switch contexts */
 
   longjmp(rtcb->xcp.regs, 1);


@@ -85,11 +85,18 @@ uint32_t *xtensa_irq_dispatch(int irq, uint32_t *regs)
     }
 #endif
 
+  /* Restore the cpu lock */
+
+  if (regs != CURRENT_REGS)
+    {
+      restore_critical_section();
+      regs = (uint32_t *)CURRENT_REGS;
+    }
+
   /* Set CURRENT_REGS to NULL to indicate that we are no longer in an
    * interrupt handler.
    */
 
-  regs = (uint32_t *)CURRENT_REGS;
   CURRENT_REGS = NULL;
 #endif


@@ -219,6 +219,26 @@ void leave_critical_section(irqstate_t flags);
 #  define leave_critical_section(f) up_irq_restore(f)
 #endif
 
+/****************************************************************************
+ * Name: restore_critical_section
+ *
+ * Description:
+ *   Restore the critical_section
+ *
+ * Input Parameters:
+ *   None
+ *
+ * Returned Value:
+ *   None
+ *
+ ****************************************************************************/
+
+#ifdef CONFIG_SMP
+void restore_critical_section(void);
+#else
+#  define restore_critical_section()
+#endif
+
 #undef EXTERN
 #ifdef __cplusplus
 }


@@ -634,7 +634,7 @@ void leave_critical_section(irqstate_t flags)
  *   holder of the IRQ lock.
  *
  * Input Parameters:
- *   rtcb - Points to the blocked TCB that is ready-to-run
+ *   cpu - Points to which cpu
  *
  * Returned Value:
  *   true - IRQs are locked by a different CPU.
@@ -697,4 +697,62 @@ bool irq_cpu_locked(int cpu)
 }
 #endif
 
+/****************************************************************************
+ * Name: restore_critical_section
+ *
+ * Description:
+ *   Restore the critical_section
+ *
+ * Input Parameters:
+ *   None
+ *
+ * Returned Value:
+ *   None
+ *
+ ****************************************************************************/
+
+#ifdef CONFIG_SMP
+void restore_critical_section(void)
+{
+  /* NOTE: The following logic for adjusting global IRQ controls were
+   * derived from nxsched_add_readytorun() and sched_removedreadytorun()
+   * Here, we only handles clearing logic to defer unlocking IRQ lock
+   * followed by context switching.
+   */
+
+  FAR struct tcb_s *tcb = this_task();
+  int me = this_cpu();
+
+  /* Adjust global IRQ controls.  If irqcount is greater than zero,
+   * then this task/this CPU holds the IRQ lock
+   */
+
+  if (tcb->irqcount > 0)
+    {
+      /* Do nothing here
+       * NOTE: spin_setbit() is done in nxsched_add_readytorun()
+       * and nxsched_remove_readytorun()
+       */
+    }
+
+  /* No.. This CPU will be relinquishing the lock.  But this works
+   * differently if we are performing a context switch from an
+   * interrupt handler and the interrupt handler has established
+   * a critical section.  We can detect this case when
+   * g_cpu_nestcount[me] > 0.
+   */
+
+  else if (g_cpu_nestcount[me] <= 0)
+    {
+      /* Release our hold on the IRQ lock. */
+
+      if ((g_cpu_irqset & (1 << me)) != 0)
+        {
+          spin_clrbit(&g_cpu_irqset, me, &g_cpu_irqsetlock,
+                      &g_cpu_irqlock);
+        }
+    }
+}
+#endif /* CONFIG_SMP */
+
 #endif /* CONFIG_IRQCOUNT */


@@ -74,46 +74,6 @@ void nxsched_resume_scheduler(FAR struct tcb_s *tcb)
 #ifdef CONFIG_SCHED_INSTRUMENTATION
   sched_note_resume(tcb);
 #endif
-
-#ifdef CONFIG_SMP
-  /* NOTE: The following logic for adjusting global IRQ controls were
-   * derived from nxsched_add_readytorun() and sched_removedreadytorun()
-   * Here, we only handles clearing logic to defer unlocking IRQ lock
-   * followed by context switching.
-   */
-
-  int me = this_cpu();
-
-  /* Adjust global IRQ controls.  If irqcount is greater than zero,
-   * then this task/this CPU holds the IRQ lock
-   */
-
-  if (tcb->irqcount > 0)
-    {
-      /* Do nothing here
-       * NOTE: spin_setbit() is done in nxsched_add_readytorun()
-       * and nxsched_remove_readytorun()
-       */
-    }
-
-  /* No.. This CPU will be relinquishing the lock.  But this works
-   * differently if we are performing a context switch from an
-   * interrupt handler and the interrupt handler has established
-   * a critical section.  We can detect this case when
-   * g_cpu_nestcount[me] > 0.
-   */
-
-  else if (g_cpu_nestcount[me] <= 0)
-    {
-      /* Release our hold on the IRQ lock. */
-
-      if ((g_cpu_irqset & (1 << me)) != 0)
-        {
-          spin_clrbit(&g_cpu_irqset, me, &g_cpu_irqsetlock,
-                      &g_cpu_irqlock);
-        }
-    }
-#endif /* CONFIG_SMP */
 }
 
 #endif /* CONFIG_RR_INTERVAL > 0 || CONFIG_SCHED_RESUMESCHEDULER */
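Taken together, the change splits one responsibility in two:
nxsched_resume_scheduler() no longer drops this CPU's hold on the IRQ
lock, and every architecture's interrupt/syscall return path now does
so via restore_critical_section() only after the context switch is
committed. A condensed sketch of the resulting pattern follows
(simplified stand-ins, not any single file from the commit:
CURRENT_REGS and the stub below are placeholders for the per-arch
definitions).

#include <stdint.h>

static uint32_t *CURRENT_REGS;         /* stand-in: the scheduler sets
                                        * this when it picks a new task */

static void restore_critical_section(void)
{
  /* Stand-in for the sched/irq/irq_csection.c implementation, which
   * releases this CPU's bit in g_cpu_irqset when appropriate.
   */
}

static uint32_t *irq_return_path(uint32_t *regs)
{
  if (regs != CURRENT_REGS)            /* a context switch occurred */
    {
      /* Drop the IRQ lock only now: the outgoing task's context has
       * already been saved, so another CPU may safely resume it.
       */

      restore_critical_section();
      regs = (uint32_t *)CURRENT_REGS;
    }

  CURRENT_REGS = NULL;                 /* no longer in a handler */
  return regs;
}

int main(void)
{
  uint32_t old_regs[1];
  uint32_t new_regs[1];

  CURRENT_REGS = new_regs;             /* pretend a switch happened */
  return irq_return_path(old_regs) == new_regs ? 0 : 1;
}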