From b9707776d6b0009cb7f1fafcf731a420f0cf57b4 Mon Sep 17 00:00:00 2001 From: Masayuki Ishikawa Date: Wed, 14 Feb 2018 14:10:32 +0000 Subject: [PATCH] Merged in masayuki2009/nuttx.nuttx/fix_signal_handing_for_smp (pull request #599) Fix signal handling for smp * sched/signal: Remove SMP related logic in sig_dispatch.c This change prevents a deadlock in up_schedulesigaction.c where inter-CPU signal handling is actually implemented. Signed-off-by: Masayuki Ishikawa * arch/arm/src/armv7-m: Fix signal handling for SMP In the previous implementation, signal handling for SMP was incorrect. Thus, for example, if an inter-CPU signal happened an incorrect tcb was signaled and caused ASSERT(). This change fixes the issues and works for both inter-CPU signal handling and signal handling on the same CPU. Signed-off-by: Masayuki Ishikawa Approved-by: Gregory Nutt --- arch/arm/src/armv7-m/up_schedulesigaction.c | 105 +++++++++++--------- sched/signal/sig_dispatch.c | 26 +---- 2 files changed, 61 insertions(+), 70 deletions(-) diff --git a/arch/arm/src/armv7-m/up_schedulesigaction.c b/arch/arm/src/armv7-m/up_schedulesigaction.c index 42aa2932ba..83d51e8c12 100644 --- a/arch/arm/src/armv7-m/up_schedulesigaction.c +++ b/arch/arm/src/armv7-m/up_schedulesigaction.c @@ -266,12 +266,6 @@ void up_schedule_sigaction(struct tcb_s *tcb, sig_deliver_t sigdeliver) * CPU. In the former case, we will have to PAUSE the other CPU * first. But in either case, we will have to modify the return * state as well as the state in the TCB. - * - * Hmmm... there looks like a latent bug here: The following logic - * would fail in the strange case where we are in an interrupt - * handler, the thread is signalling itself, but a context switch - * to another task has occurred so that CURRENT_REGS does not - * refer to the thread of this_task()! 
*/ else @@ -282,44 +276,33 @@ void up_schedule_sigaction(struct tcb_s *tcb, sig_deliver_t sigdeliver) if (cpu != me) { + /* Pause the CPU */ + up_cpu_pause(cpu); - } - /* Save the return PC, CPSR and either the BASEPRI or PRIMASK - * registers (and perhaps also the LR). These will be - * restored by the signal trampoline after the signal has been - * delivered. - */ + /* Wait while the pause request is pending */ - tcb->xcp.sigdeliver = (FAR void *)sigdeliver; - tcb->xcp.saved_pc = CURRENT_REGS[REG_PC]; + while (up_cpu_pausereq(cpu)); + + /* Now tcb on the other CPU can be accessed safely */ + + /* Copy tcb->xcp.regs to tcp.xcp.saved. These will be restored + * by the signal trampoline after the signal has been delivered. + */ + + tcb->xcp.sigdeliver = (FAR void *)sigdeliver; + tcb->xcp.saved_pc = tcb->xcp.regs[REG_PC]; #ifdef CONFIG_ARMV7M_USEBASEPRI - tcb->xcp.saved_basepri = CURRENT_REGS[REG_BASEPRI]; + tcb->xcp.saved_basepri = tcb->xcp.regs[REG_BASEPRI]; #else - tcb->xcp.saved_primask = CURRENT_REGS[REG_PRIMASK]; + tcb->xcp.saved_primask = tcb->xcp.regs[REG_PRIMASK]; #endif - tcb->xcp.saved_xpsr = CURRENT_REGS[REG_XPSR]; + tcb->xcp.saved_xpsr = tcb->xcp.regs[REG_XPSR]; #ifdef CONFIG_BUILD_PROTECTED - tcb->xcp.saved_lr = CURRENT_REGS[REG_LR]; + tcb->xcp.saved_lr = tcb->xcp.regs[REG_LR]; #endif - /* Increment the IRQ lock count so that when the task is restarted, - * it will hold the IRQ spinlock. - */ - DEBUGASSERT(tcb->irqcount < INT16_MAX); - tcb->irqcount++; - - /* Handle a possible race condition where the TCB was suspended - * just before we paused the other CPU. The critical section - * established above will prevent new threads from running on - * that CPU, but it will not guarantee that the running thread - * did not suspend itself (allowing any threads "assigned" to - * the CPU to run). 
- */ - - if (tcb->task_state != TSTATE_TASK_RUNNING) - { - /* Then set up to vector to the trampoline with interrupts + /* Then set up vector to the trampoline with interrupts * disabled. We must already be in privileged thread mode * to be here. */ @@ -337,8 +320,29 @@ void up_schedule_sigaction(struct tcb_s *tcb, sig_deliver_t sigdeliver) } else { - /* Then set up to vector to the trampoline with interrupts - * disabled + /* tcb is running on the same CPU */ + + /* Save the return PC, CPSR and either the BASEPRI or PRIMASK + * registers (and perhaps also the LR). These will be + * restored by the signal trampoline after the signal has been + * delivered. + */ + + tcb->xcp.sigdeliver = (FAR void *)sigdeliver; + tcb->xcp.saved_pc = CURRENT_REGS[REG_PC]; +#ifdef CONFIG_ARMV7M_USEBASEPRI + tcb->xcp.saved_basepri = CURRENT_REGS[REG_BASEPRI]; +#else + tcb->xcp.saved_primask = CURRENT_REGS[REG_PRIMASK]; +#endif + tcb->xcp.saved_xpsr = CURRENT_REGS[REG_XPSR]; +#ifdef CONFIG_BUILD_PROTECTED + tcb->xcp.saved_lr = CURRENT_REGS[REG_LR]; +#endif + + /* Then set up vector to the trampoline with interrupts + * disabled. The kernel-space trampoline must run in + * privileged thread mode. */ CURRENT_REGS[REG_PC] = (uint32_t)up_sigdeliver; @@ -351,15 +355,6 @@ void up_schedule_sigaction(struct tcb_s *tcb, sig_deliver_t sigdeliver) #ifdef CONFIG_BUILD_PROTECTED CURRENT_REGS[REG_LR] = EXC_RETURN_PRIVTHR; #endif - /* In an SMP configuration, the interrupt disable logic also - * involves spinlocks that are configured per the TCB irqcount - * field. This is logically equivalent to enter_critical_section(). - * The matching call to leave_critical_section() will be - * performed in up_sigdeliver(). - */ - - spin_setbit(&g_cpu_irqset, cpu, &g_cpu_irqsetlock, - &g_cpu_irqlock); /* And make sure that the saved context in the TCB is the same * as the interrupt return context. 
@@ -368,6 +363,24 @@ void up_schedule_sigaction(struct tcb_s *tcb, sig_deliver_t sigdeliver) up_savestate(tcb->xcp.regs); } + /* Increment the IRQ lock count so that when the task is restarted, + * it will hold the IRQ spinlock. + */ + + DEBUGASSERT(tcb->irqcount < INT16_MAX); + tcb->irqcount++; + + /* In an SMP configuration, the interrupt disable logic also + * involves spinlocks that are configured per the TCB irqcount + * field. This is logically equivalent to enter_critical_section(). + * The matching call to leave_critical_section() will be + * performed in up_sigdeliver(). + */ + + spin_setbit(&g_cpu_irqset, cpu, &g_cpu_irqsetlock, + &g_cpu_irqlock); + + /* RESUME the other CPU if it was PAUSED */ if (cpu != me) diff --git a/sched/signal/sig_dispatch.c b/sched/signal/sig_dispatch.c index ebe31093ae..e11d25e223 100644 --- a/sched/signal/sig_dispatch.c +++ b/sched/signal/sig_dispatch.c @@ -345,9 +345,6 @@ int nxsig_tcbdispatch(FAR struct tcb_s *stcb, siginfo_t *info) else { -#ifdef CONFIG_SMP - int cpu; -#endif /* Queue any sigaction's requested by this task. */ ret = nxsig_queue_action(stcb, info); @@ -356,32 +353,13 @@ int nxsig_tcbdispatch(FAR struct tcb_s *stcb, siginfo_t *info) flags = enter_critical_section(); -#ifdef CONFIG_SMP - /* If the thread is running on another CPU, then pause that CPU. We can - * then setup the for signal delivery on the running thread. When the - * CPU is resumed, the signal handler will then execute. - */ - - cpu = sched_cpu_pause(stcb); -#endif /* CONFIG_SMP */ - /* Then schedule execution of the signal handling action on the - * recipient's thread. + * recipient's thread. SMP related handling will be done in + * up_schedule_sigaction() */ up_schedule_sigaction(stcb, nxsig_deliver); -#ifdef CONFIG_SMP - /* Resume the paused CPU (if any) */ - - if (cpu >= 0) - { - /* I am not yet sure how to handle a failure here. 
*/ - - DEBUGVERIFY(up_cpu_resume(cpu)); - } -#endif /* CONFIG_SMP */ - /* Check if the task is waiting for an unmasked signal. If so, then * unblock it. This must be performed in a critical section because * signals can be queued from the interrupt level.