diff --git a/TODO b/TODO index 77fb109fe4..d50c610998 100644 --- a/TODO +++ b/TODO @@ -10,7 +10,7 @@ issues related to each board port. nuttx/: (13) Task/Scheduler (sched/) - (2) SMP + (1) SMP (1) Memory Management (mm/) (1) Power Management (drivers/pm) (3) Signals (sched/signal, arch/) @@ -336,43 +336,6 @@ o SMP Priority: High. spinlocks, and hence SMP, will not work on such systems without this change. - Title: DEADLOCK SCENARIO WITH up_cpu_pause(). - Description: I think there is a possibilty for a hang in up_cpu_pause(). - Suppose this situation: - - - CPU1 is in a critical section and has the g_cpu_irqlock - spinlock. - - CPU0 takes an interrupt and attempts to enter the critical - section. It spins waiting on g_cpu_irqlock with interrupt - disabled. - - CPU1 calls up_cpu_pause() to pause operation on CPU1. This - will issue an inter-CPU interrupt to CPU0 - - But interrupts are disabled. What will happen? I think - that this is a deadlock: Interrupts will stay disabled on - CPU0 because it is spinning in the interrupt handler; - up_cpu_pause() will hang becuase the inter-CPU interrupt - is pending. - - Are inter-CPU interrupts maskable in the same way as other - interrupts? If the are not-maskable, then we must also handle - them as nested interrupts in some fashion. - - A work-around might be to check the state of other-CPU - interrupt handler inside the spin loop of up_cpu_pause(). - Having the other CPU spinning and waiting for up_cpu_pause() - provided that (1) the pending interrupt can be cleared, and - (2) leave_critical_section() is not called prior to the point - where up_cpu_resume() is called, and (3) up_cpu_resume() is - smart enough to know that it should not attempt to resume a - non-paused CPU. - - This would require some kind of information about each - interrupt handler: In an interrupt, waiting for spinlock, - have spinlock, etc. - - Status: Open - Priority: Medium-High. 
I don't know for certain that this is a problem but it seems like it could - o Memory Management (mm/) ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/arch/arm/src/armv7-a/arm_cpupause.c b/arch/arm/src/armv7-a/arm_cpupause.c index 7689598b55..4abfac25b1 100644 --- a/arch/arm/src/armv7-a/arm_cpupause.c +++ b/arch/arm/src/armv7-a/arm_cpupause.c @@ -55,6 +55,20 @@ * Private Data ****************************************************************************/ +/* These spinlocks are used in the SMP configuration in order to implement + * up_cpu_pause(). The protocol for CPUn to pause CPUm is as follows + * + * 1. The up_cpu_pause() implementation on CPUn locks both g_cpu_wait[m] + * and g_cpu_paused[m]. CPUn then waits spinning on g_cpu_wait[m]. + * 2. When CPUm receives the interrupt it (1) unlocks g_cpu_paused[m] and + * (2) locks g_cpu_wait[m]. The first unblocks CPUn and the second + * blocks CPUm in the interrupt handler. + * + * When CPUm resumes, CPUn unlocks g_cpu_wait[m] and the interrupt handler + * on CPUm continues. CPUm must, of course, also then unlock g_cpu_wait[m] + * so that it will be ready for the next pause operation. + */ + static volatile spinlock_t g_cpu_wait[CONFIG_SMP_NCPUS]; static volatile spinlock_t g_cpu_paused[CONFIG_SMP_NCPUS]; @@ -62,6 +76,92 @@ static volatile spinlock_t g_cpu_paused[CONFIG_SMP_NCPUS]; * Public Functions ****************************************************************************/ +/**************************************************************************** + * Name: up_cpu_pausereq + * + * Description: + * Return true if a pause request is pending for this CPU. + * + * Input Parameters: + * cpu - The index of the CPU to be queried + * + * Returned Value: + * true = a pause request is pending. + * false = no pause request is pending. 
+ * + ****************************************************************************/ + +bool up_cpu_pausereq(int cpu) +{ + return spin_islocked(&g_cpu_paused[cpu]); +} + +/**************************************************************************** + * Name: up_cpu_paused + * + * Description: + * Handle a pause request from another CPU. Normally, this logic is + * executed from interrupt handling logic within the architecture-specific + * implementation. However, it is sometimes necessary to perform the pending + * pause operation in other contexts where the interrupt cannot be taken + * in order to avoid deadlocks. + * + * This function performs the following operations: + * + * 1. It saves the current task state at the head of the current assigned + * task list. + * 2. It waits on a spinlock, then + * 3. Returns from interrupt, restoring the state of the new task at the + * head of the ready to run list. + * + * Input Parameters: + * cpu - The index of the CPU to be paused + * + * Returned Value: + * On success, OK is returned. Otherwise, a negated errno value indicating + * the nature of the failure is returned. + * + ****************************************************************************/ + +int up_cpu_paused(int cpu) +{ + FAR struct tcb_s *tcb = this_task(); + + /* Update scheduler parameters */ + + sched_suspend_scheduler(tcb); + + /* Save the current context at CURRENT_REGS into the TCB at the head + * of the assigned task list for this CPU. + */ + + up_savestate(tcb->xcp.regs); + + /* Wait for the spinlock to be released */ + + spin_unlock(&g_cpu_paused[cpu]); + spin_lock(&g_cpu_wait[cpu]); + + /* Restore the exception context of the tcb at the (new) head of the + * assigned task list. + */ + + tcb = this_task(); + + /* Reset scheduler parameters */ + + sched_resume_scheduler(tcb); + + /* Then switch contexts. Any necessary address environment changes + * will be made when the interrupt returns. 
+ */ + + up_restorestate(tcb->xcp.regs); + spin_unlock(&g_cpu_wait[cpu]); + + return OK; +} + /**************************************************************************** * Name: arm_pause_handler * @@ -84,40 +184,18 @@ static volatile spinlock_t g_cpu_paused[CONFIG_SMP_NCPUS]; int arm_pause_handler(int irq, FAR void *context) { - FAR struct tcb_s *tcb = this_task(); - int cpu = up_cpu_index(); + int cpu = this_cpu(); - /* Update scheduler parameters */ - - sched_suspend_scheduler(tcb); - - /* Save the current context at CURRENT_REGS into the TCB at the head of the - * assigned task list for this CPU. + /* Check for false alarms. Such false alarms could occur as a consequence of + * some deadlock breaking logic that might have already serviced the SG2 + * interrupt by calling up_cpu_paused. */ - up_savestate(tcb->xcp.regs); + if (spin_islocked(&g_cpu_paused[cpu])) + { + return up_cpu_paused(cpu); + } - /* Wait for the spinlock to be released */ - - spin_unlock(&g_cpu_paused[cpu]); - spin_lock(&g_cpu_wait[cpu]); - - /* Restore the exception context of the tcb at the (new) head of the - * assigned task list. - */ - - tcb = this_task(); - - /* Reset scheduler parameters */ - - sched_resume_scheduler(tcb); - - /* Then switch contexts. Any necessary address environment changes will - * be made when the interrupt returns. - */ - - up_restorestate(tcb->xcp.regs); - spin_unlock(&g_cpu_wait[cpu]); return OK; } @@ -134,7 +212,7 @@ int arm_pause_handler(int irq, FAR void *context) * CPU. * * Input Parameters: - * cpu - The index of the CPU to be stopped/ + * cpu - The index of the CPU to be stopped * * Returned Value: * Zero on success; a negated errno value on failure. 
diff --git a/arch/sim/src/up_internal.h b/arch/sim/src/up_internal.h index 658ec0539b..ac0a2269ab 100644 --- a/arch/sim/src/up_internal.h +++ b/arch/sim/src/up_internal.h @@ -199,6 +199,25 @@ extern volatile int g_eventloop; extern volatile int g_uart_data_available; #endif +#ifdef CONFIG_SMP +/* These spinlocks are used in the SMP configuration in order to implement + * up_cpu_pause(). The protocol for CPUn to pause CPUm is as follows + * + * 1. The up_cpu_pause() implementation on CPUn locks both g_cpu_wait[m] + * and g_cpu_paused[m]. CPUn then waits spinning on g_cpu_wait[m]. + * 2. When CPUm receives the interrupt it (1) unlocks g_cpu_paused[m] and + * (2) locks g_cpu_wait[m]. The first unblocks CPUn and the second + * blocks CPUm in the interrupt handler. + * + * When CPUm resumes, CPUn unlocks g_cpu_wait[m] and the interrupt handler + * on CPUm continues. CPUm must, of course, also then unlock g_cpu_wait[m] + * so that it will be ready for the next pause operation. + */ + +extern volatile spinlock_t g_cpu_wait[CONFIG_SMP_NCPUS]; +extern volatile spinlock_t g_cpu_paused[CONFIG_SMP_NCPUS]; +#endif + /**************************************************************************** * Public Function Prototypes ****************************************************************************/ diff --git a/arch/sim/src/up_simsmp.c b/arch/sim/src/up_simsmp.c index 7ff5cdb6d0..5b24f6015c 100644 --- a/arch/sim/src/up_simsmp.c +++ b/arch/sim/src/up_simsmp.c @@ -78,16 +78,30 @@ struct sim_cpuinfo_s static pthread_key_t g_cpukey; static pthread_t g_sim_cputhread[CONFIG_SMP_NCPUS]; -static volatile unsigned char g_sim_cpupaused[CONFIG_SMP_NCPUS]; -static volatile spinlock_t g_sim_cpuwait[CONFIG_SMP_NCPUS]; + +/* These spinlocks are used in the SMP configuration in order to implement + * up_cpu_pause(). The protocol for CPUn to pause CPUm is as follows + * + * 1. The up_cpu_pause() implementation on CPUn locks both g_cpu_wait[m] + * and g_cpu_paused[m]. CPUn then waits spinning on g_cpu_wait[m]. 
+ * 2. When CPUm receives the interrupt it (1) unlocks g_cpu_paused[m] and + * (2) locks g_cpu_wait[m]. The first unblocks CPUn and the second + * blocks CPUm in the interrupt handler. + * + * When CPUm resumes, CPUn unlocks g_cpu_wait[m] and the interrupt handler + * on CPUm continues. CPUm must, of course, also then unlock g_cpu_wait[m] + * so that it will be ready for the next pause operation. + */ + +volatile spinlock_t g_cpu_wait[CONFIG_SMP_NCPUS]; +volatile spinlock_t g_cpu_paused[CONFIG_SMP_NCPUS]; /**************************************************************************** * NuttX domain function prototypes ****************************************************************************/ void os_start(void) __attribute__ ((noreturn)); -void sim_cpu_pause(int cpu, volatile spinlock_t *wait, - volatile unsigned char *paused); +int up_cpu_paused(int cpu); void sim_smp_hook(void); /**************************************************************************** @@ -222,9 +236,7 @@ static void sim_handle_signal(int signo, siginfo_t *info, void *context) { int cpu = (int)((uintptr_t)pthread_getspecific(g_cpukey)); - /* We need to perform the actual tasking operations in the NuttX domain */ - - sim_cpu_pause(cpu, &g_sim_cpuwait[cpu], &g_sim_cpupaused[cpu]); + (void)up_cpu_paused(cpu); } /**************************************************************************** @@ -446,7 +458,8 @@ int up_cpu_pause(int cpu) { /* Take the spinlock that will prevent the CPU thread from running */ - g_sim_cpuwait[cpu] = SP_LOCKED; + g_cpu_wait[cpu] = SP_LOCKED; + g_cpu_paused[cpu] = SP_LOCKED; /* Signal the CPU thread */ @@ -454,7 +467,7 @@ int up_cpu_pause(int cpu) /* Spin, waiting for the thread to pause */ - while (!g_sim_cpupaused[cpu]) + while (g_cpu_paused[cpu] != 0) { pthread_yield(); } @@ -485,6 +498,6 @@ int up_cpu_resume(int cpu) { /* Release the spinlock that will alloc the CPU thread to continue */ - g_sim_cpuwait[cpu] = SP_UNLOCKED; + g_cpu_wait[cpu] = SP_UNLOCKED; return 0; } 
diff --git a/arch/sim/src/up_smpsignal.c b/arch/sim/src/up_smpsignal.c index f921c7f3b4..5b5c761a07 100644 --- a/arch/sim/src/up_smpsignal.c +++ b/arch/sim/src/up_smpsignal.c @@ -52,26 +52,53 @@ ****************************************************************************/ /**************************************************************************** - * Name: sim_cpu_pause + * Name: up_cpu_pausereq * * Description: - * This is the SIGUSR1 signal handling logic. It implements the core - * logic of up_cpu_pause() on the thread of execution the simulated CPU. - * This is the part of the implementation that must be performed in the - * NuttX vs. the host domain. + * Return true if a pause request is pending for this CPU. * * Input Parameters: - * cpu - The CPU being paused. - * wait - Spinlock to wait on to be un-paused - * paused - A boolean to set when we are in the paused state. + * cpu - The index of the CPU to be queried * * Returned Value: - * None + * true = a pause request is pending. + * false = no pause request is pending. * ****************************************************************************/ -void sim_cpu_pause(int cpu, volatile spinlock_t *wait, - volatile unsigned char *paused) +bool up_cpu_pausereq(int cpu) +{ + return spin_islocked(&g_cpu_paused[cpu]); +} + +/**************************************************************************** + * Name: up_cpu_paused + * + * Description: + * Handle a pause request from another CPU. Normally, this logic is + * executed from interrupt handling logic within the architecture-specific + * implementation. However, it is sometimes necessary to perform the pending + * pause operation in other contexts where the interrupt cannot be taken + * in order to avoid deadlocks. + * + * This function performs the following operations: + * + * 1. It saves the current task state at the head of the current assigned + * task list. + * 2. It waits on a spinlock, then + * 3. 
Returns from interrupt, restoring the state of the new task at the + * head of the ready to run list. + * + * Input Parameters: + * cpu - The index of the CPU to be paused + * + * Returned Value: + * On success, OK is returned. Otherwise, a negated errno value indicating + * the nature of the failure is returned. + * + ****************************************************************************/ + +int up_cpu_paused(int cpu) { struct tcb_s *rtcb = current_task(cpu); @@ -86,16 +113,18 @@ void sim_cpu_pause(int cpu, volatile spinlock_t *wait, if (up_setjmp(rtcb->xcp.regs) == 0) { - /* Indicate that we are in the paused state */ + /* Unlock the g_cpu_paused spinlock to indicate that we are in the + * paused state + */ - *paused = 1; + spin_unlock(&g_cpu_paused[cpu]); /* Spin until we are asked to resume. When we resume, we need to * inicate that we are not longer paused. */ - spin_lock(wait); - *paused = 0; + spin_lock(&g_cpu_wait[cpu]); + spin_unlock(&g_cpu_wait[cpu]); /* While we were paused, logic on a different CPU probably changed * the task as that head of the assigned task list. So now we need @@ -125,7 +154,8 @@ void sim_cpu_pause(int cpu, volatile spinlock_t *wait, up_longjmp(rtcb->xcp.regs, 1); } + + return OK; } #endif /* CONFIG_SMP */ - diff --git a/include/nuttx/arch.h b/include/nuttx/arch.h index 8b83308fc8..4affda90da 100644 --- a/include/nuttx/arch.h +++ b/include/nuttx/arch.h @@ -1827,6 +1827,56 @@ int up_cpu_start(int cpu); int up_cpu_pause(int cpu); #endif +/**************************************************************************** + * Name: up_cpu_pausereq + * + * Description: + * Return true if a pause request is pending for this CPU. + * + * Input Parameters: + * cpu - The index of the CPU to be queried + * + * Returned Value: + * true = a pause request is pending. + * false = no pause request is pending. 
+ * + ****************************************************************************/ + +#ifdef CONFIG_SMP +bool up_cpu_pausereq(int cpu); +#endif + +/**************************************************************************** + * Name: up_cpu_paused + * + * Description: + * Handle a pause request from another CPU. Normally, this logic is + * executed from interrupt handling logic within the architecture-specific + * implementation. However, it is sometimes necessary to perform the pending + * pause operation in other contexts where the interrupt cannot be taken + * in order to avoid deadlocks. + * + * This function performs the following operations: + * + * 1. It saves the current task state at the head of the current assigned + * task list. + * 2. It waits on a spinlock, then + * 3. Returns from interrupt, restoring the state of the new task at the + * head of the ready to run list. + * + * Input Parameters: + * cpu - The index of the CPU to be paused + * + * Returned Value: + * On success, OK is returned. Otherwise, a negated errno value indicating + * the nature of the failure is returned. + * + ****************************************************************************/ + +#ifdef CONFIG_SMP +int up_cpu_paused(int cpu); +#endif + /**************************************************************************** * Name: up_cpu_resume * diff --git a/include/nuttx/spinlock.h b/include/nuttx/spinlock.h index 0a9cc88a5e..1ad7fef525 100644 --- a/include/nuttx/spinlock.h +++ b/include/nuttx/spinlock.h @@ -59,6 +59,26 @@ #include +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +/* Memory barriers may be provided in arch/spinlock.h + * + * DMB - Data memory barrier. Assures writes are completed to memory. + * DSB - Data synchronization barrier. 
+ */ + +#define HAVE_DMB 1 +#ifndef SP_DMB +# define SP_DMB() +# undef HAVE_DMB +#endif + +#ifndef SP_DSB +# define SP_DSB() +#endif + /**************************************************************************** * Public Types ****************************************************************************/ @@ -203,7 +223,7 @@ void spin_lockr(FAR struct spinlock_s *lock); * ****************************************************************************/ -#ifdef SP_DMB +#ifdef HAVE_DMB void spin_unlock(FAR volatile spinlock_t *lock); #else # define spin_unlock(l) do { *(l) = SP_UNLOCKED; } while (0) diff --git a/sched/irq/irq_csection.c b/sched/irq/irq_csection.c index 8071da9758..cabe791587 100644 --- a/sched/irq/irq_csection.c +++ b/sched/irq/irq_csection.c @@ -78,6 +78,81 @@ volatile cpu_set_t g_cpu_irqset; static uint8_t g_cpu_nestcount[CONFIG_SMP_NCPUS]; #endif +/**************************************************************************** + * Private Functions + ****************************************************************************/ + +/**************************************************************************** + * Name: irq_waitlock + * + * Description: + * Spin to get g_cpu_irqlock, handling a known deadlock condition: + * + * Suppose this situation: + * + * - CPUn is in a critical section and has the g_cpu_irqlock spinlock. + * - CPUm takes an interrupt and attempts to enter the critical section. + * - It spins waiting on g_cpu_irqlock with interrupts disabled. + * - CPUn calls up_cpu_pause() to pause operation on CPUm. This will + * issue an inter-CPU interrupt to CPUm + * - But interrupts are disabled on CPUm so the up_cpu_pause() is never + * handled, causing the deadlock. + * + * This function detects this deadlock condition while spinning in an + * interrupt and calls the up_cpu_paused() handler, breaking the deadlock. 
+ * + ****************************************************************************/ + +#ifdef CONFIG_SMP +static void irq_waitlock(void) +{ + int cpu = this_cpu(); + + /* Duplicate the spin_lock() logic from spinlock.c, but adding the check + * for the deadlock condition. + */ + + while (up_testset(&g_cpu_irqlock) == SP_LOCKED) + { + /* The deadlock condition would occur if CPUn: + * + * 1. Holds the g_cpu_irqlock, and + * 2. Is trying to interrupt CPUm + * + * The protocol for CPUn to pause CPUm is as follows + * + * 1. The up_cpu_pause() implementation on CPUn locks both + * g_cpu_wait[m] and g_cpu_paused[m]. CPUn then waits + * spinning on g_cpu_wait[m]. + * 2. When CPUm receives the interrupt it (1) unlocks + * g_cpu_paused[m] and (2) locks g_cpu_wait[m]. The + * first unblocks CPUn and the second blocks CPUm in the + * interrupt handler. + * + * The problem in the deadlock case is that CPUm cannot receive + * the interrupt because it is locked up spinning. Here we break + * the deadlock by handling the pause interrupt request + * while waiting. + */ + + if (up_cpu_pausereq(cpu)) + { + /* Yes.. some CPU is requesting to pause us! Handle the + * pause interrupt now. + */ + + DEBUGVERIFY(up_cpu_paused(cpu)); + } + + SP_DSB(); + } + + /* We have g_cpu_irqlock! */ + + SP_DMB(); +} +#endif + /**************************************************************************** * Public Functions ****************************************************************************/ @@ -190,7 +265,7 @@ irqstate_t enter_critical_section(void) * no longer in the critical section). 
*/ - spin_lock(&g_cpu_irqlock); + irq_waitlock(); } /* In any event, the nesting count is now one */ diff --git a/sched/semaphore/spinlock.c b/sched/semaphore/spinlock.c index a3c5225adf..e03f14c72f 100644 --- a/sched/semaphore/spinlock.c +++ b/sched/semaphore/spinlock.c @@ -65,22 +65,6 @@ #undef CONFIG_SPINLOCK_LOCKDOWN /* Feature not yet available */ -/* Memory barriers may be provided in arch/spinlock.h - * - * DMB - Data memory barrier. Assures writes are completed to memory. - * DSB - Data syncrhonization barrier. - */ - -#define HAVE_DMB 1 -#ifndef SP_DMB -# define SP_DMB() -# undef HAVE_DMB -#endif - -#ifndef SP_DSB -# define SP_DSB() -#endif - /**************************************************************************** * Public Functions ****************************************************************************/ diff --git a/sched/task/task_restart.c b/sched/task/task_restart.c index 36251348ed..21aec95b47 100644 --- a/sched/task/task_restart.c +++ b/sched/task/task_restart.c @@ -97,7 +97,7 @@ int task_restart(pid_t pid) /* Not implemented */ errcode = ENOSYS; - goto errout_with_lock; + goto errout; } /* We are restarting some other task than ourselves. Make sure that the @@ -222,7 +222,8 @@ int task_restart(pid_t pid) return OK; errout_with_lock: - set_errno(errcode); leave_critical_section(flags); +errout: + set_errno(errcode); return ERROR; }