SMP: Introduce new global IRQ clearing logic and tasklist protection.
In the previous implementation, the global IRQ control was cleared too early in sched_addreadytorun() and sched_removereadytorun(). As a result, nxsem_post() could enter the critical section even though nxsem_wait() had not yet reached the blocked state. This patch moves the clearing of the global IRQ controls from sched_addreadytorun() and sched_removereadytorun() to sched_resume_scheduler(), so that nxsem_post() enters the critical section at the correct time. Because of this change, sched_resumescheduler.c is now always required in SMP configurations. In addition, task_exit() had to be modified to call sched_resume_scheduler() after it calls sched_removereadytorun(); otherwise it would deadlock.

However, I then encountered another DEBUGASSERT() in sched_cpu_select() during an HTTP streaming aging test on lc823450-xgevk. sched_cpu_select() accesses g_assignedtasks, which may be changed by another CPU, and the other tasklists can likewise be modified concurrently when both CPUs are executing scheduling logic. To avoid this, this patch also introduces tasklist protection APIs (sched_tasklist_lock()/sched_tasklist_unlock()).

With these changes, SMP kernel stability is much improved.

Signed-off-by: Masayuki Ishikawa <Masayuki.Ishikawa@jp.sony.com>
parent 3521aaf944
commit d295f11a3a
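As orientation before the diff: every scheduler function touched below applies the same protection pattern around tasklist accesses. The sketch here is illustrative only (it mirrors the sched_addblocked() hunk further down); the wrapper function name is hypothetical and not part of the commit.

    #include <nuttx/irq.h>

    #include "sched/sched.h"

    /* Hypothetical helper, for illustration only */

    static void example_tasklist_access(void)
    {
    #ifdef CONFIG_SMP
      /* Lock the tasklists before accessing them: disables local interrupts
       * and takes the tasklist spinlock (nesting-safe per CPU).
       */

      irqstate_t lock = sched_tasklist_lock();
    #endif

      /* ... safely traverse or modify g_readytorun, g_assignedtasks,
       * g_pendingtasks, etc. here ...
       */

    #ifdef CONFIG_SMP
      /* Release the tasklist spinlock and restore the saved interrupt state */

      sched_tasklist_unlock(lock);
    #endif
    }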
@@ -883,7 +883,7 @@ void task_vforkabort(FAR struct task_tcb_s *child, int errcode);
 ********************************************************************************/

 #if CONFIG_RR_INTERVAL > 0 || defined(CONFIG_SCHED_SPORADIC) || \
-    defined(CONFIG_SCHED_INSTRUMENTATION)
+    defined(CONFIG_SCHED_INSTRUMENTATION) || defined(CONFIG_SMP)
 void sched_resume_scheduler(FAR struct tcb_s *tcb);
 #else
 # define sched_resume_scheduler(tcb)
@@ -77,6 +77,8 @@ else ifeq ($(CONFIG_SCHED_SPORADIC),y)
 CSRCS += sched_resumescheduler.c
 else ifeq ($(CONFIG_SCHED_INSTRUMENTATION),y)
 CSRCS += sched_resumescheduler.c
+else ifeq ($(CONFIG_SMP),y)
+CSRCS += sched_resumescheduler.c
 endif

 ifeq ($(CONFIG_SCHED_CPULOAD),y)

@@ -96,6 +98,10 @@ ifeq ($(CONFIG_SCHED_INSTRUMENTATION_BUFFER),y)
 CSRCS += sched_note.c
 endif

+ifeq ($(CONFIG_SMP),y)
+CSRCS += sched_tasklistlock.c
+endif
+
 # Include sched build support

 DEPPATH += --dep-path sched
@@ -364,6 +364,10 @@ extern volatile spinlock_t g_cpu_schedlock SP_SECTION;
 extern volatile spinlock_t g_cpu_locksetlock SP_SECTION;
 extern volatile cpu_set_t g_cpu_lockset SP_SECTION;

+/* Used to lock the tasklists to prevent concurrent access */
+
+extern volatile spinlock_t g_cpu_tasklistlock SP_SECTION;
+
 #endif /* CONFIG_SMP */

 /****************************************************************************

@@ -425,6 +429,10 @@ void sched_sporadic_lowpriority(FAR struct tcb_s *tcb);
 #ifdef CONFIG_SMP
 int sched_cpu_select(cpu_set_t affinity);
 int sched_cpu_pause(FAR struct tcb_s *tcb);
+
+irqstate_t sched_tasklist_lock(void);
+void sched_tasklist_unlock(irqstate_t lock);
+
 # define sched_islocked(tcb) spin_islocked(&g_cpu_schedlock)
 #else
 # define sched_cpu_select(a) (0)
@@ -77,6 +77,12 @@ void sched_addblocked(FAR struct tcb_s *btcb, tstate_t task_state)
   DEBUGASSERT(task_state >= FIRST_BLOCKED_STATE &&
               task_state <= LAST_BLOCKED_STATE);

+#ifdef CONFIG_SMP
+  /* Lock the tasklists before accessing */
+
+  irqstate_t lock = sched_tasklist_lock();
+#endif
+
   /* Add the TCB to the blocked task list associated with this state. */

   tasklist = TLIST_BLOCKED(task_state);

@@ -96,6 +102,12 @@ void sched_addblocked(FAR struct tcb_s *btcb, tstate_t task_state)
       dq_addlast((FAR dq_entry_t *)btcb, tasklist);
     }

+#ifdef CONFIG_SMP
+  /* Unlock the tasklists */
+
+  sched_tasklist_unlock(lock);
+#endif
+
   /* Make sure the TCB's state corresponds to the list */

   btcb->task_state = task_state;
@@ -173,6 +173,10 @@ bool sched_addreadytorun(FAR struct tcb_s *btcb)
   int cpu;
   int me;

+  /* Lock the tasklists before accessing */
+
+  irqstate_t lock = sched_tasklist_lock();
+
   /* Check if the blocked TCB is locked to this CPU */

   if ((btcb->flags & TCB_FLAG_CPU_LOCKED) != 0)

@@ -339,10 +343,9 @@ bool sched_addreadytorun(FAR struct tcb_s *btcb)

       else if (g_cpu_nestcount[me] <= 0)
         {
-          /* Release our hold on the IRQ lock. */
-
-          spin_clrbit(&g_cpu_irqset, cpu, &g_cpu_irqsetlock,
-                      &g_cpu_irqlock);
+          /* Do nothing here
+           * NOTE: spin_clrbit() will be done in sched_resumescheduler()
+           */
         }

       /* Sanity check. g_cpu_netcount should be greater than zero

@@ -426,6 +429,9 @@ bool sched_addreadytorun(FAR struct tcb_s *btcb)
         }
     }

+  /* Unlock the tasklists */
+
+  sched_tasklist_unlock(lock);
   return doswitch;
 }
@@ -199,6 +199,10 @@ bool sched_mergepending(void)
   int cpu;
   int me;

+  /* Lock the tasklist before accessing */
+
+  irqstate_t lock = sched_tasklist_lock();
+
   /* Remove and process every TCB in the g_pendingtasks list.
    *
    * Do nothing if (1) pre-emption is still disabled (by any CPU), or (2) if

@@ -215,7 +219,7 @@ bool sched_mergepending(void)
         {
           /* The pending task list is empty */

-          return ret;
+          goto errout_with_lock;
         }

       cpu = sched_cpu_select(ALL_CPUS /* ptcb->affinity */);

@@ -259,7 +263,7 @@ bool sched_mergepending(void)
            * pending task list.
            */

-          return ret;
+          goto errout_with_lock;
         }

       /* Set up for the next time through the loop */

@@ -269,7 +273,7 @@ bool sched_mergepending(void)
         {
           /* The pending task list is empty */

-          return ret;
+          goto errout_with_lock;
         }

       cpu = sched_cpu_select(ALL_CPUS /* ptcb->affinity */);

@@ -285,6 +289,11 @@ bool sched_mergepending(void)
                                TSTATE_TASK_READYTORUN);
         }

+errout_with_lock:
+
+  /* Unlock the tasklist */
+
+  sched_tasklist_unlock(lock);
   return ret;
 }
 #endif /* CONFIG_SMP */
@@ -83,6 +83,12 @@ void sched_mergeprioritized(FAR dq_queue_t *list1, FAR dq_queue_t *list2,
   FAR struct tcb_s *tcb2;
   FAR struct tcb_s *tmp;

+#ifdef CONFIG_SMP
+  /* Lock the tasklists before accessing */
+
+  irqstate_t lock = sched_tasklist_lock();
+#endif
+
   DEBUGASSERT(list1 != NULL && list2 != NULL);

   /* Get a private copy of list1, clearing list1.  We do this early so that

@@ -99,7 +105,7 @@ void sched_mergeprioritized(FAR dq_queue_t *list1, FAR dq_queue_t *list2,
     {
       /* Special case.. list1 is empty.  There is nothing to be done. */

-      return;
+      goto ret_with_lock;
     }

   /* Now the TCBs are no longer accessible and we can change the state on

@@ -122,7 +128,7 @@ void sched_mergeprioritized(FAR dq_queue_t *list1, FAR dq_queue_t *list2,
       /* Special case.. list2 is empty.  Move list1 to list2. */

       dq_move(&clone, list2);
-      return;
+      goto ret_with_lock;
     }

   /* Now loop until all entries from list1 have been merged into list2.  tcb1

@@ -171,4 +177,13 @@ void sched_mergeprioritized(FAR dq_queue_t *list1, FAR dq_queue_t *list2,
         }
     }
   while (tcb1 != NULL);
+
+ret_with_lock:
+
+#ifdef CONFIG_SMP
+  /* Unlock the tasklists */
+
+  sched_tasklist_unlock(lock);
+#endif
+  return;
 }
@@ -42,6 +42,7 @@
 #include <stdbool.h>
 #include <queue.h>
 #include <assert.h>
 #include <nuttx/sched_note.h>

+#include "irq/irq.h"
 #include "sched/sched.h"

@@ -138,6 +139,10 @@ bool sched_removereadytorun(FAR struct tcb_s *rtcb)
   bool doswitch = false;
   int cpu;

+  /* Lock the tasklists before accessing */
+
+  irqstate_t lock = sched_tasklist_lock();
+
   /* Which CPU (if any) is the task running on?  Which task list holds the
    * TCB?
    */

@@ -283,10 +288,9 @@ bool sched_removereadytorun(FAR struct tcb_s *rtcb)

       else if (g_cpu_nestcount[me] <= 0)
         {
-          /* Release our hold on the IRQ lock. */
-
-          spin_clrbit(&g_cpu_irqset, cpu, &g_cpu_irqsetlock,
-                      &g_cpu_irqlock);
+          /* Do nothing here
+           * NOTE: spin_clrbit() will be done in sched_resumescheduler()
+           */
         }

       /* Sanity check. g_cpu_netcount should be greater than zero

@@ -330,6 +334,10 @@ bool sched_removereadytorun(FAR struct tcb_s *rtcb)
   /* Since the TCB is no longer in any list, it is now invalid */

   rtcb->task_state = TSTATE_TASK_INVALID;
+
+  /* Unlock the tasklists */
+
+  sched_tasklist_unlock(lock);
   return doswitch;
 }
 #endif /* CONFIG_SMP */
@@ -45,10 +45,11 @@
 #include <nuttx/clock.h>
 #include <nuttx/sched_note.h>

+#include "irq/irq.h"
 #include "sched/sched.h"

 #if CONFIG_RR_INTERVAL > 0 || defined(CONFIG_SCHED_SPORADIC) || \
-    defined(CONFIG_SCHED_INSTRUMENTATION)
+    defined(CONFIG_SCHED_INSTRUMENTATION) || defined(CONFIG_SMP)

 /****************************************************************************
  * Public Functions

@@ -101,7 +102,44 @@ void sched_resume_scheduler(FAR struct tcb_s *tcb)
   sched_note_resume(tcb);
 #endif

+#ifdef CONFIG_SMP
+  /* NOTE: The following logic for adjusting the global IRQ controls was
+   * derived from sched_addreadytorun() and sched_removereadytorun().
+   * Here we only handle the clearing logic, deferring the release of
+   * the IRQ lock until after the context switch.
+   */
+
+  int me = this_cpu();
+
+  /* Adjust global IRQ controls.  If irqcount is greater than zero,
+   * then this task/this CPU holds the IRQ lock
+   */
+
+  if (tcb->irqcount > 0)
+    {
+      /* Do nothing here
+       * NOTE: spin_setbit() is done in sched_addreadytorun()
+       * and sched_removereadytorun()
+       */
+    }
+
+  /* No.. This CPU will be relinquishing the lock.  But this works
+   * differently if we are performing a context switch from an
+   * interrupt handler and the interrupt handler has established
+   * a critical section.  We can detect this case when
+   * g_cpu_nestcount[me] > 0.
+   */
+
+  else if (g_cpu_nestcount[me] <= 0)
+    {
+      /* Release our hold on the IRQ lock. */
+
+      spin_clrbit(&g_cpu_irqset, me, &g_cpu_irqsetlock,
+                  &g_cpu_irqlock);
+    }
+#endif /* CONFIG_SMP */
+
 }

 #endif /* CONFIG_RR_INTERVAL > 0 || CONFIG_SCHED_SPORADIC || \
-        * CONFIG_SCHED_INSTRUMENTATION */
+        * CONFIG_SCHED_INSTRUMENTATION || CONFIG_SMP */
sched/sched/sched_tasklistlock.c (new file, 133 lines)

@@ -0,0 +1,133 @@
/****************************************************************************
 * sched/sched/sched_tasklistlock.c
 *
 *   Copyright (C) 2018 Sony Corporation. All rights reserved.
 *   Author: Masayuki Ishikawa <Masayuki.Ishikawa@jp.sony.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name NuttX nor the names of its contributors may be
 *    used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 ****************************************************************************/

/****************************************************************************
 * Included Files
 ****************************************************************************/

#include <nuttx/config.h>
#include <nuttx/spinlock.h>

#include <sys/types.h>
#include <arch/irq.h>

#include "sched/sched.h"

/****************************************************************************
 * Public Data
 ****************************************************************************/

/* Spinlock to protect the tasklists */

static volatile spinlock_t g_tasklist_lock SP_SECTION = SP_UNLOCKED;

/* Handles nested calls */

static volatile uint8_t g_tasklist_lock_count[CONFIG_SMP_NCPUS];

/****************************************************************************
 * Public Functions
 ****************************************************************************/

/****************************************************************************
 * Name: sched_tasklist_lock()
 *
 * Description:
 *   Disable local interrupts and take the global spinlock (g_tasklist_lock)
 *   if the call counter (g_tasklist_lock_count[cpu]) equals 0.  Then the
 *   counter on the CPU is incremented to allow nested calls.
 *
 *   NOTE: This API is used to protect tasklists in the scheduler.  So do
 *   not use this API for other purposes.
 *
 * Returned Value:
 *   An opaque, architecture-specific value that represents the state of
 *   the interrupts prior to the call to sched_tasklist_lock().
 *
 ****************************************************************************/

irqstate_t sched_tasklist_lock(void)
{
  int me;
  irqstate_t ret;

  ret = up_irq_save();
  me  = this_cpu();

  if (0 == g_tasklist_lock_count[me])
    {
      spin_lock(&g_tasklist_lock);
    }

  g_tasklist_lock_count[me]++;
  ASSERT(0 != g_tasklist_lock_count[me]);
  return ret;
}

/****************************************************************************
 * Name: sched_tasklist_unlock()
 *
 * Description:
 *   Decrement the call counter (g_tasklist_lock_count[cpu]) and if it
 *   decrements to zero then release the spinlock (g_tasklist_lock) and
 *   restore the interrupt state as it was prior to the previous call to
 *   sched_tasklist_lock().
 *
 *   NOTE: This API is used to protect tasklists in the scheduler.  So do
 *   not use this API for other purposes.
 *
 * Input Parameters:
 *   lock - The architecture-specific value that represents the state of
 *          the interrupts prior to the call to sched_tasklist_lock().
 *
 * Returned Value:
 *   None
 *
 ****************************************************************************/

void sched_tasklist_unlock(irqstate_t lock)
{
  int me;

  me = this_cpu();

  ASSERT(0 < g_tasklist_lock_count[me]);
  g_tasklist_lock_count[me]--;

  if (0 == g_tasklist_lock_count[me])
    {
      spin_unlock(&g_tasklist_lock);
    }

  up_irq_restore(lock);
}
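For clarity, the per-CPU counter above is what makes nested locking safe: an inner lock/unlock pair on the same CPU does not drop the spinlock held by the outer caller. The sketch below only illustrates that reference counting; the two function names are hypothetical and not part of the commit.

    #include <nuttx/irq.h>

    #include "sched/sched.h"

    static void inner_scheduler_step(void)     /* hypothetical nested caller */
    {
      irqstate_t flags = sched_tasklist_lock();   /* count 1 -> 2: spinlock already held, not re-taken */

      /* ... access the tasklists ... */

      sched_tasklist_unlock(flags);               /* count 2 -> 1: spinlock is NOT released yet */
    }

    static void outer_scheduler_path(void)     /* hypothetical outer caller */
    {
      irqstate_t flags = sched_tasklist_lock();   /* count 0 -> 1: takes g_tasklist_lock */

      inner_scheduler_step();                     /* safe: the lock is reference counted per CPU */

      sched_tasklist_unlock(flags);               /* count 1 -> 0: releases the spinlock, restores IRQs */
    }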
@@ -97,6 +97,14 @@ int task_exit(void)
   (void)sched_removereadytorun(dtcb);
   rtcb = this_task();

+#ifdef CONFIG_SMP
+  /* Clearing the global IRQ control in sched_removereadytorun() was
+   * moved to sched_resume_scheduler(), so call that API here.
+   */
+
+  sched_resume_scheduler(rtcb);
+#endif
+
   /* We are now in a bad state -- the head of the ready to run task list
    * does not correspond to the thread that is running.  Disabling pre-
    * emption on this TCB and marking the new ready-to-run task as not