From 084dd75e03a5b5c8726711a833cd8170dfd3f28b Mon Sep 17 00:00:00 2001 From: Joseph Hickey Date: Tue, 19 May 2020 10:06:57 -0400 Subject: [PATCH] Fix #470, bin sem unlock after task delete Corrects issue when a task waiting on a binary semaphore is deleted, it left the mutex in a locked state preventing other tasks from using the mutex. --- src/os/posix/src/os-impl-binsem.c | 54 +++++++++++++++++++++++++-- src/tests/bin-sem-test/bin-sem-test.c | 17 +++++++++ 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/src/os/posix/src/os-impl-binsem.c b/src/os/posix/src/os-impl-binsem.c index 28b01f309..b2e1832e2 100644 --- a/src/os/posix/src/os-impl-binsem.c +++ b/src/os/posix/src/os-impl-binsem.c @@ -28,9 +28,38 @@ #include "os-shared-binsem.h" #include "os-impl-binsem.h" +/* + * This controls the maximum time the that the calling thread will wait to + * acquire the condition mutex before returning an error. + * + * Under normal conditions, this lock is held by giving/taking threads very + * briefly, so the lock should be available with minimal delay. However, + * if the "taking" thread is canceled or exits abnormally without releasing the + * lock, it means any other task accessing the sem can get blocked indefinitely. + * + * There should be no reason for a user to configure this, as it should + * not be relevant in a normally operating system. This only prevents a + * deadlock condition in off-nominal circumstances. + */ +static const struct timespec OS_POSIX_BINSEM_MAX_WAIT = +{ + .tv_sec = 2, + .tv_nsec = 0 +}; + + /* Tables where the OS object information is stored */ OS_impl_binsem_internal_record_t OS_impl_bin_sem_table [OS_MAX_BIN_SEMAPHORES]; +/*--------------------------------------------------------------------------------------- + * Helper function for releasing the mutex in case the thread + * executing pthread_condwait() is canceled. + ----------------------------------------------------------------------------------------*/ +void OS_Posix_BinSemReleaseMutex(void *mut) +{ + pthread_mutex_unlock(mut); +} + /**************************************************************************************** BINARY SEMAPHORE API ***************************************************************************************/ @@ -244,10 +273,13 @@ int32 OS_BinSemGive_Impl ( uint32 sem_id ) * alternative of having a BinSemGive not wake up the other thread is a bigger issue. * * Counting sems do not suffer from this, as there is a native POSIX mechanism for those. + * + * Note: This lock should be readily available, with only minimal delay if any. + * If a long delay occurs here, it means something is fundamentally wrong. */ /* Lock the mutex ( not the table! ) */ - if ( pthread_mutex_lock(&(sem->id)) != 0 ) + if ( pthread_mutex_timedlock(&sem->id, &OS_POSIX_BINSEM_MAX_WAIT) != 0 ) { return(OS_SEM_FAILURE); } @@ -279,7 +311,7 @@ int32 OS_BinSemFlush_Impl (uint32 sem_id) sem = &OS_impl_bin_sem_table[sem_id]; /* Lock the mutex ( not the table! ) */ - if ( pthread_mutex_lock(&(sem->id)) != 0 ) + if ( pthread_mutex_timedlock(&sem->id, &OS_POSIX_BINSEM_MAX_WAIT) != 0 ) { return(OS_SEM_FAILURE); } @@ -311,12 +343,21 @@ static int32 OS_GenericBinSemTake_Impl (OS_impl_binsem_internal_record_t *sem, c sig_atomic_t flush_count; int32 return_code; + /* + * Note - this lock should be quickly available - should not delay here. + * The main delay is in the pthread_cond_wait() below. + */ /* Lock the mutex ( not the table! ) */ - if ( pthread_mutex_lock(&(sem->id)) != 0 ) + if ( pthread_mutex_timedlock(&sem->id, &OS_POSIX_BINSEM_MAX_WAIT) != 0 ) { return(OS_SEM_FAILURE); } + /* because pthread_cond_wait() is also a cancellation point, + * this uses a cleanup handler to ensure that if canceled during this call, + * the mutex is also released */ + pthread_cleanup_push(OS_Posix_BinSemReleaseMutex, &sem->id); + return_code = OS_SUCCESS; /* @@ -362,7 +403,12 @@ static int32 OS_GenericBinSemTake_Impl (OS_impl_binsem_internal_record_t *sem, c sem->current_value = 0; } - pthread_mutex_unlock(&(sem->id)); + /* + * Pop the cleanup handler. + * Passing "true" means it will be executed, which + * handles releasing the mutex. + */ + pthread_cleanup_pop(true); return return_code; } /* end OS_GenericBinSemTake_Impl */ diff --git a/src/tests/bin-sem-test/bin-sem-test.c b/src/tests/bin-sem-test/bin-sem-test.c index 029437bc7..f602fc832 100644 --- a/src/tests/bin-sem-test/bin-sem-test.c +++ b/src/tests/bin-sem-test/bin-sem-test.c @@ -137,10 +137,27 @@ void task_1(void) void BinSemCheck(void) { uint32 status; + OS_bin_sem_prop_t bin_sem_prop; + + /* Delete the task, which should be pending in OS_BinSemTake() */ + status = OS_TaskDelete(task_1_id); + UtAssert_True(status == OS_SUCCESS, "OS_TaskDelete Rc=%d", (int)status); status = OS_TimerDelete(timer_id); UtAssert_True(status == OS_SUCCESS, "OS_TimerDelete Rc=%d", (int)status); + OS_TaskDelay(100); + + /* Confirm that the semaphore itself is still operational after task deletion */ + status = OS_BinSemGive(bin_sem_id); + UtAssert_True(status == OS_SUCCESS, "BinSem give Rc=%d", (int)status); + status = OS_BinSemGetInfo (bin_sem_id, &bin_sem_prop); + UtAssert_True(status == OS_SUCCESS, "BinSem value=%d Rc=%d", (int)bin_sem_prop.value, (int)status); + status = OS_BinSemTake(bin_sem_id); + UtAssert_True(status == OS_SUCCESS, "BinSem take Rc=%d", (int)status); + status = OS_BinSemDelete(bin_sem_id); + UtAssert_True(status == OS_SUCCESS, "BinSem delete Rc=%d", (int)status); + UtAssert_True(counter < timer_counter, "Task counter (%d) < timer counter (%d)", (int)counter, (int)timer_counter); UtAssert_True(task_1_failures == 0, "Task 1 failures = %u", (unsigned int)task_1_failures); UtAssert_True(timer_failures == 0, "Timer failures = %u", (unsigned int)timer_failures);