From db1dd036a9d3df8487cac845f4d2d85b63d6ed70 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sat, 25 Nov 2023 13:09:08 -0500 Subject: [PATCH] gh-111964: Implement stop-the-world pauses The `--disable-gil` builds occasionally need to pause all but one thread. Some examples include: * Cyclic garbage collection, where this is often called a "stop the world event" * Before calling `fork()`, to ensure a consistent state for internal data structures * During interpreter shutdown, to ensure that daemon threads aren't accessing Python objects This adds the following functions to implement global and per-interpreter pauses: * `_PyRuntimeState_StopTheWorld` and `_PyRuntimeState_StartTheWorld` * `_PyInterpreterState_StopTheWorld` and `_PyInterpreterState_StartTheWorld` These functions are no-ops outside of the `--disable-gil` build. This also adds `_PyRWMutex`, a "readers-writer" lock, which is used to serialize global stop-the-world pauses with per-interpreter pauses. --- Include/internal/pycore_ceval.h | 1 + Include/internal/pycore_interp.h | 17 ++ Include/internal/pycore_lock.h | 24 +++ Include/internal/pycore_pystate.h | 28 ++- Include/internal/pycore_runtime.h | 3 + Include/internal/pycore_runtime_init.h | 3 + Include/pymacro.h | 3 + Modules/_testinternalcapi/test_lock.c | 93 +++++++++ Python/ceval_gil.c | 6 + Python/lock.c | 86 ++++++++ Python/parking_lot.c | 9 +- Python/pystate.c | 265 +++++++++++++++++++++++-- 12 files changed, 520 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index c372b7224fb047b..696b5978c78791c 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -205,6 +205,7 @@ void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame) #define _PY_CALLS_TO_DO_BIT 2 #define _PY_ASYNC_EXCEPTION_BIT 3 #define _PY_GC_SCHEDULED_BIT 4 +#define _PY_EVAL_PLEASE_STOP_BIT 5 /* Reserve a few bits for future use */ #define _PY_EVAL_EVENTS_BITS 8 diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 498db8becf114cf..09877077ac7dbd7 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -39,6 +39,22 @@ struct _Py_long_state { int max_str_digits; }; +// Support for stop-the-world events. This exists in both the PyRuntime struct +// for global pauses and in each PyInterpreterState for per-interpreter pauses. +struct _stoptheworld_state { + PyMutex mutex; // Serializes stop-the-world attempts. + + // NOTE: The below fields are protected by HEAD_LOCK(runtime), not by the + // above mutex. + bool requested; // Set when a pause is requested. + bool world_stopped; // Set when the world is stopped. + bool is_global; // Set when contained in PyRuntime struct. + + PyEvent stop_event; // Set when thread_countdown reaches zero. + Py_ssize_t thread_countdown; // Number of threads that must pause. + + PyThreadState *requester; // Thread that requested the pause (may be NULL). +}; /* cross-interpreter data registry */ @@ -164,6 +180,7 @@ struct _is { struct _warnings_runtime_state warnings; struct atexit_state atexit; + struct _stoptheworld_state stoptheworld; struct _obmalloc_state obmalloc; diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index f135cbbc3754fba..46100342561b100 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -196,6 +196,30 @@ _PyOnceFlag_CallOnce(_PyOnceFlag *flag, _Py_once_fn_t *fn, void *arg) return _PyOnceFlag_CallOnceSlow(flag, fn, arg); } +// A readers-writer (RW) lock. The lock supports multiple concurrent readers or +// a single writer. The lock is write-preferring: if a writer is waiting, then +// new readers will be blocked. This avoids starvation of writers. +// +// The low two bits store whether the lock is write-locked (_Py_LOCKED) and +// whether there are parked threads (_Py_HAS_PARKED). The remaining bits are +// used to store the number of readers. +// +// The design is optimized for simplicity of the implementation. The lock is +// not fair: if fairness is desired, use an additional PyMutex to serialize +// writers. The lock is also not reentrant. +typedef struct { + uintptr_t bits; +} _PyRWMutex; + +// Read lock +PyAPI_FUNC(void) _PyRWMutex_RLock(_PyRWMutex *rwmutex); +PyAPI_FUNC(void) _PyRWMutex_RUnlock(_PyRWMutex *rwmutex); + +// Write lock +PyAPI_FUNC(void) _PyRWMutex_Lock(_PyRWMutex *rwmutex); +PyAPI_FUNC(void) _PyRWMutex_Unlock(_PyRWMutex *rwmutex); + + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 7fa952e371d7b46..05d9b4a8314a4bd 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -138,13 +138,37 @@ _PyThreadState_GET(void) // // High-level code should generally call PyEval_RestoreThread() instead, which // calls this function. -void _PyThreadState_Attach(PyThreadState *tstate); +extern void _PyThreadState_Attach(PyThreadState *tstate); // Detaches the current thread from the interpreter. // // High-level code should generally call PyEval_SaveThread() instead, which // calls this function. -void _PyThreadState_Detach(PyThreadState *tstate); +extern void _PyThreadState_Detach(PyThreadState *tstate); + +// Temporarily pauses the thread in the GC state. +// +// This is used to implement stop-the-world pauses. The thread must be in the +// "attached" state. It will switch to the "GC" state and pause until the +// stop-the-world event completes, after which it will switch back to the +// "attached" state. +extern void _PyThreadState_Park(PyThreadState *tstate); + +// Perform a stop-the-world pause for all threads in the all interpreters. +// +// Threads in the "attached" state are paused and transitioned to the "GC" +// state. Threads in the "detached" state switch to the "GC" state, preventing +// them from reattaching until the stop-the-world pause is complete. +// +// NOTE: This is a no-op outside of Py_GIL_DISABLED builds. +extern void _PyRuntimeState_StopTheWorld(_PyRuntimeState *runtime); +extern void _PyRuntimeState_StartTheWorld(_PyRuntimeState *runtime); + +// Perform a stop-the-world pause for threads in the specified interpreter. +// +// NOTE: This is a no-op outside of Py_GIL_DISABLED builds. +extern void _PyInterpreterState_StopTheWorld(PyInterpreterState *interp); +extern void _PyInterpreterState_StartTheWorld(PyInterpreterState *interp); static inline void diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index e6efe8b646e86f8..c080e14a4dc3909 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -229,6 +229,9 @@ typedef struct pyruntimestate { struct _faulthandler_runtime_state faulthandler; struct _tracemalloc_runtime_state tracemalloc; + _PyRWMutex stoptheworld_mutex; + struct _stoptheworld_state stoptheworld; + PyPreConfig preconfig; // Audit values must be preserved when Py_Initialize()/Py_Finalize() diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index fa5d8114abf0d71..5defa0443897ad8 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -115,6 +115,9 @@ extern PyTypeObject _PyExc_MemoryError; }, \ .faulthandler = _faulthandler_runtime_state_INIT, \ .tracemalloc = _tracemalloc_runtime_state_INIT, \ + .stoptheworld = { \ + .is_global = 1, \ + }, \ .float_state = { \ .float_format = _py_float_format_unknown, \ .double_format = _py_float_format_unknown, \ diff --git a/Include/pymacro.h b/Include/pymacro.h index 9d264fe6eea1d44..cd6fc4eba9c2ed8 100644 --- a/Include/pymacro.h +++ b/Include/pymacro.h @@ -160,6 +160,9 @@ Py_FatalError("Unreachable C code path reached") #endif +#define _Py_CONTAINER_OF(ptr, type, member) \ + (type*)((char*)ptr - offsetof(type, member)) + // Prevent using an expression as a l-value. // For example, "int x; _Py_RVALUE(x) = 1;" fails with a compiler error. #define _Py_RVALUE(EXPR) ((void)0, (EXPR)) diff --git a/Modules/_testinternalcapi/test_lock.c b/Modules/_testinternalcapi/test_lock.c index 418f71c1441995e..1dd3c736ad3f289 100644 --- a/Modules/_testinternalcapi/test_lock.c +++ b/Modules/_testinternalcapi/test_lock.c @@ -372,6 +372,98 @@ test_lock_once(PyObject *self, PyObject *obj) Py_RETURN_NONE; } +struct test_rwlock_data { + Py_ssize_t nthreads; + _PyRWMutex rw; + PyEvent step1; + PyEvent step2; + PyEvent step3; + PyEvent done; +}; + +static void +rdlock_thread(void *arg) +{ + struct test_rwlock_data *test_data = arg; + + // Acquire the lock in read mode + _PyRWMutex_RLock(&test_data->rw); + PyEvent_Wait(&test_data->step1); + _PyRWMutex_RUnlock(&test_data->rw); + + _PyRWMutex_RLock(&test_data->rw); + PyEvent_Wait(&test_data->step3); + _PyRWMutex_RUnlock(&test_data->rw); + + if (_Py_atomic_add_ssize(&test_data->nthreads, -1) == 1) { + _PyEvent_Notify(&test_data->done); + } +} +static void +wrlock_thread(void *arg) +{ + struct test_rwlock_data *test_data = arg; + + // First acquire the lock in write mode + _PyRWMutex_Lock(&test_data->rw); + PyEvent_Wait(&test_data->step2); + _PyRWMutex_Unlock(&test_data->rw); + + if (_Py_atomic_add_ssize(&test_data->nthreads, -1) == 1) { + _PyEvent_Notify(&test_data->done); + } +} + +static void +wait_until(uintptr_t *ptr, uintptr_t value) +{ + // wait up to two seconds for *ptr == value + int iters = 0; + uintptr_t bits; + do { + pysleep(10); + bits = _Py_atomic_load_uintptr(ptr); + iters++; + } while (bits != value && iters < 200); +} + +static PyObject * +test_lock_rwlock(PyObject *self, PyObject *obj) +{ + struct test_rwlock_data test_data = {.nthreads = 3}; + + // Start two readers + PyThread_start_new_thread(rdlock_thread, &test_data); + PyThread_start_new_thread(rdlock_thread, &test_data); + + // wait up to two seconds for the threads to attempt to read-lock "rw" + wait_until(&test_data.rw.bits, 8); + assert(test_data.rw.bits == 8); + + // start writer (while readers hold lock) + PyThread_start_new_thread(wrlock_thread, &test_data); + wait_until(&test_data.rw.bits, 10); + assert(test_data.rw.bits == 10); + + // readers release lock, writer should acquire it + _PyEvent_Notify(&test_data.step1); + wait_until(&test_data.rw.bits, 3); + assert(test_data.rw.bits == 3); + + // writer releases lock, readers acquire it + _PyEvent_Notify(&test_data.step2); + wait_until(&test_data.rw.bits, 8); + assert(test_data.rw.bits == 8); + + // readers release lock again + _PyEvent_Notify(&test_data.step3); + wait_until(&test_data.rw.bits, 0); + assert(test_data.rw.bits == 0); + + PyEvent_Wait(&test_data.done); + Py_RETURN_NONE; +} + static PyMethodDef test_methods[] = { {"test_lock_basic", test_lock_basic, METH_NOARGS}, {"test_lock_two_threads", test_lock_two_threads, METH_NOARGS}, @@ -380,6 +472,7 @@ static PyMethodDef test_methods[] = { _TESTINTERNALCAPI_BENCHMARK_LOCKS_METHODDEF {"test_lock_benchmark", test_lock_benchmark, METH_NOARGS}, {"test_lock_once", test_lock_once, METH_NOARGS}, + {"test_lock_rwlock", test_lock_rwlock, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 92c4b2fee9f863e..2cfc816dae29be8 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -975,6 +975,12 @@ _Py_HandlePending(PyThreadState *tstate) { PyInterpreterState *interp = tstate->interp; + /* Pending signals */ + if (_Py_eval_breaker_bit_is_set(interp, _PY_EVAL_PLEASE_STOP_BIT)) { + _Py_set_eval_breaker_bit(interp, _PY_EVAL_PLEASE_STOP_BIT, 0); + _PyThreadState_Park(tstate); + } + /* Pending signals */ if (_Py_eval_breaker_bit_is_set(interp, _PY_SIGNALS_PENDING_BIT)) { if (handle_signals(tstate) != 0) { diff --git a/Python/lock.c b/Python/lock.c index e9279f0b92a5e79..1883aff7b5e03cd 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -353,3 +353,89 @@ _PyOnceFlag_CallOnceSlow(_PyOnceFlag *flag, _Py_once_fn_t *fn, void *arg) v = _Py_atomic_load_uint8(&flag->v); } } + +#define _PyRWMutex_READER_SHIFT 2 + +void +_PyRWMutex_RLock(_PyRWMutex *rwmutex) +{ + uintptr_t bits = _Py_atomic_load_uintptr_relaxed(&rwmutex->bits); + for (;;) { + // If the lock is not write-locked and there is no writer waiting, then + // we can increment the reader count. + if ((bits & (_Py_LOCKED|_Py_HAS_PARKED)) == 0) { + uintptr_t newval = bits + (1 << _PyRWMutex_READER_SHIFT); + if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits, + &bits, newval)) { + continue; + } + return; + } + + // Set _Py_HAS_PARKED if it's not already set. + if ((bits & _Py_HAS_PARKED) == 0) { + uintptr_t newval = bits | _Py_HAS_PARKED; + if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits, + &bits, newval)) { + continue; + } + bits = newval; + } + + _PyParkingLot_Park(&rwmutex->bits, &bits, sizeof(bits), -1, NULL, 1); + bits = _Py_atomic_load_uintptr_relaxed(&rwmutex->bits); + } +} + +void +_PyRWMutex_RUnlock(_PyRWMutex *rwmutex) +{ + uintptr_t bits = _Py_atomic_add_uintptr(&rwmutex->bits, -(1 << _PyRWMutex_READER_SHIFT)); + bits -= (1 << _PyRWMutex_READER_SHIFT); + + if ((bits >> _PyRWMutex_READER_SHIFT) == 0 && (bits & _Py_HAS_PARKED)) { + _PyParkingLot_UnparkAll(&rwmutex->bits); + return; + } +} + +void +_PyRWMutex_Lock(_PyRWMutex *rwmutex) +{ + uintptr_t bits = _Py_atomic_load_uintptr_relaxed(&rwmutex->bits); + for (;;) { + // If there are no active readers and it's not already write-locked, + // then we can grab the lock. + if ((bits & ~_Py_HAS_PARKED) == 0) { + if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits, + &bits, + bits | _Py_LOCKED)) { + return; + } + continue; + } + + if (!(bits & _Py_HAS_PARKED)) { + if (!_Py_atomic_compare_exchange_uintptr(&rwmutex->bits, + &bits, + bits | _Py_HAS_PARKED)) { + continue; + } + bits |= _Py_HAS_PARKED; + } + + _PyParkingLot_Park(&rwmutex->bits, &bits, sizeof(bits), -1, NULL, 1); + bits = _Py_atomic_load_uintptr_relaxed(&rwmutex->bits); + } +} + +void +_PyRWMutex_Unlock(_PyRWMutex *rwmutex) +{ + uintptr_t old_bits = _Py_atomic_exchange_uintptr(&rwmutex->bits, 0); + assert(old_bits >> _PyRWMutex_READER_SHIFT == 0); + + if ((old_bits & _Py_HAS_PARKED) != 0) { + _PyParkingLot_UnparkAll(&rwmutex->bits); + } +} diff --git a/Python/parking_lot.c b/Python/parking_lot.c index 664e622cc174741..cb65073a33f1909 100644 --- a/Python/parking_lot.c +++ b/Python/parking_lot.c @@ -118,10 +118,15 @@ _PySemaphore_PlatformWait(_PySemaphore *sema, _PyTime_t timeout) if (timeout >= 0) { struct timespec ts; +# ifdef HAVE_SEM_CLOCKWAIT + _PyTime_t deadline = _PyDeadline_Init(timeout); + _PyTime_AsTimespec_clamp(deadline, &ts); + err = sem_clockwait(&sema->platform_sem, CLOCK_MONOTONIC, &ts); +# else _PyTime_t deadline = _PyTime_Add(_PyTime_GetSystemClock(), timeout); - _PyTime_AsTimespec(deadline, &ts); - + _PyTime_AsTimespec_clamp(deadline, &ts); err = sem_timedwait(&sema->platform_sem, &ts); + #endif } else { err = sem_wait(&sema->platform_sem); diff --git a/Python/pystate.c b/Python/pystate.c index 6196b15da0117a2..d17b24ded1a15d0 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1631,6 +1631,9 @@ PyThreadState_Clear(PyThreadState *tstate) // XXX Do it as early in the function as possible. } +static void +decrement_stoptheworld_countdown(struct _stoptheworld_state *stw); + /* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */ static void tstate_delete_common(PyThreadState *tstate) @@ -1654,6 +1657,16 @@ tstate_delete_common(PyThreadState *tstate) if (tstate->next) { tstate->next->prev = tstate->prev; } + if (tstate->state != _Py_THREAD_GC) { + // Any ongoing stop-the-world request should not wait for us because + // our thread is getting deleted. + if (interp->stoptheworld.requested) { + decrement_stoptheworld_countdown(&interp->stoptheworld); + } + if (runtime->stoptheworld.requested) { + decrement_stoptheworld_countdown(&runtime->stoptheworld); + } + } HEAD_UNLOCK(runtime); // XXX Unbind in PyThreadState_Clear(), or earlier @@ -1859,13 +1872,9 @@ tstate_try_attach(PyThreadState *tstate) { #ifdef Py_GIL_DISABLED int expected = _Py_THREAD_DETACHED; - if (_Py_atomic_compare_exchange_int( - &tstate->state, - &expected, - _Py_THREAD_ATTACHED)) { - return 1; - } - return 0; + return _Py_atomic_compare_exchange_int(&tstate->state, + &expected, + _Py_THREAD_ATTACHED); #else assert(tstate->state == _Py_THREAD_DETACHED); tstate->state = _Py_THREAD_ATTACHED; @@ -1884,6 +1893,20 @@ tstate_set_detached(PyThreadState *tstate) #endif } +static void +tstate_wait_attach(PyThreadState *tstate) +{ + do { + int expected = _Py_THREAD_GC; + + // Wait until we're switched out of GC to DETACHED. + _PyParkingLot_Park(&tstate->state, &expected, sizeof(tstate->state), + /*timeout=*/-1, NULL, /*detach=*/0); + + // Once we're back in DETACHED we can re-attach + } while (!tstate_try_attach(tstate)); +} + void _PyThreadState_Attach(PyThreadState *tstate) { @@ -1905,10 +1928,7 @@ _PyThreadState_Attach(PyThreadState *tstate) tstate_activate(tstate); if (!tstate_try_attach(tstate)) { - // TODO: Once stop-the-world GC is implemented for --disable-gil builds - // this will need to wait until the GC completes. For now, this case - // should never happen. - Py_FatalError("thread attach failed"); + tstate_wait_attach(tstate); } // Resume previous critical section. This acquires the lock(s) from the @@ -1922,8 +1942,8 @@ _PyThreadState_Attach(PyThreadState *tstate) #endif } -void -_PyThreadState_Detach(PyThreadState *tstate) +static void +detach_thread(PyThreadState *tstate, int detached_state) { // XXX assert(tstate_is_alive(tstate) && tstate_is_bound(tstate)); assert(tstate->state == _Py_THREAD_ATTACHED); @@ -1931,12 +1951,229 @@ _PyThreadState_Detach(PyThreadState *tstate) if (tstate->critical_section != 0) { _PyCriticalSection_SuspendAll(tstate); } - tstate_set_detached(tstate); tstate_deactivate(tstate); + tstate_set_detached(tstate); current_fast_clear(&_PyRuntime); _PyEval_ReleaseLock(tstate->interp, tstate); } +void +_PyThreadState_Detach(PyThreadState *tstate) +{ + detach_thread(tstate, _Py_THREAD_DETACHED); +} + +// Decrease stop-the-world counter of remaining number of threads that need to +// pause. If we are the final thread to pause, notify the requesting thread. +static void +decrement_stoptheworld_countdown(struct _stoptheworld_state *stw) +{ + assert(stw->thread_countdown > 0); + if (--stw->thread_countdown == 0) { + _PyEvent_Notify(&stw->stop_event); + } +} + +void +_PyThreadState_Park(PyThreadState *tstate) +{ + _PyRuntimeState *runtime = &_PyRuntime; + + assert(tstate->state == _Py_THREAD_ATTACHED); + + struct _stoptheworld_state *stw = NULL; + HEAD_LOCK(runtime); + if (runtime->stoptheworld.requested) { + stw = &runtime->stoptheworld; + } + else if (tstate->interp->stoptheworld.requested) { + stw = &tstate->interp->stoptheworld; + } + HEAD_UNLOCK(runtime); + + if (stw == NULL) { + // We might be processing a stale EVAL_PLEASE_STOP, in which + // case there is nothing to do. This can happen if a thread + // asks us to stop for a previous GC at the same time we detach. + return; + } + + // Switch to GC state. + detach_thread(tstate, _Py_THREAD_GC); + + // Decrease the count of remaining threads needing to park. + HEAD_LOCK(runtime); + decrement_stoptheworld_countdown(stw); + HEAD_UNLOCK(runtime); + + // Wait until we are switched back to DETACHED and then re-attach. After + // this we will be in the ATTACHED state, the same as before. + tstate_wait_attach(tstate); +} + + +#ifdef Py_GIL_DISABLED +// Interpreter for _Py_FOR_EACH_THREAD(). For global stop-the-world events, +// we start with the first interpreter and then iterate over all interpreters. +// For per-interpreter stop-the-world events, we only operate on the one +// interpreter. +static PyInterpreterState * +interp_for_stop_the_world(struct _stoptheworld_state *stw) +{ + return (stw->is_global + ? PyInterpreterState_Head() + : _Py_CONTAINER_OF(stw, PyInterpreterState, stoptheworld)); +} + +// Loops over threads for a stop-the-world event. +// For global: all threads in all interpreters +// For per-interpreter: all threads in the interpreter +#define _Py_FOR_EACH_THREAD(stw) \ + for (PyInterpreterState *i = interp_for_stop_the_world((stw)); \ + i != NULL; i = ((stw->is_global) ? i->next : NULL)) \ + for (PyThreadState *t = i->threads.head; t; t = t->next) + + +// Try to transition threads atomically from the "detached" state to the +// "gc stopped" state. Returns true if all threads are in the "gc stopped" +static bool +park_detached_threads(struct _stoptheworld_state *stw) +{ + int num_parked = 0; + _Py_FOR_EACH_THREAD(stw) { + int state = _Py_atomic_load_int_relaxed(&t->state); + if (state == _Py_THREAD_DETACHED) { + // Atomically transition to _Py_THREAD_GC if in detached state. + if (_Py_atomic_compare_exchange_int(&t->state, + &state, _Py_THREAD_GC)) { + num_parked++; + } + } + else if (state == _Py_THREAD_ATTACHED && t != stw->requester) { + // TODO: set this per-thread, rather than per-interpreter. + _Py_set_eval_breaker_bit(t->interp, _PY_EVAL_PLEASE_STOP_BIT, 1); + } + } + stw->thread_countdown -= num_parked; + assert(stw->thread_countdown >= 0); + return num_parked > 0 && stw->thread_countdown == 0; +} + +static void +stop_the_world(struct _stoptheworld_state *stw) +{ + _PyRuntimeState *runtime = &_PyRuntime; + + PyMutex_Lock(&stw->mutex); + if (stw->is_global) { + _PyRWMutex_Lock(&runtime->stoptheworld_mutex); + } + else { + _PyRWMutex_RLock(&runtime->stoptheworld_mutex); + } + + HEAD_LOCK(runtime); + stw->requested = 1; + stw->thread_countdown = 0; + stw->requester = _PyThreadState_GET(); // may be NULL + + _Py_FOR_EACH_THREAD(stw) { + if (t != stw->requester) { + // Count all the other threads (we don't wait on ourself). + stw->thread_countdown++; + } + } + + if (stw->thread_countdown == 0) { + HEAD_UNLOCK(runtime); + stw->world_stopped = 1; + return; + } + + for (;;) { + // Switch threads that are detached to the GC stopped state + bool stopped_all_threads = park_detached_threads(stw); + HEAD_UNLOCK(runtime); + + if (stopped_all_threads) { + break; + } + + int64_t wait_ns = 1000*1000; // 1ms + if (PyEvent_WaitTimed(&stw->stop_event, wait_ns)) { + assert(stw->thread_countdown == 0); + stw->stop_event = (PyEvent){0}; + break; + } + + HEAD_LOCK(runtime); + } + stw->world_stopped = 1; +} + +static void +start_the_world(struct _stoptheworld_state *stw) +{ + _PyRuntimeState *runtime = &_PyRuntime; + assert(PyMutex_IsLocked(&stw->mutex)); + + HEAD_LOCK(runtime); + stw->requested = 0; + stw->world_stopped = 0; + stw->requester = NULL; + // Switch threads back to the detached state. + _Py_FOR_EACH_THREAD(stw) { + int state = _Py_atomic_load_int_relaxed(&t->state); + if (state == _Py_THREAD_GC && + _Py_atomic_compare_exchange_int(&t->state, + &state, + _Py_THREAD_DETACHED)) { + _PyParkingLot_UnparkAll(&t->state); + } + } + HEAD_UNLOCK(runtime); + if (stw->is_global) { + _PyRWMutex_Unlock(&runtime->stoptheworld_mutex); + } + else { + _PyRWMutex_RUnlock(&runtime->stoptheworld_mutex); + } + PyMutex_Unlock(&stw->mutex); +} +#endif // Py_GIL_DISABLED + +void +_PyRuntimeState_StopTheWorld(_PyRuntimeState *runtime) +{ +#ifdef Py_GIL_DISABLED + stop_the_world(&runtime->stoptheworld); +#endif +} + +void +_PyRuntimeState_StartTheWorld(_PyRuntimeState *runtime) +{ +#ifdef Py_GIL_DISABLED + start_the_world(&runtime->stoptheworld); +#endif +} + +void +_PyInterpreterState_StopTheWorld(PyInterpreterState *interp) +{ +#ifdef Py_GIL_DISABLED + stop_the_world(&interp->stoptheworld); +#endif +} + +void +_PyInterpreterState_StartTheWorld(PyInterpreterState *interp) +{ +#ifdef Py_GIL_DISABLED + start_the_world(&interp->stoptheworld); +#endif +} + //---------- // other API //----------