From a24dc2ecc31e9e517b8b6ccf389627dfeed4511e Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 31 Aug 2021 07:37:29 -0700 Subject: [PATCH] pystate: keep track of attached vs. detached state This adds a "status" field to each PyThreadState. The GC status will be useful for implementing stop-the-world garbage collection. --- Include/ceval.h | 1 + Include/cpython/pystate.h | 5 ++ Include/internal/pycore_ceval.h | 3 + Include/internal/pycore_pystate.h | 5 ++ Include/object.h | 29 ++++++++++ Python/ceval_gil.c | 13 +++++ Python/pystate.c | 91 ++++++++++++++++++++++++++----- 7 files changed, 132 insertions(+), 15 deletions(-) diff --git a/Include/ceval.h b/Include/ceval.h index ad4d909d6f2..3b25008c834 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -16,6 +16,7 @@ PyAPI_FUNC(PyObject *) PyEval_EvalCodeEx(PyObject *co, PyObject *const *kwds, int kwdc, PyObject *const *defs, int defc, PyObject *kwdefs, PyObject *closure); +/* Interface to random parts in ceval.c */ /* PyEval_CallObjectWithKeywords(), PyEval_CallObject(), PyEval_CallFunction * and PyEval_CallMethod are deprecated. Since they are officially part of the diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 0117c23f518..60257e0bcb6 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -114,6 +114,9 @@ struct _ts { PyThreadState *next; PyInterpreterState *interp; + /* thread status (attached, detached, gc) */ + int status; + /* Has been initialized to a safe state. In order to be effective, this must be set to 0 during or right @@ -164,6 +167,8 @@ struct _ts { */ unsigned long native_thread_id; + uintptr_t fast_thread_id; /* Thread id used for object ownership */ + int trash_delete_nesting; PyObject *trash_delete_later; diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index deda070a6de..ec9f5ec0ff8 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -152,6 +152,9 @@ extern PyObject* _Py_MakeCoro(PyFunctionObject *func); extern int _Py_HandlePending(PyThreadState *tstate); +extern void _PyEval_TakeGIL(PyThreadState *tstate); +extern void _PyEval_DropGIL(PyThreadState *tstate); + #ifdef __cplusplus diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index c13a777a80d..bba5a1f1ea5 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -10,6 +10,11 @@ extern "C" { #include "pycore_runtime.h" /* PyRuntimeState */ +enum _threadstatus { + _Py_THREAD_DETACHED = 0, + _Py_THREAD_ATTACHED = 1, + _Py_THREAD_GC = 2 +}; /* Check if the current thread is the main thread. Use _Py_IsMainInterpreter() to check if it's the main interpreter. */ diff --git a/Include/object.h b/Include/object.h index ad973322db1..eed6724e434 100644 --- a/Include/object.h +++ b/Include/object.h @@ -500,6 +500,35 @@ PyAPI_FUNC(void) _Py_NegativeRefcount(const char *filename, int lineno, PyAPI_FUNC(void) _Py_Dealloc(PyObject *); +static inline uintptr_t +_Py_ThreadId(void) +{ + // copied from mimalloc-internal.h + uintptr_t tid; +#if defined(_MSC_VER) && defined(_M_X64) + tid = __readgsqword(48); +#elif defined(_MSC_VER) && defined(_M_IX86) + tid = __readfsdword(24); +#elif defined(_MSC_VER) && defined(_M_ARM64) + tid = __getReg(18); +#elif defined(__i386__) + __asm__("movl %%gs:0, %0" : "=r" (tid)); // 32-bit always uses GS +#elif defined(__MACH__) && defined(__x86_64__) + __asm__("movq %%gs:0, %0" : "=r" (tid)); // x86_64 macOSX uses GS +#elif defined(__x86_64__) + __asm__("movq %%fs:0, %0" : "=r" (tid)); // x86_64 Linux, BSD uses FS +#elif defined(__arm__) + __asm__ ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tid)); +#elif defined(__aarch64__) && defined(__APPLE__) + __asm__ ("mrs %0, tpidrro_el0" : "=r" (tid)); +#elif defined(__aarch64__) + __asm__ ("mrs %0, tpidr_el0" : "=r" (tid)); +#else + # error "define _Py_ThreadId for this platform" +#endif + return tid; +} + /* These are provided as conveniences to Python runtime embedders, so that they can have object code that is not dependent on Python compilation flags. diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 83f4e91e545..b961071385d 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1004,3 +1004,16 @@ _Py_HandlePending(PyThreadState *tstate) return 0; } +void +_PyEval_TakeGIL(PyThreadState *tstate) +{ + _PyThreadState_SET(tstate); + take_gil(tstate); +} + +void +_PyEval_DropGIL(PyThreadState *tstate) +{ + _PyThreadState_SET(NULL); + _PyEval_ReleaseLock(tstate); +} diff --git a/Python/pystate.c b/Python/pystate.c index f0339b9d9e3..53d9d928bea 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -13,6 +13,7 @@ #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_sysmodule.h" +#include "pyatomic.h" /* -------------------------------------------------------------------------- CAUTION @@ -240,6 +241,30 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) static void _PyGILState_NoteThreadState( struct _gilstate_runtime_state *gilstate, PyThreadState* tstate); +int +_PyThreadState_GetStatus(PyThreadState *tstate) +{ + return _Py_atomic_load_int_relaxed(&tstate->status); +} + +static int +_PyThreadState_Attach(PyThreadState *tstate) +{ + if (_Py_atomic_compare_exchange_int( + &tstate->status, + _Py_THREAD_DETACHED, + _Py_THREAD_ATTACHED)) { + return 1; + } + return 0; +} + +static void +_PyThreadState_Detach(PyThreadState *tstate) +{ + _Py_atomic_store_int(&tstate->status, _Py_THREAD_DETACHED); +} + PyStatus _PyInterpreterState_Enable(_PyRuntimeState *runtime) { @@ -517,13 +542,14 @@ PyInterpreterState_Delete(PyInterpreterState *interp) { _PyRuntimeState *runtime = interp->runtime; struct pyinterpreters *interpreters = &runtime->interpreters; - zapthreads(interp, 0); - - _PyEval_FiniState(&interp->ceval); /* Delete current thread. After this, many C API calls become crashy. */ _PyThreadState_Swap(&runtime->gilstate, NULL); + zapthreads(interp, 0); + + _PyEval_FiniState(&interp->ceval); + HEAD_LOCK(runtime); PyInterpreterState **p; for (p = &interpreters->head; ; p = &(*p)->next) { @@ -910,6 +936,7 @@ _PyThreadState_Init(PyThreadState *tstate) void _PyThreadState_SetCurrent(PyThreadState *tstate) { + tstate->fast_thread_id = _Py_ThreadId(); _PyGILState_NoteThreadState(&tstate->interp->runtime->gilstate, tstate); } @@ -1094,15 +1121,25 @@ PyThreadState_Clear(PyThreadState *tstate) /* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */ static void tstate_delete_common(PyThreadState *tstate, - struct _gilstate_runtime_state *gilstate) + struct _gilstate_runtime_state *gilstate, + int is_current) { + assert(is_current ? tstate->status == _Py_THREAD_ATTACHED + : tstate->status != _Py_THREAD_ATTACHED); + _Py_EnsureTstateNotNULL(tstate); PyInterpreterState *interp = tstate->interp; if (interp == NULL) { Py_FatalError("NULL interpreter"); } - _PyRuntimeState *runtime = interp->runtime; + if (gilstate->autoInterpreterState && + PyThread_tss_get(&gilstate->autoTSSkey) == tstate) + { + PyThread_tss_set(&gilstate->autoTSSkey, NULL); + } + + _PyRuntimeState *runtime = interp->runtime; HEAD_LOCK(runtime); if (tstate->prev) { tstate->prev->next = tstate->next; @@ -1115,10 +1152,8 @@ tstate_delete_common(PyThreadState *tstate, } HEAD_UNLOCK(runtime); - if (gilstate->autoInterpreterState && - PyThread_tss_get(&gilstate->autoTSSkey) == tstate) - { - PyThread_tss_set(&gilstate->autoTSSkey, NULL); + if (is_current) { + _PyThreadState_SET(NULL); } _PyStackChunk *chunk = tstate->datastack_chunk; tstate->datastack_chunk = NULL; @@ -1138,7 +1173,7 @@ _PyThreadState_Delete(PyThreadState *tstate, int check_current) _Py_FatalErrorFormat(__func__, "tstate %p is still current", tstate); } } - tstate_delete_common(tstate, gilstate); + tstate_delete_common(tstate, gilstate, 0); free_threadstate(tstate); } @@ -1155,7 +1190,7 @@ _PyThreadState_DeleteCurrent(PyThreadState *tstate) { _Py_EnsureTstateNotNULL(tstate); struct _gilstate_runtime_state *gilstate = &tstate->interp->runtime->gilstate; - tstate_delete_common(tstate, gilstate); + tstate_delete_common(tstate, gilstate, 1); _PyRuntimeGILState_SetThreadState(gilstate, NULL); _PyEval_ReleaseLock(tstate); free_threadstate(tstate); @@ -1230,9 +1265,36 @@ PyThreadState_Get(void) PyThreadState * _PyThreadState_Swap(struct _gilstate_runtime_state *gilstate, PyThreadState *newts) { - PyThreadState *oldts = _PyRuntimeGILState_GetThreadState(gilstate); + PyThreadState *oldts = _Py_current_tstate; + +#if defined(Py_DEBUG) + // The new thread-state should correspond to the current native thread + // XXX: breaks subinterpreter tests + if (newts && newts->fast_thread_id != _Py_ThreadId()) { + Py_FatalError("Invalid thread state for this thread"); + } +#endif + + if (oldts != NULL) { + int status = _Py_atomic_load_int(&oldts->status); + assert(status == _Py_THREAD_ATTACHED || status == _Py_THREAD_GC); + + if (status == _Py_THREAD_ATTACHED) { + _PyThreadState_Detach(oldts); + } + } + + _Py_current_tstate = newts; + + if (newts) { + int attached = _PyThreadState_Attach(newts); + if (!attached) { + // _PyThreadState_GC_Park(newts); + } + + assert(_Py_atomic_load_int(&newts->status) == _Py_THREAD_ATTACHED); + } - _PyRuntimeGILState_SetThreadState(gilstate, newts); /* It should not be possible for more than one thread state to be used for a thread. Check this the best we can in debug builds. @@ -1243,8 +1305,7 @@ _PyThreadState_Swap(struct _gilstate_runtime_state *gilstate, PyThreadState *new to it, we need to ensure errno doesn't change. */ int err = errno; - PyThreadState *check = _PyGILState_GetThisThreadState(gilstate); - if (check && check->interp == newts->interp && check != newts) + if (oldts && oldts->interp == newts->interp && oldts != newts) Py_FatalError("Invalid thread state for this thread"); errno = err; }