From 141222f190dc1904dd6d6faa9aeed8c1956b0074 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 3 Nov 2023 15:38:09 -0400 Subject: [PATCH] gh-111956: Add thread-safe one-time initialization. The one-time initialization (`_PyOnceFlag`) is used in two places: * `Python/Python-ast.c` * `Python/getargs.c` --- Include/internal/pycore_ast_state.h | 6 ++- Include/internal/pycore_lock.h | 30 ++++++++++++ Include/internal/pycore_modsupport.h | 6 ++- ...-11-10-10-24-28.gh-issue-111956.ImE6Cx.rst | 2 + Modules/_testinternalcapi/test_lock.c | 32 ++++++++++++ Parser/asdl_c.py | 25 ++++------ Python/Python-ast.c | 19 ++----- Python/getargs.c | 32 ++++-------- Python/lock.c | 49 +++++++++++++++++++ 9 files changed, 148 insertions(+), 53 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-11-10-10-24-28.gh-issue-111956.ImE6Cx.rst diff --git a/Include/internal/pycore_ast_state.h b/Include/internal/pycore_ast_state.h index 0c0d53f3e5d7e90..6ffd30aca7b11b7 100644 --- a/Include/internal/pycore_ast_state.h +++ b/Include/internal/pycore_ast_state.h @@ -2,6 +2,9 @@ #ifndef Py_INTERNAL_AST_STATE_H #define Py_INTERNAL_AST_STATE_H + +#include "pycore_lock.h" // _PyOnceFlag + #ifdef __cplusplus extern "C" { #endif @@ -11,7 +14,8 @@ extern "C" { #endif struct ast_state { - int initialized; + _PyOnceFlag once; + int finalized; int recursion_depth; int recursion_limit; PyObject *AST_type; diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index fe5e21fad221e4e..3da5558266c9b18 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -46,6 +46,7 @@ typedef struct _PyMutex PyMutex; #define _Py_UNLOCKED 0 #define _Py_LOCKED 1 #define _Py_HAS_PARKED 2 +#define _Py_ONCE_INITIALIZED 4 // (private) slow path for locking the mutex PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m); @@ -166,6 +167,35 @@ _PyRawMutex_Unlock(_PyRawMutex *m) _PyRawMutex_UnlockSlow(m); } +// A data structure that can be used to run initialization code once in a +// thread-safe manner. The C++11 equivalent is std::call_once. +typedef struct { + uint8_t v; +} _PyOnceFlag; + +// Type signature for one-time initialization functions. The function should +// return 1 on success and 0 on failure. +typedef int _Py_once_fn_t(void *arg); + +// (private) slow path for one time initialization +PyAPI_FUNC(int) +_PyOnceFlag_CallOnceSlow(_PyOnceFlag *flag, _Py_once_fn_t *fn, void *arg); + +// Calls `fn` once using `flag`. The `arg` is passed to the call to `fn`. +// +// Returns 1 on success and 0 on failure. +// +// If `fn` returns 1 (success), then subsequent calls immediately return 1. +// If `fn` returns 0 (failure), then subsequent calls will retry the call. +static inline int +_PyOnceFlag_CallOnce(_PyOnceFlag *flag, _Py_once_fn_t *fn, void *arg) +{ + if (_Py_atomic_load_uint8(&flag->v) == _Py_ONCE_INITIALIZED) { + return 1; + } + return _PyOnceFlag_CallOnceSlow(flag, fn, arg); +} + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_modsupport.h b/Include/internal/pycore_modsupport.h index e12f3b72c8415b3..8a242ed78bfc63c 100644 --- a/Include/internal/pycore_modsupport.h +++ b/Include/internal/pycore_modsupport.h @@ -1,5 +1,8 @@ #ifndef Py_INTERNAL_MODSUPPORT_H #define Py_INTERNAL_MODSUPPORT_H + +#include "pycore_lock.h" // _PyOnceFlag + #ifdef __cplusplus extern "C" { #endif @@ -65,11 +68,12 @@ PyAPI_FUNC(void) _PyArg_BadArgument( // --- _PyArg_Parser API --------------------------------------------------- typedef struct _PyArg_Parser { - int initialized; const char *format; const char * const *keywords; const char *fname; const char *custom_msg; + _PyOnceFlag once; /* atomic one-time initialization flag */ + int is_kwtuple_owned; /* does this parser own the kwtuple object */ int pos; /* number of positional-only arguments */ int min; /* minimal number of arguments */ int max; /* maximal number of positional arguments */ diff --git a/Misc/NEWS.d/next/C API/2023-11-10-10-24-28.gh-issue-111956.ImE6Cx.rst b/Misc/NEWS.d/next/C API/2023-11-10-10-24-28.gh-issue-111956.ImE6Cx.rst new file mode 100644 index 000000000000000..30ee07aa2f1f9bb --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-11-10-10-24-28.gh-issue-111956.ImE6Cx.rst @@ -0,0 +1,2 @@ +Add internal-only one-time initialization API: ``_PyOnceFlag`` and +``_PyOnceFlag_CallOnce``. diff --git a/Modules/_testinternalcapi/test_lock.c b/Modules/_testinternalcapi/test_lock.c index 82a0c827deeddf9..70112b5984da0d4 100644 --- a/Modules/_testinternalcapi/test_lock.c +++ b/Modules/_testinternalcapi/test_lock.c @@ -341,6 +341,37 @@ test_lock_benchmark(PyObject *module, PyObject *obj) Py_RETURN_NONE; } +static int +init_maybe_fail(void *arg) +{ + int *counter = (int *)arg; + (*counter)++; + if (*counter < 5) { + // failure + return 0; + } + assert(*counter == 5); + return 1; +} + +static PyObject * +test_lock_once(PyObject *self, PyObject *obj) +{ + _PyOnceFlag once = {0}; + int counter = 0; + for (int i = 0; i < 10; i++) { + int res = _PyOnceFlag_CallOnce(&once, init_maybe_fail, &counter); + if (i < 4) { + assert(res == 0); + } + else { + assert(res == 1); + assert(counter == 5); + } + } + Py_RETURN_NONE; +} + static PyMethodDef test_methods[] = { {"test_lock_basic", test_lock_basic, METH_NOARGS}, {"test_lock_two_threads", test_lock_two_threads, METH_NOARGS}, @@ -348,6 +379,7 @@ static PyMethodDef test_methods[] = { {"test_lock_counter_slow", test_lock_counter_slow, METH_NOARGS}, _TESTINTERNALCAPI_BENCHMARK_LOCKS_METHODDEF {"test_lock_benchmark", test_lock_benchmark, METH_NOARGS}, + {"test_lock_once", test_lock_once, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index f61099b97055adb..c0665c70dc372c9 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -1102,13 +1102,6 @@ def visitModule(self, mod): static int init_types(struct ast_state *state) { - // init_types() must not be called after _PyAST_Fini() - // has been called - assert(state->initialized >= 0); - - if (state->initialized) { - return 1; - } if (init_identifiers(state) < 0) { return 0; } @@ -1125,7 +1118,6 @@ def visitModule(self, mod): self.file.write(textwrap.dedent(''' state->recursion_depth = 0; state->recursion_limit = 0; - state->initialized = 1; return 1; } ''')) @@ -1480,7 +1472,8 @@ def visit(self, object): def generate_ast_state(module_state, f): f.write('struct ast_state {\n') - f.write(' int initialized;\n') + f.write(' _PyOnceFlag once;\n') + f.write(' int finalized;\n') f.write(' int recursion_depth;\n') f.write(' int recursion_limit;\n') for s in module_state: @@ -1500,11 +1493,8 @@ def generate_ast_fini(module_state, f): f.write(textwrap.dedent(""" Py_CLEAR(_Py_INTERP_CACHED_OBJECT(interp, str_replace_inf)); - #if !defined(NDEBUG) - state->initialized = -1; - #else - state->initialized = 0; - #endif + state->finalized = 1; + state->once = (_PyOnceFlag){0}; } """)) @@ -1543,6 +1533,7 @@ def generate_module_def(mod, metadata, f, internal_h): #include "pycore_ast.h" #include "pycore_ast_state.h" // struct ast_state #include "pycore_ceval.h" // _Py_EnterRecursiveCall + #include "pycore_lock.h" // _PyOnceFlag #include "pycore_interp.h" // _PyInterpreterState.ast #include "pycore_pystate.h" // _PyInterpreterState_GET() #include @@ -1555,7 +1546,8 @@ def generate_module_def(mod, metadata, f, internal_h): { PyInterpreterState *interp = _PyInterpreterState_GET(); struct ast_state *state = &interp->ast; - if (!init_types(state)) { + assert(!state->finalized); + if (!_PyOnceFlag_CallOnce(&state->once, (_Py_once_fn_t *)&init_types, state)) { return NULL; } return state; @@ -1628,6 +1620,9 @@ def write_internal_h_header(mod, f): print(textwrap.dedent(""" #ifndef Py_INTERNAL_AST_STATE_H #define Py_INTERNAL_AST_STATE_H + + #include "pycore_lock.h" // _PyOnceFlag + #ifdef __cplusplus extern "C" { #endif diff --git a/Python/Python-ast.c b/Python/Python-ast.c index a197d44868b5aa3..99136bdc1044c9a 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -4,6 +4,7 @@ #include "pycore_ast.h" #include "pycore_ast_state.h" // struct ast_state #include "pycore_ceval.h" // _Py_EnterRecursiveCall +#include "pycore_lock.h" // _PyOnceFlag #include "pycore_interp.h" // _PyInterpreterState.ast #include "pycore_pystate.h" // _PyInterpreterState_GET() #include @@ -16,7 +17,8 @@ get_ast_state(void) { PyInterpreterState *interp = _PyInterpreterState_GET(); struct ast_state *state = &interp->ast; - if (!init_types(state)) { + assert(!state->finalized); + if (!_PyOnceFlag_CallOnce(&state->once, (_Py_once_fn_t *)&init_types, state)) { return NULL; } return state; @@ -271,11 +273,8 @@ void _PyAST_Fini(PyInterpreterState *interp) Py_CLEAR(_Py_INTERP_CACHED_OBJECT(interp, str_replace_inf)); -#if !defined(NDEBUG) - state->initialized = -1; -#else - state->initialized = 0; -#endif + state->finalized = 1; + state->once = (_PyOnceFlag){0}; } static int init_identifiers(struct ast_state *state) @@ -1126,13 +1125,6 @@ static int add_ast_fields(struct ast_state *state) static int init_types(struct ast_state *state) { - // init_types() must not be called after _PyAST_Fini() - // has been called - assert(state->initialized >= 0); - - if (state->initialized) { - return 1; - } if (init_identifiers(state) < 0) { return 0; } @@ -1920,7 +1912,6 @@ init_types(struct ast_state *state) state->recursion_depth = 0; state->recursion_limit = 0; - state->initialized = 1; return 1; } diff --git a/Python/getargs.c b/Python/getargs.c index 5a12ca8def74faa..53d1150b7a4ff76 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1877,8 +1877,9 @@ new_kwtuple(const char * const *keywords, int total, int pos) } static int -_parser_init(struct _PyArg_Parser *parser) +_parser_init(void *arg) { + struct _PyArg_Parser *parser = (struct _PyArg_Parser *)arg; const char * const *keywords = parser->keywords; assert(keywords != NULL); assert(parser->pos == 0 && @@ -1925,40 +1926,27 @@ _parser_init(struct _PyArg_Parser *parser) parser->min = min; parser->max = max; parser->kwtuple = kwtuple; - parser->initialized = owned ? 1 : -1; + parser->is_kwtuple_owned = owned; assert(parser->next == NULL); - parser->next = _PyRuntime.getargs.static_parsers; - _PyRuntime.getargs.static_parsers = parser; + parser->next = _Py_atomic_load_ptr(&_PyRuntime.getargs.static_parsers); + do { + // compare-exchange updates parser->next on failure + } while (_Py_atomic_compare_exchange_ptr(&_PyRuntime.getargs.static_parsers, + &parser->next, parser)); return 1; } static int parser_init(struct _PyArg_Parser *parser) { - // volatile as it can be modified by other threads - // and should not be optimized or reordered by compiler - if (*((volatile int *)&parser->initialized)) { - assert(parser->kwtuple != NULL); - return 1; - } - PyThread_acquire_lock(_PyRuntime.getargs.mutex, WAIT_LOCK); - // Check again if another thread initialized the parser - // while we were waiting for the lock. - if (*((volatile int *)&parser->initialized)) { - assert(parser->kwtuple != NULL); - PyThread_release_lock(_PyRuntime.getargs.mutex); - return 1; - } - int ret = _parser_init(parser); - PyThread_release_lock(_PyRuntime.getargs.mutex); - return ret; + return _PyOnceFlag_CallOnce(&parser->once, &_parser_init, parser); } static void parser_clear(struct _PyArg_Parser *parser) { - if (parser->initialized == 1) { + if (parser->is_kwtuple_owned) { Py_CLEAR(parser->kwtuple); } } diff --git a/Python/lock.c b/Python/lock.c index 3dad2aa93b5cc97..df8d7b8c8a788fb 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -295,3 +295,52 @@ PyEvent_WaitTimed(PyEvent *evt, _PyTime_t timeout_ns) return _Py_atomic_load_uint8(&evt->v) == _Py_LOCKED; } } + +static int +unlock_once(_PyOnceFlag *o, int res) +{ + // On success (res=1), we set the state to _Py_ONCE_INITIALIZED. + // On failure (res=0), we reset the state to _Py_UNLOCKED. + uint8_t new_value = res ? _Py_ONCE_INITIALIZED : _Py_UNLOCKED; + + uint8_t old_value = _Py_atomic_exchange_uint8(&o->v, new_value); + if ((old_value & _Py_HAS_PARKED) != 0) { + // wake up anyone waiting on the once flag + _PyParkingLot_UnparkAll(&o->v); + } + return res; +} + +int +_PyOnceFlag_CallOnceSlow(_PyOnceFlag *flag, _Py_once_fn_t *fn, void *arg) +{ + uint8_t v = _Py_atomic_load_uint8(&flag->v); + for (;;) { + if (v == _Py_UNLOCKED) { + if (!_Py_atomic_compare_exchange_uint8(&flag->v, &v, _Py_LOCKED)) { + continue; + } + int res = fn(arg); + return unlock_once(flag, res); + } + + if (v == _Py_ONCE_INITIALIZED) { + return 1; + } + + // The once flag is initializing (locked). + assert((v & _Py_LOCKED)); + if (!(v & _Py_HAS_PARKED)) { + // We are the first waiter. Set the _Py_HAS_PARKED flag. + uint8_t new_value = v | _Py_HAS_PARKED; + if (!_Py_atomic_compare_exchange_uint8(&flag->v, &v, new_value)) { + continue; + } + v = new_value; + } + + // Wait for initialization to finish. + _PyParkingLot_Park(&flag->v, &v, sizeof(v), -1, NULL, 1); + v = _Py_atomic_load_uint8(&flag->v); + } +}