diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index f61f0af6bb263d..077508e6c58f6c 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -5,6 +5,7 @@ extern "C" { #endif +#include "pycore_hashtable.h" // _Py_hashtable_t #include "pycore_time.h" // _PyTime_t extern int _PyImport_IsInitialized(PyInterpreterState *); @@ -36,19 +37,15 @@ struct _import_runtime_state { See PyInterpreterState.modules_by_index for more info. */ Py_ssize_t last_module_index; struct { - /* A thread state tied to the main interpreter, - used exclusively for when the extensions dict is access/modified - from an arbitrary thread. */ - PyThreadState main_tstate; - /* A lock to guard the dict. */ + /* A lock to guard the cache. */ PyThread_type_lock mutex; - /* A dict mapping (filename, name) to PyModuleDef for modules. + /* The actual cache of (filename, name, PyModuleDef) for modules. Only legacy (single-phase init) extension modules are added and only if they support multiple initialization (m_size >- 0) or are imported in the main interpreter. This is initialized lazily in _PyImport_FixupExtensionObject(). Modules are added there and looked up in _imp.find_extension(). */ - PyObject *dict; + _Py_hashtable_t *hashtable; } extensions; /* Package context -- the full module name for package imports */ const char * pkgcontext; diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index f0db757c77c58b..acc6cf953343fb 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -125,11 +125,6 @@ extern PyThreadState * _PyThreadState_New(PyInterpreterState *interp); extern void _PyThreadState_Bind(PyThreadState *tstate); extern void _PyThreadState_DeleteExcept(PyThreadState *tstate); -extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *); -extern void _PyThreadState_ClearDetached(PyThreadState *); -extern void _PyThreadState_BindDetached(PyThreadState *); -extern void _PyThreadState_UnbindDetached(PyThreadState *); - // Export for '_testinternalcapi' shared extension PyAPI_FUNC(PyObject*) _PyThreadState_GetDict(PyThreadState *tstate); diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 840e41c59ca737..dcfceef4f175a2 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -97,11 +97,6 @@ extern PyTypeObject _PyExc_MemoryError; in accordance with the specification. */ \ .autoTSSkey = Py_tss_NEEDS_INIT, \ .parser = _parser_runtime_state_INIT, \ - .imports = { \ - .extensions = { \ - .main_tstate = _PyThreadState_INIT, \ - }, \ - }, \ .ceval = { \ .perf = _PyEval_RUNTIME_PERF_INIT, \ }, \ diff --git a/Python/import.c b/Python/import.c index 711ed5d32ca97f..e381d1fd7d433e 100644 --- a/Python/import.c +++ b/Python/import.c @@ -2,10 +2,12 @@ #include "Python.h" +#include "pycore_hashtable.h" // _Py_hashtable_new_full() #include "pycore_import.h" // _PyImport_BootstrapImp() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // struct _import_runtime_state #include "pycore_namespace.h" // _PyNamespace_Type +#include "pycore_object.h" // _Py_SetImmortal() #include "pycore_pyerrors.h" // _PyErr_SetString() #include "pycore_pyhash.h" // _Py_KeyedHash() #include "pycore_pylifecycle.h" @@ -905,35 +907,79 @@ extensions_lock_release(void) dictionary, to avoid loading shared libraries twice. */ +static void * +hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) +{ + Py_ssize_t str1_len, str2_len; + const char *str1_data = PyUnicode_AsUTF8AndSize(str1, &str1_len); + const char *str2_data = PyUnicode_AsUTF8AndSize(str2, &str2_len); + if (str1_data == NULL || str2_data == NULL) { + return NULL; + } + /* Make sure sep and the NULL byte won't cause an overflow. */ + assert(SIZE_MAX - str1_len - str2_len > 2); + size_t size = str1_len + 1 + str2_len + 1; + + char *key = PyMem_RawMalloc(size); + if (key == NULL) { + PyErr_NoMemory(); + return NULL; + } + + strncpy(key, str1_data, str1_len); + key[str1_len] = sep; + strncpy(key + str1_len + 1, str2_data, str2_len + 1); + assert(strlen(key) == size - 1); + return key; +} + +static Py_uhash_t +hashtable_hash_str(const void *key) +{ + return _Py_HashBytes(key, strlen((const char *)key)); +} + +static int +hashtable_compare_str(const void *key1, const void *key2) +{ + return strcmp((const char *)key1, (const char *)key2) == 0; +} + static void -_extensions_cache_init(void) +hashtable_destroy_str(void *ptr) { - /* The runtime (i.e. main interpreter) must be initializing, - so we don't need to worry about the lock. */ - _PyThreadState_InitDetached(&EXTENSIONS.main_tstate, - _PyInterpreterState_Main()); + PyMem_RawFree(ptr); } +#define HTSEP ':' + static PyModuleDef * _extensions_cache_get(PyObject *filename, PyObject *name) { PyModuleDef *def = NULL; + void *key = NULL; extensions_lock_acquire(); - PyObject *key = PyTuple_Pack(2, filename, name); - if (key == NULL) { + if (EXTENSIONS.hashtable == NULL) { goto finally; } - PyObject *extensions = EXTENSIONS.dict; - if (extensions == NULL) { + key = hashtable_key_from_2_strings(filename, name, HTSEP); + if (key == NULL) { + goto finally; + } + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( + EXTENSIONS.hashtable, key); + if (entry == NULL) { goto finally; } - def = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); + def = (PyModuleDef *)entry->value; finally: - Py_XDECREF(key); extensions_lock_release(); + if (key != NULL) { + PyMem_RawFree(key); + } return def; } @@ -941,124 +987,99 @@ static int _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) { int res = -1; - PyThreadState *oldts = NULL; extensions_lock_acquire(); - /* Swap to the main interpreter, if necessary. This matters if - the dict hasn't been created yet or if the item isn't in the - dict yet. In both cases we must ensure the relevant objects - are created using the main interpreter. */ - PyThreadState *main_tstate = &EXTENSIONS.main_tstate; - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_Py_IsMainInterpreter(interp)) { - _PyThreadState_BindDetached(main_tstate); - oldts = _PyThreadState_Swap(interp->runtime, main_tstate); - assert(!_Py_IsMainInterpreter(oldts->interp)); - - /* Make sure the name and filename objects are owned - by the main interpreter. */ - name = PyUnicode_InternFromString(PyUnicode_AsUTF8(name)); - assert(name != NULL); - filename = PyUnicode_InternFromString(PyUnicode_AsUTF8(filename)); - assert(filename != NULL); + if (EXTENSIONS.hashtable == NULL) { + _Py_hashtable_allocator_t alloc = {PyMem_RawMalloc, PyMem_RawFree}; + EXTENSIONS.hashtable = _Py_hashtable_new_full( + hashtable_hash_str, + hashtable_compare_str, + hashtable_destroy_str, // key + /* There's no need to decref the def since it's immortal. */ + NULL, // value + &alloc + ); + if (EXTENSIONS.hashtable == NULL) { + PyErr_NoMemory(); + goto finally; + } } - PyObject *key = PyTuple_Pack(2, filename, name); + void *key = hashtable_key_from_2_strings(filename, name, HTSEP); if (key == NULL) { goto finally; } - PyObject *extensions = EXTENSIONS.dict; - if (extensions == NULL) { - extensions = PyDict_New(); - if (extensions == NULL) { + int already_set = 0; + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( + EXTENSIONS.hashtable, key); + if (entry == NULL) { + if (_Py_hashtable_set(EXTENSIONS.hashtable, key, def) < 0) { + PyMem_RawFree(key); + PyErr_NoMemory(); goto finally; } - EXTENSIONS.dict = extensions; - } - - PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); - if (PyErr_Occurred()) { - goto finally; } - else if (actual != NULL) { - /* We expect it to be static, so it must be the same pointer. */ - assert(def == actual); - res = 0; - goto finally; + else { + if (entry->value == NULL) { + entry->value = def; + } + else { + /* We expect it to be static, so it must be the same pointer. */ + assert((PyModuleDef *)entry->value == def); + already_set = 1; + } + PyMem_RawFree(key); } - - /* This might trigger a resize, which is why we must switch - to the main interpreter. */ - res = PyDict_SetItem(extensions, key, (PyObject *)def); - if (res < 0) { - res = -1; - goto finally; + if (!already_set) { + /* We assume that all module defs are statically allocated + and will never be freed. Otherwise, we would incref here. */ + _Py_SetImmortal(def); } res = 0; finally: - Py_XDECREF(key); - if (oldts != NULL) { - _PyThreadState_Swap(interp->runtime, oldts); - _PyThreadState_UnbindDetached(main_tstate); - Py_DECREF(name); - Py_DECREF(filename); - } extensions_lock_release(); return res; } -static int +static void _extensions_cache_delete(PyObject *filename, PyObject *name) { - int res = -1; - PyThreadState *oldts = NULL; + void *key = NULL; extensions_lock_acquire(); - PyObject *key = PyTuple_Pack(2, filename, name); - if (key == NULL) { + if (EXTENSIONS.hashtable == NULL) { + /* It was never added. */ goto finally; } - PyObject *extensions = EXTENSIONS.dict; - if (extensions == NULL) { - res = 0; + key = hashtable_key_from_2_strings(filename, name, HTSEP); + if (key == NULL) { goto finally; } - PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); - if (PyErr_Occurred()) { + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( + EXTENSIONS.hashtable, key); + if (entry == NULL) { + /* It was never added. */ goto finally; } - else if (actual == NULL) { - /* It was already removed or never added. */ - res = 0; + if (entry->value == NULL) { + /* It was already removed. */ goto finally; } - - /* Swap to the main interpreter, if necessary. */ - PyThreadState *main_tstate = &EXTENSIONS.main_tstate; - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_Py_IsMainInterpreter(interp)) { - _PyThreadState_BindDetached(main_tstate); - oldts = _PyThreadState_Swap(interp->runtime, main_tstate); - assert(!_Py_IsMainInterpreter(oldts->interp)); - } - - if (PyDict_DelItem(extensions, key) < 0) { - goto finally; - } - res = 0; + /* If we hadn't made the stored defs immortal, we would decref here. + However, this decref would be problematic if the module def were + dynamically allocated, it were the last ref, and this function + were called with an interpreter other than the def's owner. */ + entry->value = NULL; finally: - if (oldts != NULL) { - _PyThreadState_Swap(interp->runtime, oldts); - _PyThreadState_UnbindDetached(main_tstate); - } - Py_XDECREF(key); extensions_lock_release(); - return res; + if (key != NULL) { + PyMem_RawFree(key); + } } static void @@ -1066,11 +1087,12 @@ _extensions_cache_clear_all(void) { /* The runtime (i.e. main interpreter) must be finalizing, so we don't need to worry about the lock. */ - // XXX assert(_Py_IsMainInterpreter(_PyInterpreterState_GET())); - Py_CLEAR(EXTENSIONS.dict); - _PyThreadState_ClearDetached(&EXTENSIONS.main_tstate); + _Py_hashtable_destroy(EXTENSIONS.hashtable); + EXTENSIONS.hashtable = NULL; } +#undef HTSEP + static bool check_multi_interp_extensions(PyInterpreterState *interp) @@ -1231,6 +1253,8 @@ import_find_extension(PyThreadState *tstate, PyObject *name, PyObject *m_copy = def->m_base.m_copy; /* Module does not support repeated initialization */ if (m_copy == NULL) { + /* It might be a core module (e.g. sys & builtins), + for which we don't set m_copy. */ m_copy = get_core_module_dict(tstate->interp, name, filename); if (m_copy == NULL) { return NULL; @@ -1300,9 +1324,7 @@ clear_singlephase_extension(PyInterpreterState *interp, } /* Clear the cached module def. */ - if (_extensions_cache_delete(filename, name) < 0) { - return -1; - } + _extensions_cache_delete(filename, name); return 0; } @@ -3051,6 +3073,8 @@ void _PyImport_Fini(void) { /* Destroy the database used by _PyImport_{Fixup,Find}Extension */ + // XXX Should we actually leave them (mostly) intact, since we don't + // ever dlclose() the module files? _extensions_cache_clear_all(); /* Use the same memory allocator as _PyImport_Init(). */ @@ -3088,10 +3112,6 @@ _PyImport_Fini2(void) PyStatus _PyImport_InitCore(PyThreadState *tstate, PyObject *sysmod, int importlib) { - if (_Py_IsMainInterpreter(tstate->interp)) { - _extensions_cache_init(); - } - // XXX Initialize here: interp->modules and interp->import_func. // XXX Initialize here: sys.modules and sys.meta_path. diff --git a/Python/pystate.c b/Python/pystate.c index 3ec131b6cba994..a1864cc3249e2f 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1640,75 +1640,6 @@ _PyThreadState_DeleteExcept(PyThreadState *tstate) } -//------------------------- -// "detached" thread states -//------------------------- - -void -_PyThreadState_InitDetached(PyThreadState *tstate, PyInterpreterState *interp) -{ - _PyRuntimeState *runtime = interp->runtime; - - HEAD_LOCK(runtime); - interp->threads.next_unique_id += 1; - uint64_t id = interp->threads.next_unique_id; - HEAD_UNLOCK(runtime); - - init_threadstate(tstate, interp, id); - // We do not call add_threadstate(). -} - -void -_PyThreadState_ClearDetached(PyThreadState *tstate) -{ - assert(!tstate->_status.bound); - assert(!tstate->_status.bound_gilstate); - assert(tstate->datastack_chunk == NULL); - assert(tstate->thread_id == 0); - assert(tstate->native_thread_id == 0); - assert(tstate->next == NULL); - assert(tstate->prev == NULL); - - PyThreadState_Clear(tstate); - clear_datastack(tstate); -} - -void -_PyThreadState_BindDetached(PyThreadState *tstate) -{ - assert(!_Py_IsMainInterpreter( - current_fast_get(tstate->interp->runtime)->interp)); - assert(_Py_IsMainInterpreter(tstate->interp)); - bind_tstate(tstate); - /* Unlike _PyThreadState_Bind(), we do not modify gilstate TSS. */ -} - -void -_PyThreadState_UnbindDetached(PyThreadState *tstate) -{ - assert(!_Py_IsMainInterpreter( - current_fast_get(tstate->interp->runtime)->interp)); - assert(_Py_IsMainInterpreter(tstate->interp)); - assert(tstate_is_alive(tstate)); - assert(!tstate->_status.active); - assert(gilstate_tss_get(tstate->interp->runtime) != tstate); - - unbind_tstate(tstate); - - /* This thread state may be bound/unbound repeatedly, - so we must erase evidence that it was ever bound (or unbound). */ - tstate->_status.bound = 0; - tstate->_status.unbound = 0; - - /* We must fully unlink the thread state from any OS thread, - to allow it to be bound more than once. */ - tstate->thread_id = 0; -#ifdef PY_HAVE_THREAD_NATIVE_ID - tstate->native_thread_id = 0; -#endif -} - - //---------- // accessors //----------