From 6e7ddcce1a6c402b96042be9c2ee0c7e9e5ebcaf Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 18 Jun 2020 15:55:11 -0400 Subject: [PATCH] Support for PEP 703 / nogil Python --- include/pybind11/detail/class.h | 104 +++++++------- include/pybind11/detail/internals.h | 149 ++++++++++++++++++--- include/pybind11/detail/type_caster_base.h | 48 ++++--- include/pybind11/numpy.h | 4 +- include/pybind11/pybind11.h | 106 +++++++++------ tests/pybind11_tests.cpp | 2 +- 6 files changed, 283 insertions(+), 130 deletions(-) diff --git a/include/pybind11/detail/class.h b/include/pybind11/detail/class.h index bc2b40c50a..145edd52f0 100644 --- a/include/pybind11/detail/class.h +++ b/include/pybind11/detail/class.h @@ -208,39 +208,40 @@ extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, P /// Cleanup the type-info for a pybind11-registered type. extern "C" inline void pybind11_meta_dealloc(PyObject *obj) { - auto *type = (PyTypeObject *) obj; - auto &internals = get_internals(); - - // A pybind11-registered type will: - // 1) be found in internals.registered_types_py - // 2) have exactly one associated `detail::type_info` - auto found_type = internals.registered_types_py.find(type); - if (found_type != internals.registered_types_py.end() && found_type->second.size() == 1 - && found_type->second[0]->type == type) { - - auto *tinfo = found_type->second[0]; - auto tindex = std::type_index(*tinfo->cpptype); - internals.direct_conversions.erase(tindex); - - if (tinfo->module_local) { - get_local_internals().registered_types_cpp.erase(tindex); - } else { - internals.registered_types_cpp.erase(tindex); - } - internals.registered_types_py.erase(tinfo->type); - - // Actually just `std::erase_if`, but that's only available in C++20 - auto &cache = internals.inactive_override_cache; - for (auto it = cache.begin(), last = cache.end(); it != last;) { - if (it->first == (PyObject *) tinfo->type) { - it = cache.erase(it); + with_internals([obj](internals &internals) { + auto *type = (PyTypeObject *) obj; + + // A pybind11-registered type will: + // 1) be found in internals.registered_types_py + // 2) have exactly one associated `detail::type_info` + auto found_type = internals.registered_types_py.find(type); + if (found_type != internals.registered_types_py.end() && found_type->second.size() == 1 + && found_type->second[0]->type == type) { + + auto *tinfo = found_type->second[0]; + auto tindex = std::type_index(*tinfo->cpptype); + internals.direct_conversions.erase(tindex); + + if (tinfo->module_local) { + get_local_internals().registered_types_cpp.erase(tindex); } else { - ++it; + internals.registered_types_cpp.erase(tindex); + } + internals.registered_types_py.erase(tinfo->type); + + // Actually just `std::erase_if`, but that's only available in C++20 + auto &cache = internals.inactive_override_cache; + for (auto it = cache.begin(), last = cache.end(); it != last;) { + if (it->first == (PyObject *) tinfo->type) { + it = cache.erase(it); + } else { + ++it; + } } - } - delete tinfo; - } + delete tinfo; + } + }); PyType_Type.tp_dealloc(obj); } @@ -313,19 +314,20 @@ inline void traverse_offset_bases(void *valueptr, } inline bool register_instance_impl(void *ptr, instance *self) { - get_internals().registered_instances.emplace(ptr, self); + with_instance_map(ptr, [&](instance_map &instances) { instances.emplace(ptr, self); }); return true; // unused, but gives the same signature as the deregister func } inline bool deregister_instance_impl(void *ptr, instance *self) { - auto ®istered_instances = get_internals().registered_instances; - auto range = registered_instances.equal_range(ptr); - for (auto it = range.first; it != range.second; ++it) { - if (self == it->second) { - registered_instances.erase(it); - return true; + return with_instance_map(ptr, [&](instance_map &instances) { + auto range = instances.equal_range(ptr); + for (auto it = range.first; it != range.second; ++it) { + if (self == it->second) { + instances.erase(it); + return true; + } } - } - return false; + return false; + }); } inline void register_instance(instance *self, void *valptr, const type_info *tinfo) { @@ -380,23 +382,27 @@ extern "C" inline int pybind11_object_init(PyObject *self, PyObject *, PyObject } inline void add_patient(PyObject *nurse, PyObject *patient) { - auto &internals = get_internals(); auto *instance = reinterpret_cast(nurse); instance->has_patients = true; Py_INCREF(patient); - internals.patients[nurse].push_back(patient); + + with_internals([&](internals &internals) { internals.patients[nurse].push_back(patient); }); } inline void clear_patients(PyObject *self) { auto *instance = reinterpret_cast(self); - auto &internals = get_internals(); - auto pos = internals.patients.find(self); - assert(pos != internals.patients.end()); - // Clearing the patients can cause more Python code to run, which - // can invalidate the iterator. Extract the vector of patients - // from the unordered_map first. - auto patients = std::move(pos->second); - internals.patients.erase(pos); + std::vector patients; + + with_internals([&](internals &internals) { + auto pos = internals.patients.find(self); + assert(pos != internals.patients.end()); + // Clearing the patients can cause more Python code to run, which + // can invalidate the iterator. Extract the vector of patients + // from the unordered_map first. + patients = std::move(pos->second); + internals.patients.erase(pos); + }); + instance->has_patients = false; for (PyObject *&patient : patients) { Py_CLEAR(patient); diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index aaa7f8686e..afcd0d35aa 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -18,6 +18,8 @@ #include "../pytypes.h" #include +#include +#include /// Tracks the `internals` and `type_info` ABI version independent of the main library version. /// @@ -34,8 +36,11 @@ /// further ABI-incompatible changes may be made before the ABI is officially /// changed to the new version. #ifndef PYBIND11_INTERNALS_VERSION -# if PY_VERSION_HEX >= 0x030C0000 -// Version bump for Python 3.12+, before first 3.12 beta release. +# if PY_VERSION_HEX >= 0x030D0000 || defined(Py_NOGIL) +// Version bump for Python 3.13+ or Py_NOGIL. +# define PYBIND11_INTERNALS_VERSION 6 +# elif PY_VERSION_HEX >= 0x030C0000 +// Version bump for Python 3.12, before first 3.12 beta release. # define PYBIND11_INTERNALS_VERSION 5 # else # define PYBIND11_INTERNALS_VERSION 4 @@ -162,15 +167,31 @@ struct override_hash { } }; +using instance_map = std::unordered_multimap; + +struct instance_map_shard { + std::mutex mutex; + instance_map registered_instances; + char padding[64 - (sizeof(std::mutex) + sizeof(instance_map)) % 64]; +}; + /// Internal data structure used to track registered instances and types. /// Whenever binary incompatible changes are made to this structure, /// `PYBIND11_INTERNALS_VERSION` must be incremented. struct internals { +#if PYBIND11_INTERNALS_VERSION >= 6 + std::mutex mutex; +#endif // std::type_index -> pybind11's type information type_map registered_types_cpp; // PyTypeObject* -> base type_info(s) std::unordered_map> registered_types_py; - std::unordered_multimap registered_instances; // void * -> instance* +#if PYBIND11_INTERNALS_VERSION >= 6 + std::unique_ptr instance_shards; // void * -> instance* + size_t instance_shards_mask; +#else + instance_map registered_instances; // void * -> instance* +#endif std::unordered_set, override_hash> inactive_override_cache; type_map> direct_conversions; @@ -463,6 +484,20 @@ inline internals **get_internals_pp_from_capsule(handle obj) { return static_cast(raw_ptr); } +inline uint64_t next_pow2(uint64_t x) { + // Round-up to the next power of two. + // See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + x--; + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + x |= (x >> 32); + x++; + return x; +} + /// Return a reference to the current `internals` data PYBIND11_NOINLINE internals &get_internals() { auto **&internals_pp = get_internals_pp(); @@ -531,6 +566,18 @@ PYBIND11_NOINLINE internals &get_internals() { internals_ptr->static_property_type = make_static_property_type(); internals_ptr->default_metaclass = make_default_metaclass(); internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass); +#if PYBIND11_INTERNALS_VERSION >= 6 +# if defined(Py_NOGIL) + size_t num_shards = (size_t) next_pow2(2 * std::thread::hardware_concurrency()); + if (num_shards == 0) { + num_shards = 1; + } +# else + size_t num_shards = 1; +# endif + internals_ptr->instance_shards.reset(new instance_map_shard[num_shards]); + internals_ptr->instance_shards_mask = num_shards - 1; +#endif // PYBIND11_INTERNALS_VERSION >= 6 } return **internals_pp; } @@ -591,13 +638,77 @@ inline local_internals &get_local_internals() { return *locals; } +#if PYBIND11_INTERNALS_VERSION >= 6 && defined(Py_NOGIL) +# define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock lock((internals).mutex) +#else +# define PYBIND11_LOCK_INTERNALS(internals) +#endif + +template +inline auto with_internals(const F &cb) -> decltype(cb(get_internals())) { + auto &internals = get_internals(); + PYBIND11_LOCK_INTERNALS(internals); + return cb(internals); +} + +inline uint64_t splitmix64(uint64_t z) { + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + return z ^ (z >> 31); +} + +template +inline auto with_instance_map(const void *ptr, const F &cb) + -> decltype(cb(std::declval())) { + auto &internals = get_internals(); + +#if PYBIND11_INTERNALS_VERSION >= 6 + // Hash address to compute shard, but ignore low bits. We'd like allocations + // from the same thread/core to map to the same shard and allocations from + // other threads/cores to map to other shards. Using the high bits is a good + // heuristic because memory allocators often have a per-thread + // arena/superblock/segment from which smaller allocations are served. + auto addr = reinterpret_cast(ptr); + uint64_t hash = splitmix64((uint64_t) (addr >> 20)); + size_t idx = (size_t) hash & internals.instance_shards_mask; + + auto &shard = internals.instance_shards[idx]; +# if defined(Py_NOGIL) + std::unique_lock lock(shard.mutex); +# endif + return cb(shard.registered_instances); +#else + (void) ptr; + return cb(internals.registered_instances); +#endif +} + +inline size_t num_registered_instances() { + auto &internals = get_internals(); +#if PYBIND11_INTERNALS_VERSION >= 6 + size_t count = 0; + for (size_t i = 0; i <= internals.instance_shards_mask; ++i) { + auto &shard = internals.instance_shards[i]; + std::unique_lock lock(shard.mutex); + count += shard.registered_instances.size(); + } + return count; +#else + return internals.registered_instances.size(); +#endif +} + /// Constructs a std::string with the given arguments, stores it in `internals`, and returns its /// `c_str()`. Such strings objects have a long storage duration -- the internal strings are only /// cleared when the program exits or after interpreter shutdown (when embedding), and so are /// suitable for c-style strings needed by Python internals (such as PyTypeObject's tp_name). template const char *c_str(Args &&...args) { - auto &strings = get_internals().static_strings; + // GCC 4.8 doesn't like parameter unpack within lambda capture, so use + // PYBIND11_LOCK_INTERNALS. + auto &internals = get_internals(); + PYBIND11_LOCK_INTERNALS(internals); + auto &strings = internals.static_strings; strings.emplace_front(std::forward(args)...); return strings.front().c_str(); } @@ -627,15 +738,18 @@ PYBIND11_NAMESPACE_END(detail) /// pybind11 version) running in the current interpreter. Names starting with underscores /// are reserved for internal usage. Returns `nullptr` if no matching entry was found. PYBIND11_NOINLINE void *get_shared_data(const std::string &name) { - auto &internals = detail::get_internals(); - auto it = internals.shared_data.find(name); - return it != internals.shared_data.end() ? it->second : nullptr; + return detail::with_internals([&](detail::internals &internals) { + auto it = internals.shared_data.find(name); + return it != internals.shared_data.end() ? it->second : nullptr; + }); } /// Set the shared data that can be later recovered by `get_shared_data()`. PYBIND11_NOINLINE void *set_shared_data(const std::string &name, void *data) { - detail::get_internals().shared_data[name] = data; - return data; + return detail::with_internals([&](detail::internals &internals) { + internals.shared_data[name] = data; + return data; + }); } /// Returns a typed reference to a shared data entry (by using `get_shared_data()`) if @@ -643,14 +757,15 @@ PYBIND11_NOINLINE void *set_shared_data(const std::string &name, void *data) { /// added to the shared data under the given name and a reference to it is returned. template T &get_or_create_shared_data(const std::string &name) { - auto &internals = detail::get_internals(); - auto it = internals.shared_data.find(name); - T *ptr = (T *) (it != internals.shared_data.end() ? it->second : nullptr); - if (!ptr) { - ptr = new T(); - internals.shared_data[name] = ptr; - } - return *ptr; + return *detail::with_internals([&](detail::internals &internals) { + auto it = internals.shared_data.find(name); + T *ptr = (T *) (it != internals.shared_data.end() ? it->second : nullptr); + if (!ptr) { + ptr = new T(); + internals.shared_data[name] = ptr; + } + return ptr; + }); } PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h index 16387506cf..76349151be 100644 --- a/include/pybind11/detail/type_caster_base.h +++ b/include/pybind11/detail/type_caster_base.h @@ -203,12 +203,15 @@ inline detail::type_info *get_local_type_info(const std::type_index &tp) { } inline detail::type_info *get_global_type_info(const std::type_index &tp) { - auto &types = get_internals().registered_types_cpp; - auto it = types.find(tp); - if (it != types.end()) { - return it->second; - } - return nullptr; + return with_internals([&](internals &internals) { + detail::type_info *type_info = nullptr; + auto &types = internals.registered_types_cpp; + auto it = types.find(tp); + if (it != types.end()) { + type_info = it->second; + } + return type_info; + }); } /// Return the type info for a given C++ type; on lookup failure can either throw or return @@ -239,15 +242,17 @@ PYBIND11_NOINLINE handle get_type_handle(const std::type_info &tp, bool throw_if // Searches the inheritance graph for a registered Python instance, using all_type_info(). PYBIND11_NOINLINE handle find_registered_python_instance(void *src, const detail::type_info *tinfo) { - auto it_instances = get_internals().registered_instances.equal_range(src); - for (auto it_i = it_instances.first; it_i != it_instances.second; ++it_i) { - for (auto *instance_type : detail::all_type_info(Py_TYPE(it_i->second))) { - if (instance_type && same_type(*instance_type->cpptype, *tinfo->cpptype)) { - return handle((PyObject *) it_i->second).inc_ref(); + return with_instance_map(src, [&](instance_map &instances) { + auto it_instances = instances.equal_range(src); + for (auto it_i = it_instances.first; it_i != it_instances.second; ++it_i) { + for (auto *instance_type : detail::all_type_info(Py_TYPE(it_i->second))) { + if (instance_type && same_type(*instance_type->cpptype, *tinfo->cpptype)) { + return handle((PyObject *) it_i->second).inc_ref(); + } } } - } - return handle(); + return handle(); + }); } struct value_and_holder { @@ -471,16 +476,17 @@ PYBIND11_NOINLINE bool isinstance_generic(handle obj, const std::type_info &tp) } PYBIND11_NOINLINE handle get_object_handle(const void *ptr, const detail::type_info *type) { - auto &instances = get_internals().registered_instances; - auto range = instances.equal_range(ptr); - for (auto it = range.first; it != range.second; ++it) { - for (const auto &vh : values_and_holders(it->second)) { - if (vh.type == type) { - return handle((PyObject *) it->second); + return with_instance_map(ptr, [&](instance_map &instances) { + auto range = instances.equal_range(ptr); + for (auto it = range.first; it != range.second; ++it) { + for (const auto &vh : values_and_holders(it->second)) { + if (vh.type == type) { + return handle((PyObject *) it->second); + } } } - } - return handle(); + return handle(); + }); } inline PyThreadState *get_thread_state_unchecked() { diff --git a/include/pybind11/numpy.h b/include/pybind11/numpy.h index 36077ec04d..1e31dceab7 100644 --- a/include/pybind11/numpy.h +++ b/include/pybind11/numpy.h @@ -1418,7 +1418,9 @@ PYBIND11_NOINLINE void register_structured_dtype(any_container auto tindex = std::type_index(tinfo); numpy_internals.registered_dtypes[tindex] = {dtype_ptr, std::move(format_str)}; - get_internals().direct_conversions[tindex].push_back(direct_converter); + with_internals([tindex, &direct_converter](internals &internals) { + internals.direct_conversions[tindex].push_back(direct_converter); + }); } template diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 3bce1a01ba..588fd751f1 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -1013,13 +1013,20 @@ class cpp_function : public function { - delegate translation to the next translator by throwing a new type of exception. */ - auto &local_exception_translators - = get_local_internals().registered_exception_translators; - if (detail::apply_exception_translators(local_exception_translators)) { - return nullptr; - } - auto &exception_translators = get_internals().registered_exception_translators; - if (detail::apply_exception_translators(exception_translators)) { + bool handled = with_internals([&](internals &internals) { + auto &local_exception_translators + = get_local_internals().registered_exception_translators; + if (detail::apply_exception_translators(local_exception_translators)) { + return true; + } + auto &exception_translators = internals.registered_exception_translators; + if (detail::apply_exception_translators(exception_translators)) { + return true; + } + return false; + }); + + if (handled) { return nullptr; } @@ -1332,15 +1339,16 @@ class generic_type : public object { tinfo->default_holder = rec.default_holder; tinfo->module_local = rec.module_local; - auto &internals = get_internals(); - auto tindex = std::type_index(*rec.type); - tinfo->direct_conversions = &internals.direct_conversions[tindex]; - if (rec.module_local) { - get_local_internals().registered_types_cpp[tindex] = tinfo; - } else { - internals.registered_types_cpp[tindex] = tinfo; - } - internals.registered_types_py[(PyTypeObject *) m_ptr] = {tinfo}; + with_internals([&](internals &internals) { + auto tindex = std::type_index(*rec.type); + tinfo->direct_conversions = &internals.direct_conversions[tindex]; + if (rec.module_local) { + get_local_internals().registered_types_cpp[tindex] = tinfo; + } else { + internals.registered_types_cpp[tindex] = tinfo; + } + internals.registered_types_py[(PyTypeObject *) m_ptr] = {tinfo}; + }); if (rec.bases.size() > 1 || rec.multiple_inheritance) { mark_parents_nonsimple(tinfo->type); @@ -1553,10 +1561,12 @@ class class_ : public detail::generic_type { generic_type::initialize(record); if (has_alias) { - auto &instances = record.module_local ? get_local_internals().registered_types_cpp - : get_internals().registered_types_cpp; - instances[std::type_index(typeid(type_alias))] - = instances[std::type_index(typeid(type))]; + with_internals([&](internals &internals) { + auto &instances = record.module_local ? get_local_internals().registered_types_cpp + : internals.registered_types_cpp; + instances[std::type_index(typeid(type_alias))] + = instances[std::type_index(typeid(type))]; + }); } } @@ -2271,28 +2281,32 @@ keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret) { inline std::pair all_type_info_get_cache(PyTypeObject *type) { - auto res = get_internals() - .registered_types_py + auto res = with_internals([type](internals &internals) { + return internals + .registered_types_py #ifdef __cpp_lib_unordered_map_try_emplace - .try_emplace(type); + .try_emplace(type); #else - .emplace(type, std::vector()); + .emplace(type, std::vector()); #endif + }); if (res.second) { // New cache entry created; set up a weak reference to automatically remove it if the type // gets destroyed: weakref((PyObject *) type, cpp_function([type](handle wr) { - get_internals().registered_types_py.erase(type); - - // TODO consolidate the erasure code in pybind11_meta_dealloc() in class.h - auto &cache = get_internals().inactive_override_cache; - for (auto it = cache.begin(), last = cache.end(); it != last;) { - if (it->first == reinterpret_cast(type)) { - it = cache.erase(it); - } else { - ++it; + with_internals([type](internals &internals) { + internals.registered_types_py.erase(type); + + // TODO consolidate the erasure code in pybind11_meta_dealloc() in class.h + auto &cache = internals.inactive_override_cache; + for (auto it = cache.begin(), last = cache.end(); it != last;) { + if (it->first == reinterpret_cast(type)) { + it = cache.erase(it); + } else { + ++it; + } } - } + }); wr.dec_ref(); })) @@ -2510,8 +2524,10 @@ void implicitly_convertible() { } inline void register_exception_translator(ExceptionTranslator &&translator) { - detail::get_internals().registered_exception_translators.push_front( - std::forward(translator)); + detail::with_internals([&](detail::internals &internals) { + internals.registered_exception_translators.push_front( + std::forward(translator)); + }); } /** @@ -2521,8 +2537,11 @@ inline void register_exception_translator(ExceptionTranslator &&translator) { * the exception. */ inline void register_local_exception_translator(ExceptionTranslator &&translator) { - detail::get_local_internals().registered_exception_translators.push_front( - std::forward(translator)); + detail::with_internals([&](detail::internals &internals) { + (void) internals; + detail::get_local_internals().registered_exception_translators.push_front( + std::forward(translator)); + }); } /** @@ -2681,14 +2700,19 @@ get_type_override(const void *this_ptr, const type_info *this_type, const char * /* Cache functions that aren't overridden in Python to avoid many costly Python dictionary lookups below */ - auto &cache = get_internals().inactive_override_cache; - if (cache.find(key) != cache.end()) { + bool not_overridden = with_internals([&key](internals &internals) { + auto &cache = internals.inactive_override_cache; + return cache.find(key) != cache.end(); + }); + if (not_overridden) { return function(); } function override = getattr(self, name, function()); if (override.is_cpp_function()) { - cache.insert(std::move(key)); + with_internals([&](internals &internals) { + internals.inactive_override_cache.insert(std::move(key)); + }); return function(); } diff --git a/tests/pybind11_tests.cpp b/tests/pybind11_tests.cpp index 6240346487..264840dec8 100644 --- a/tests/pybind11_tests.cpp +++ b/tests/pybind11_tests.cpp @@ -58,7 +58,7 @@ void bind_ConstructorStats(py::module_ &m) { // registered instances to allow instance cleanup checks (invokes a GC first) .def_static("detail_reg_inst", []() { ConstructorStats::gc(); - return py::detail::get_internals().registered_instances.size(); + return py::detail::num_registered_instances(); }); }