Skip to content

Commit

Permalink
Support for PEP 703 / nogil Python
Browse files Browse the repository at this point in the history
  • Loading branch information
colesbury committed Sep 28, 2023
1 parent 8a099e4 commit 6e7ddcc
Show file tree
Hide file tree
Showing 6 changed files with 283 additions and 130 deletions.
104 changes: 55 additions & 49 deletions include/pybind11/detail/class.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,39 +208,40 @@ extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, P

/// Cleanup the type-info for a pybind11-registered type.
extern "C" inline void pybind11_meta_dealloc(PyObject *obj) {
auto *type = (PyTypeObject *) obj;
auto &internals = get_internals();

// A pybind11-registered type will:
// 1) be found in internals.registered_types_py
// 2) have exactly one associated `detail::type_info`
auto found_type = internals.registered_types_py.find(type);
if (found_type != internals.registered_types_py.end() && found_type->second.size() == 1
&& found_type->second[0]->type == type) {

auto *tinfo = found_type->second[0];
auto tindex = std::type_index(*tinfo->cpptype);
internals.direct_conversions.erase(tindex);

if (tinfo->module_local) {
get_local_internals().registered_types_cpp.erase(tindex);
} else {
internals.registered_types_cpp.erase(tindex);
}
internals.registered_types_py.erase(tinfo->type);

// Actually just `std::erase_if`, but that's only available in C++20
auto &cache = internals.inactive_override_cache;
for (auto it = cache.begin(), last = cache.end(); it != last;) {
if (it->first == (PyObject *) tinfo->type) {
it = cache.erase(it);
with_internals([obj](internals &internals) {
auto *type = (PyTypeObject *) obj;

// A pybind11-registered type will:
// 1) be found in internals.registered_types_py
// 2) have exactly one associated `detail::type_info`
auto found_type = internals.registered_types_py.find(type);
if (found_type != internals.registered_types_py.end() && found_type->second.size() == 1
&& found_type->second[0]->type == type) {

auto *tinfo = found_type->second[0];
auto tindex = std::type_index(*tinfo->cpptype);
internals.direct_conversions.erase(tindex);

if (tinfo->module_local) {
get_local_internals().registered_types_cpp.erase(tindex);
} else {
++it;
internals.registered_types_cpp.erase(tindex);
}
internals.registered_types_py.erase(tinfo->type);

// Actually just `std::erase_if`, but that's only available in C++20
auto &cache = internals.inactive_override_cache;
for (auto it = cache.begin(), last = cache.end(); it != last;) {
if (it->first == (PyObject *) tinfo->type) {
it = cache.erase(it);
} else {
++it;
}
}
}

delete tinfo;
}
delete tinfo;
}
});

PyType_Type.tp_dealloc(obj);
}
Expand Down Expand Up @@ -313,19 +314,20 @@ inline void traverse_offset_bases(void *valueptr,
}

inline bool register_instance_impl(void *ptr, instance *self) {
get_internals().registered_instances.emplace(ptr, self);
with_instance_map(ptr, [&](instance_map &instances) { instances.emplace(ptr, self); });
return true; // unused, but gives the same signature as the deregister func
}
inline bool deregister_instance_impl(void *ptr, instance *self) {
auto &registered_instances = get_internals().registered_instances;
auto range = registered_instances.equal_range(ptr);
for (auto it = range.first; it != range.second; ++it) {
if (self == it->second) {
registered_instances.erase(it);
return true;
return with_instance_map(ptr, [&](instance_map &instances) {
auto range = instances.equal_range(ptr);
for (auto it = range.first; it != range.second; ++it) {
if (self == it->second) {
instances.erase(it);
return true;
}
}
}
return false;
return false;
});
}

inline void register_instance(instance *self, void *valptr, const type_info *tinfo) {
Expand Down Expand Up @@ -380,23 +382,27 @@ extern "C" inline int pybind11_object_init(PyObject *self, PyObject *, PyObject
}

inline void add_patient(PyObject *nurse, PyObject *patient) {
auto &internals = get_internals();
auto *instance = reinterpret_cast<detail::instance *>(nurse);
instance->has_patients = true;
Py_INCREF(patient);
internals.patients[nurse].push_back(patient);

with_internals([&](internals &internals) { internals.patients[nurse].push_back(patient); });
}

inline void clear_patients(PyObject *self) {
auto *instance = reinterpret_cast<detail::instance *>(self);
auto &internals = get_internals();
auto pos = internals.patients.find(self);
assert(pos != internals.patients.end());
// Clearing the patients can cause more Python code to run, which
// can invalidate the iterator. Extract the vector of patients
// from the unordered_map first.
auto patients = std::move(pos->second);
internals.patients.erase(pos);
std::vector<PyObject *> patients;

with_internals([&](internals &internals) {
auto pos = internals.patients.find(self);
assert(pos != internals.patients.end());
// Clearing the patients can cause more Python code to run, which
// can invalidate the iterator. Extract the vector of patients
// from the unordered_map first.
patients = std::move(pos->second);
internals.patients.erase(pos);
});

instance->has_patients = false;
for (PyObject *&patient : patients) {
Py_CLEAR(patient);
Expand Down
149 changes: 132 additions & 17 deletions include/pybind11/detail/internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include "../pytypes.h"

#include <exception>
#include <mutex>
#include <thread>

/// Tracks the `internals` and `type_info` ABI version independent of the main library version.
///
Expand All @@ -34,8 +36,11 @@
/// further ABI-incompatible changes may be made before the ABI is officially
/// changed to the new version.
#ifndef PYBIND11_INTERNALS_VERSION
# if PY_VERSION_HEX >= 0x030C0000
// Version bump for Python 3.12+, before first 3.12 beta release.
# if PY_VERSION_HEX >= 0x030D0000 || defined(Py_NOGIL)
// Version bump for Python 3.13+ or Py_NOGIL.
# define PYBIND11_INTERNALS_VERSION 6
# elif PY_VERSION_HEX >= 0x030C0000
// Version bump for Python 3.12, before first 3.12 beta release.
# define PYBIND11_INTERNALS_VERSION 5
# else
# define PYBIND11_INTERNALS_VERSION 4
Expand Down Expand Up @@ -162,15 +167,31 @@ struct override_hash {
}
};

using instance_map = std::unordered_multimap<const void *, instance *>;

struct instance_map_shard {
std::mutex mutex;
instance_map registered_instances;
char padding[64 - (sizeof(std::mutex) + sizeof(instance_map)) % 64];
};

/// Internal data structure used to track registered instances and types.
/// Whenever binary incompatible changes are made to this structure,
/// `PYBIND11_INTERNALS_VERSION` must be incremented.
struct internals {
#if PYBIND11_INTERNALS_VERSION >= 6
std::mutex mutex;
#endif
// std::type_index -> pybind11's type information
type_map<type_info *> registered_types_cpp;
// PyTypeObject* -> base type_info(s)
std::unordered_map<PyTypeObject *, std::vector<type_info *>> registered_types_py;
std::unordered_multimap<const void *, instance *> registered_instances; // void * -> instance*
#if PYBIND11_INTERNALS_VERSION >= 6
std::unique_ptr<instance_map_shard[]> instance_shards; // void * -> instance*
size_t instance_shards_mask;
#else
instance_map registered_instances; // void * -> instance*
#endif
std::unordered_set<std::pair<const PyObject *, const char *>, override_hash>
inactive_override_cache;
type_map<std::vector<bool (*)(PyObject *, void *&)>> direct_conversions;
Expand Down Expand Up @@ -463,6 +484,20 @@ inline internals **get_internals_pp_from_capsule(handle obj) {
return static_cast<internals **>(raw_ptr);
}

inline uint64_t next_pow2(uint64_t x) {
// Round-up to the next power of two.
// See https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
x--;
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
x |= (x >> 32);
x++;
return x;
}

/// Return a reference to the current `internals` data
PYBIND11_NOINLINE internals &get_internals() {
auto **&internals_pp = get_internals_pp();
Expand Down Expand Up @@ -531,6 +566,18 @@ PYBIND11_NOINLINE internals &get_internals() {
internals_ptr->static_property_type = make_static_property_type();
internals_ptr->default_metaclass = make_default_metaclass();
internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass);
#if PYBIND11_INTERNALS_VERSION >= 6
# if defined(Py_NOGIL)
size_t num_shards = (size_t) next_pow2(2 * std::thread::hardware_concurrency());
if (num_shards == 0) {
num_shards = 1;
}
# else
size_t num_shards = 1;
# endif
internals_ptr->instance_shards.reset(new instance_map_shard[num_shards]);
internals_ptr->instance_shards_mask = num_shards - 1;
#endif // PYBIND11_INTERNALS_VERSION >= 6
}
return **internals_pp;
}
Expand Down Expand Up @@ -591,13 +638,77 @@ inline local_internals &get_local_internals() {
return *locals;
}

#if PYBIND11_INTERNALS_VERSION >= 6 && defined(Py_NOGIL)
# define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock<std::mutex> lock((internals).mutex)
#else
# define PYBIND11_LOCK_INTERNALS(internals)
#endif

template <typename F>
inline auto with_internals(const F &cb) -> decltype(cb(get_internals())) {
auto &internals = get_internals();
PYBIND11_LOCK_INTERNALS(internals);
return cb(internals);
}

inline uint64_t splitmix64(uint64_t z) {
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
return z ^ (z >> 31);
}

template <typename F>
inline auto with_instance_map(const void *ptr, const F &cb)
-> decltype(cb(std::declval<instance_map &>())) {
auto &internals = get_internals();

#if PYBIND11_INTERNALS_VERSION >= 6
// Hash address to compute shard, but ignore low bits. We'd like allocations
// from the same thread/core to map to the same shard and allocations from
// other threads/cores to map to other shards. Using the high bits is a good
// heuristic because memory allocators often have a per-thread
// arena/superblock/segment from which smaller allocations are served.
auto addr = reinterpret_cast<uintptr_t>(ptr);
uint64_t hash = splitmix64((uint64_t) (addr >> 20));
size_t idx = (size_t) hash & internals.instance_shards_mask;

auto &shard = internals.instance_shards[idx];
# if defined(Py_NOGIL)
std::unique_lock<std::mutex> lock(shard.mutex);
# endif
return cb(shard.registered_instances);
#else
(void) ptr;
return cb(internals.registered_instances);
#endif
}

inline size_t num_registered_instances() {
auto &internals = get_internals();
#if PYBIND11_INTERNALS_VERSION >= 6
size_t count = 0;
for (size_t i = 0; i <= internals.instance_shards_mask; ++i) {
auto &shard = internals.instance_shards[i];
std::unique_lock<std::mutex> lock(shard.mutex);
count += shard.registered_instances.size();
}
return count;
#else
return internals.registered_instances.size();
#endif
}

/// Constructs a std::string with the given arguments, stores it in `internals`, and returns its
/// `c_str()`. Such strings objects have a long storage duration -- the internal strings are only
/// cleared when the program exits or after interpreter shutdown (when embedding), and so are
/// suitable for c-style strings needed by Python internals (such as PyTypeObject's tp_name).
template <typename... Args>
const char *c_str(Args &&...args) {
auto &strings = get_internals().static_strings;
// GCC 4.8 doesn't like parameter unpack within lambda capture, so use
// PYBIND11_LOCK_INTERNALS.
auto &internals = get_internals();
PYBIND11_LOCK_INTERNALS(internals);
auto &strings = internals.static_strings;
strings.emplace_front(std::forward<Args>(args)...);
return strings.front().c_str();
}
Expand Down Expand Up @@ -627,30 +738,34 @@ PYBIND11_NAMESPACE_END(detail)
/// pybind11 version) running in the current interpreter. Names starting with underscores
/// are reserved for internal usage. Returns `nullptr` if no matching entry was found.
PYBIND11_NOINLINE void *get_shared_data(const std::string &name) {
auto &internals = detail::get_internals();
auto it = internals.shared_data.find(name);
return it != internals.shared_data.end() ? it->second : nullptr;
return detail::with_internals([&](detail::internals &internals) {
auto it = internals.shared_data.find(name);
return it != internals.shared_data.end() ? it->second : nullptr;
});
}

/// Set the shared data that can be later recovered by `get_shared_data()`.
PYBIND11_NOINLINE void *set_shared_data(const std::string &name, void *data) {
detail::get_internals().shared_data[name] = data;
return data;
return detail::with_internals([&](detail::internals &internals) {
internals.shared_data[name] = data;
return data;
});
}

/// Returns a typed reference to a shared data entry (by using `get_shared_data()`) if
/// such entry exists. Otherwise, a new object of default-constructible type `T` is
/// added to the shared data under the given name and a reference to it is returned.
template <typename T>
T &get_or_create_shared_data(const std::string &name) {
auto &internals = detail::get_internals();
auto it = internals.shared_data.find(name);
T *ptr = (T *) (it != internals.shared_data.end() ? it->second : nullptr);
if (!ptr) {
ptr = new T();
internals.shared_data[name] = ptr;
}
return *ptr;
return *detail::with_internals([&](detail::internals &internals) {
auto it = internals.shared_data.find(name);
T *ptr = (T *) (it != internals.shared_data.end() ? it->second : nullptr);
if (!ptr) {
ptr = new T();
internals.shared_data[name] = ptr;
}
return ptr;
});
}

PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
Loading

0 comments on commit 6e7ddcc

Please sign in to comment.