From e9762520a64a56eb72817e625995c6bb3c0fd645 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 29 Jul 2024 19:01:25 +0000 Subject: [PATCH] gh-122417: Implement per-thread heap type refcounts The free-threaded build partially stores heap type reference counts in distributed manner in per-thread arrays. This avoids reference count contention when creating or destroying instances. Co-authored-by: Ken Jin --- Include/cpython/object.h | 3 + Include/internal/pycore_gc.h | 4 - Include/internal/pycore_interp.h | 2 + Include/internal/pycore_object.h | 69 ++++++++++- Include/internal/pycore_tstate.h | 10 ++ Include/internal/pycore_typeid.h | 73 +++++++++++ Lib/test/test_sys.py | 4 +- Makefile.pre.in | 2 + Objects/object.c | 10 +- Objects/typeobject.c | 22 +++- Python/gc_free_threading.c | 69 ++++------- Python/pystate.c | 14 +-- Python/typeid.c | 204 +++++++++++++++++++++++++++++++ 13 files changed, 415 insertions(+), 71 deletions(-) create mode 100644 Include/internal/pycore_typeid.h create mode 100644 Python/typeid.c diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 90cd7b54b341611..b13aa869f7ad8ea 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -270,6 +270,9 @@ typedef struct _heaptypeobject { PyObject *ht_module; char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec struct _specialization_cache _spec_cache; // For use by the specializer. +#ifdef Py_GIL_DISABLED + Py_ssize_t _ht_id; // ID used for thread-local refcounting +#endif /* here are optional user slots, followed by the members. */ } PyHeapTypeObject; diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index b4bf36e82e376ad..5dd5b0c78d42fab 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -381,10 +381,6 @@ extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp); extern void _Py_ScheduleGC(PyThreadState *tstate); extern void _Py_RunGC(PyThreadState *tstate); -#ifdef Py_GIL_DISABLED -// gh-117783: Immortalize objects that use deferred reference counting -extern void _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp); -#endif #ifdef __cplusplus } diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 4a83862ac13e268..a1c1dd0c9572309 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -35,6 +35,7 @@ extern "C" { #include "pycore_qsbr.h" // struct _qsbr_state #include "pycore_tstate.h" // _PyThreadStateImpl #include "pycore_tuple.h" // struct _Py_tuple_state +#include "pycore_typeid.h" // struct _Py_type_id_pool #include "pycore_typeobject.h" // struct types_state #include "pycore_unicodeobject.h" // struct _Py_unicode_state #include "pycore_warnings.h" // struct _warnings_runtime_state @@ -220,6 +221,7 @@ struct _is { #if defined(Py_GIL_DISABLED) struct _mimalloc_interp_state mimalloc; struct _brc_state brc; // biased reference counting state + struct _Py_type_id_pool type_ids; PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS]; #endif diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 155810d00bef5ba..c6e9520f9d87bda 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -14,10 +14,19 @@ extern "C" { #include "pycore_interp.h" // PyInterpreterState.gc #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED #include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_typeid.h" // _PyType_IncrefSlow #define _Py_IMMORTAL_REFCNT_LOOSE ((_Py_IMMORTAL_REFCNT >> 1) + 1) +// This value is added to `ob_ref_shared` for objects that use deferred +// reference counting so that they are not immediately deallocated when the +// non-deferred reference count drops to zero. +// +// The value is half the maximum shared refcount because the low two bits of +// `ob_ref_shared` are used for flags. +#define _Py_REF_DEFERRED (PY_SSIZE_T_MAX / 8) + // gh-121528, gh-118997: Similar to _Py_IsImmortal() but be more loose when // comparing the reference count to stay compatible with C extensions built // with the stable ABI 3.11 or older. Such extensions implement INCREF/DECREF @@ -280,6 +289,64 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp); extern void _PyObject_FiniState(PyInterpreterState *interp); extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj); +static inline void +_Py_INCREF_TYPE(PyTypeObject *type) +{ +#ifndef Py_GIL_DISABLED + Py_INCREF(type); +#else + if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { + assert(_Py_IsImmortal(type)); + return; + } + + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); + PyHeapTypeObject *ht = (PyHeapTypeObject *)type; + + // Unsigned comparison so that `ht_id=-1` is treated as out-of-bounds. + Py_ssize_t ht_id = ht->_ht_id; + if ((size_t)ht_id >= (size_t)tstate->types.size) { + _PyType_IncrefSlow(ht); + } + else { +# ifdef Py_REF_DEBUG + _Py_INCREF_IncRefTotal(); +# endif + _Py_INCREF_STAT_INC(); + tstate->types.refcounts[ht_id]++; + } +#endif +} + +static inline void +_Py_DECREF_TYPE(PyTypeObject *type) +{ +#ifndef Py_GIL_DISABLED + Py_DECREF(type); +#else + if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { + assert(_Py_IsImmortal(type)); + return; + } + + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); + PyHeapTypeObject *ht = (PyHeapTypeObject *)type; + + // Unsigned comparison so that `ht_id=-1` is treated as out-of-bounds. + Py_ssize_t ht_id = ht->_ht_id; + if ((size_t)ht_id >= (size_t)tstate->types.size) { + Py_DECREF(type); + } + else { +# ifdef Py_REF_DEBUG + _Py_DECREF_DecRefTotal(); +# endif + _Py_DECREF_STAT_INC(); + tstate->types.refcounts[ht_id]--; + } +#endif +} + /* Inline functions trading binary compatibility for speed: _PyObject_Init() is the fast version of PyObject_Init(), and _PyObject_InitVar() is the fast version of PyObject_InitVar(). @@ -291,7 +358,7 @@ _PyObject_Init(PyObject *op, PyTypeObject *typeobj) assert(op != NULL); Py_SET_TYPE(op, typeobj); assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortalLoose(typeobj)); - Py_INCREF(typeobj); + _Py_INCREF_TYPE(typeobj); _Py_NewReference(op); } diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 18c972bd3675992..f681b644c9ad5d9 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -31,6 +31,16 @@ typedef struct _PyThreadStateImpl { struct _mimalloc_thread_state mimalloc; struct _Py_freelists freelists; struct _brc_thread_state brc; + struct { + // The thread-local refcounts for heap type objects + Py_ssize_t *refcounts; + + // Size of the refcounts array. + Py_ssize_t size; + + // If set, don't use thread-local refcounts + int is_finalized; + } types; #endif #if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED) diff --git a/Include/internal/pycore_typeid.h b/Include/internal/pycore_typeid.h new file mode 100644 index 000000000000000..2bad1ce1eb42929 --- /dev/null +++ b/Include/internal/pycore_typeid.h @@ -0,0 +1,73 @@ +#ifndef Py_INTERNAL_TYPEID_H +#define Py_INTERNAL_TYPEID_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#ifdef Py_GIL_DISABLED + +// This contains code for allocating unique ids to heap type objects +// and re-using those ids when the type is deallocated. +// +// The type ids are used to implement per-thread reference counts of +// heap type objects to avoid contention on the reference count fields +// of heap type objects. (Non-heap type objects are immortal, so contention +// is not an issue.) +// +// Type id of -1 is used to indicate a type doesn't use thread-local +// refcounting. +// +// Each entry implicitly represents a type id based on it's offset in the +// table. Non-allocated entries form a free-list via the 'next' pointer. +// Allocated entries store the corresponding PyTypeObject. +typedef union _Py_type_id_entry { + // Points to the next free type id, when part of the freelist + union _Py_type_id_entry *next; + + // Stores the type object when the id is assigned + PyHeapTypeObject *type; +} _Py_type_id_entry; + +struct _Py_type_id_pool { + PyMutex mutex; + + // combined table of types with allocated type ids and unallocated + // type ids. + _Py_type_id_entry *table; + + // Next entry to allocate inside 'table' or NULL + _Py_type_id_entry *freelist; + + // size of 'table' + Py_ssize_t size; +}; + +// Assigns the next id from the pool of type ids. +extern void _PyType_AssignId(PyHeapTypeObject *type); + +// Releases the allocated type id back to the pool. +extern void _PyType_ReleaseId(PyHeapTypeObject *type); + +// Merges the thread-local reference counts into the corresponding types. +extern void _PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate); + +// Like _PyType_MergeThreadLocalRefcounts, but also frees the thread-local +// array of refcounts. +extern void _PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate); + +// Frees the interpreter's pool of type ids. +extern void _PyType_FinalizeIdPool(PyInterpreterState *interp); + +// Increfs the type, resizing the thread-local refcount array if necessary. +PyAPI_FUNC(void) _PyType_IncrefSlow(PyHeapTypeObject *type); + +#endif /* Py_GIL_DISABLED */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_TYPEID_H */ diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 7e4bc980b390f7c..e57b7ff631ba2de 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1710,6 +1710,7 @@ def delx(self): del self.__x fmt = 'P2nPI13Pl4Pn9Pn12PIPc' s = vsize(fmt) check(int, s) + typeid = 'n' if support.Py_GIL_DISABLED else '' # class s = vsize(fmt + # PyTypeObject '4P' # PyAsyncMethods @@ -1718,7 +1719,8 @@ def delx(self): del self.__x '10P' # PySequenceMethods '2P' # PyBufferProcs '6P' - '1PIP' # Specializer cache + '1PIP' # Specializer cache + + typeid # heap type id (free-threaded only) ) class newstyleclass(object): pass # Separate block for PyDictKeysObject with 8 keys and 5 entries diff --git a/Makefile.pre.in b/Makefile.pre.in index 5608e593ac9aca5..cb4bba10a7c940b 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -480,6 +480,7 @@ PYTHON_OBJS= \ Python/structmember.o \ Python/symtable.o \ Python/sysmodule.o \ + Python/typeid.o \ Python/thread.o \ Python/traceback.o \ Python/tracemalloc.o \ @@ -1257,6 +1258,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_tracemalloc.h \ $(srcdir)/Include/internal/pycore_tstate.h \ $(srcdir)/Include/internal/pycore_tuple.h \ + $(srcdir)/Include/internal/pycore_typeid.h \ $(srcdir)/Include/internal/pycore_typeobject.h \ $(srcdir)/Include/internal/pycore_typevarobject.h \ $(srcdir)/Include/internal/pycore_ucnhash.h \ diff --git a/Objects/object.c b/Objects/object.c index 8a648a464879107..7279e6a65d1b490 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2470,15 +2470,7 @@ _PyObject_SetDeferredRefcount(PyObject *op) assert(_Py_IsOwnedByCurrentThread(op)); assert(op->ob_ref_shared == 0); _PyObject_SET_GC_BITS(op, _PyGC_BITS_DEFERRED); - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_Py_atomic_load_int_relaxed(&interp->gc.immortalize) == 1) { - // gh-117696: immortalize objects instead of using deferred reference - // counting for now. - _Py_SetImmortal(op); - return; - } - op->ob_ref_local += 1; - op->ob_ref_shared = _Py_REF_QUEUED; + op->ob_ref_shared = _Py_REF_SHARED(_Py_REF_DEFERRED, 0); #endif } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5b0a466f9134953..5c4dc87c4aacbd2 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2439,7 +2439,8 @@ subtype_dealloc(PyObject *self) // Don't read type memory after calling basedealloc() since basedealloc() // can deallocate the type and free its memory. int type_needs_decref = (type->tp_flags & Py_TPFLAGS_HEAPTYPE - && !(base->tp_flags & Py_TPFLAGS_HEAPTYPE)); + && !(base->tp_flags & Py_TPFLAGS_HEAPTYPE) + && !_Py_IsImmortal(type)); assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); @@ -2452,7 +2453,7 @@ subtype_dealloc(PyObject *self) reference counting. Only decref if the base type is not already a heap allocated type. Otherwise, basedealloc should have decref'd it already */ if (type_needs_decref) { - Py_DECREF(type); + _Py_DECREF_TYPE(type); } /* Done */ @@ -2552,7 +2553,8 @@ subtype_dealloc(PyObject *self) // Don't read type memory after calling basedealloc() since basedealloc() // can deallocate the type and free its memory. int type_needs_decref = (type->tp_flags & Py_TPFLAGS_HEAPTYPE - && !(base->tp_flags & Py_TPFLAGS_HEAPTYPE)); + && !(base->tp_flags & Py_TPFLAGS_HEAPTYPE) + && !(_Py_IsImmortal(type))); assert(basedealloc); basedealloc(self); @@ -2562,7 +2564,7 @@ subtype_dealloc(PyObject *self) reference counting. Only decref if the base type is not already a heap allocated type. Otherwise, basedealloc should have decref'd it already */ if (type_needs_decref) { - Py_DECREF(type); + _Py_DECREF_TYPE(type); } endlabel: @@ -3913,7 +3915,9 @@ type_new_alloc(type_new_ctx *ctx) et->ht_module = NULL; et->_ht_tpname = NULL; - _PyObject_SetDeferredRefcount((PyObject *)et); +#ifdef Py_GIL_DISABLED + _PyType_AssignId(et); +#endif return type; } @@ -4965,6 +4969,11 @@ _PyType_FromMetaclass_impl( type->tp_weaklistoffset = weaklistoffset; type->tp_dictoffset = dictoffset; +#ifdef Py_GIL_DISABLED + // Assign a type id to enable thread-local refcounting + _PyType_AssignId(res); +#endif + /* Ready the type (which includes inheritance). * * After this call we should generally only touch up what's @@ -5914,6 +5923,9 @@ type_dealloc(PyObject *self) } Py_XDECREF(et->ht_module); PyMem_Free(et->_ht_tpname); +#ifdef Py_GIL_DISABLED + _PyType_ReleaseId(et); +#endif Py_TYPE(type)->tp_free((PyObject *)type); } diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 53f04160c38841d..1e02db00649c750 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -15,6 +15,7 @@ #include "pycore_tstate.h" // _PyThreadStateImpl #include "pycore_weakref.h" // _PyWeakref_ClearRef() #include "pydtrace.h" +#include "pycore_typeid.h" // _PyType_MergeThreadLocalRefcounts #ifdef Py_GIL_DISABLED @@ -164,7 +165,15 @@ disable_deferred_refcounting(PyObject *op) { if (_PyObject_HasDeferredRefcount(op)) { op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED; - op->ob_ref_shared -= (1 << _Py_REF_SHARED_SHIFT); + op->ob_ref_shared -= _Py_REF_SHARED(_Py_REF_DEFERRED, 0); + + if (PyType_Check(op)) { + // Disable thread-local refcounting for heap types + PyTypeObject *type = (PyTypeObject *)op; + if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { + _PyType_ReleaseId((PyHeapTypeObject *)op); + } + } } } @@ -328,16 +337,6 @@ merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state) } } -static void -merge_all_queued_objects(PyInterpreterState *interp, struct collection_state *state) -{ - HEAD_LOCK(&_PyRuntime); - for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { - merge_queued_objects((_PyThreadStateImpl *)p, state); - } - HEAD_UNLOCK(&_PyRuntime); -} - static void process_delayed_frees(PyInterpreterState *interp) { @@ -389,7 +388,9 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, } Py_ssize_t refcount = Py_REFCNT(op); - refcount -= _PyObject_HasDeferredRefcount(op); + if (_PyObject_HasDeferredRefcount(op)) { + refcount -= _Py_REF_DEFERRED; + } _PyObject_ASSERT(op, refcount >= 0); if (refcount > 0 && !_PyObject_HasDeferredRefcount(op)) { @@ -754,10 +755,6 @@ _PyGC_Init(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; - // gh-117783: immortalize objects that would use deferred refcounting - // once the first non-main thread is created (but not in subinterpreters). - gcstate->immortalize = _Py_IsMainInterpreter(interp) ? 0 : -1; - gcstate->garbage = PyList_New(0); if (gcstate->garbage == NULL) { return _PyStatus_NO_MEMORY(); @@ -1105,8 +1102,18 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, state->gcstate->old[i-1].count = 0; } - // merge refcounts for all queued objects - merge_all_queued_objects(interp, state); + HEAD_LOCK(&_PyRuntime); + for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p; + + // merge per-thread refcount for types into the type's actual refcount + _PyType_MergeThreadLocalRefcounts(tstate); + + // merge refcounts for all queued objects + merge_queued_objects(tstate, state); + } + HEAD_UNLOCK(&_PyRuntime); + process_delayed_frees(interp); // Find unreachable objects @@ -1835,32 +1842,6 @@ custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } -// gh-117783: Immortalize objects that use deferred reference counting to -// temporarily work around scaling bottlenecks. -static bool -immortalize_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, - void *block, size_t block_size, void *args) -{ - PyObject *op = op_from_block(block, args, false); - if (op != NULL && _PyObject_HasDeferredRefcount(op)) { - _Py_SetImmortal(op); - op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED; - } - return true; -} - -void -_PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp) -{ - struct visitor_args args; - _PyEval_StopTheWorld(interp); - if (interp->gc.immortalize == 0) { - gc_visit_heaps(interp, &immortalize_visitor, &args); - interp->gc.immortalize = 1; - } - _PyEval_StartTheWorld(interp); -} - void PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) { diff --git a/Python/pystate.c b/Python/pystate.c index 6fbd17f7eaeaa99..a17f47922e746af 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -20,6 +20,7 @@ #include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_sysmodule.h" // _PySys_Audit() #include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap() +#include "pycore_typeid.h" // _PyType_FinalizeIdPool /* -------------------------------------------------------------------------- CAUTION @@ -1584,13 +1585,6 @@ new_threadstate(PyInterpreterState *interp, int whence) PyMem_RawFree(new_tstate); } else { -#ifdef Py_GIL_DISABLED - if (_Py_atomic_load_int(&interp->gc.immortalize) == 0) { - // Immortalize objects marked as using deferred reference counting - // the first time a non-main thread is created. - _PyGC_ImmortalizeDeferredObjects(interp); - } -#endif } #ifdef Py_GIL_DISABLED @@ -1741,6 +1735,10 @@ PyThreadState_Clear(PyThreadState *tstate) struct _Py_freelists *freelists = _Py_freelists_GET(); _PyObject_ClearFreeLists(freelists, 1); + // Merge our thread-local refcounts into the type's own refcount and + // free our local refcount array. + _PyType_FinalizeThreadLocalRefcounts((_PyThreadStateImpl *)tstate); + // Remove ourself from the biased reference counting table of threads. _Py_brc_remove_thread(tstate); #endif @@ -1774,6 +1772,7 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) _PyRuntimeState *runtime = interp->runtime; HEAD_LOCK(runtime); + if (tstate->prev) { tstate->prev->next = tstate->next; } @@ -1799,6 +1798,7 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; tstate->interp->object_state.reftotal += tstate_impl->reftotal; tstate_impl->reftotal = 0; + assert(tstate_impl->types.refcounts == NULL); #endif HEAD_UNLOCK(runtime); diff --git a/Python/typeid.c b/Python/typeid.c new file mode 100644 index 000000000000000..afd7b3840dc8596 --- /dev/null +++ b/Python/typeid.c @@ -0,0 +1,204 @@ +#include "Python.h" + +#include "pycore_lock.h" // PyMutex_LockFlags() +#include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_object.h" // _Py_IncRefTotal +#include "pycore_typeid.h" + +// This contains code for allocating unique ids to heap type objects +// and re-using those ids when the type is deallocated. +// +// The type ids are used to implement per-thread reference counts of +// heap type objects to avoid contention on the reference count fields +// of heap type objects. Static type objects are immortal, so contention +// is not an issue. + +#ifdef Py_GIL_DISABLED + +#define POOL_MIN_SIZE 8 + +#define LOCK_POOL(pool) PyMutex_LockFlags(&pool->mutex, _Py_LOCK_DONT_DETACH) +#define UNLOCK_POOL(pool) PyMutex_Unlock(&pool->mutex) + +static int +resize_interp_type_id_pool(struct _Py_type_id_pool *pool) +{ + if ((size_t)pool->size > PY_SSIZE_T_MAX / (2 * sizeof(*pool->table))) { + return -1; + } + + Py_ssize_t new_size = pool->size * 2; + if (new_size < POOL_MIN_SIZE) { + new_size = POOL_MIN_SIZE; + } + + _Py_type_id_entry *table = PyMem_Realloc(pool->table, + new_size * sizeof(*pool->table)); + if (table == NULL) { + return -1; + } + + Py_ssize_t start = pool->size; + for (Py_ssize_t i = start; i < new_size - 1; i++) { + table[i].next = &table[i + 1]; + } + table[new_size - 1].next = NULL; + + pool->table = table; + pool->freelist = &table[start]; + _Py_atomic_store_ssize(&pool->size, new_size); + return 0; +} + +static int +resize_local_refcounts(_PyThreadStateImpl *tstate) +{ + if (tstate->types.is_finalized) { + return -1; + } + + struct _Py_type_id_pool *pool = &tstate->base.interp->type_ids; + Py_ssize_t size = _Py_atomic_load_ssize(&pool->size); + + Py_ssize_t *refcnts = PyMem_Realloc(tstate->types.refcounts, + size * sizeof(Py_ssize_t)); + if (refcnts == NULL) { + return -1; + } + + Py_ssize_t old_size = tstate->types.size; + if (old_size < size) { + memset(refcnts + old_size, 0, (size - old_size) * sizeof(Py_ssize_t)); + } + + tstate->types.refcounts = refcnts; + tstate->types.size = size; + return 0; +} + +void +_PyType_AssignId(PyHeapTypeObject *type) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _Py_type_id_pool *pool = &interp->type_ids; + + LOCK_POOL(pool); + if (pool->freelist == NULL) { + if (resize_interp_type_id_pool(pool) < 0) { + type->_ht_id = -1; + UNLOCK_POOL(pool); + return; + } + } + + _Py_type_id_entry *entry = pool->freelist; + pool->freelist = entry->next; + entry->type = type; + _PyObject_SetDeferredRefcount((PyObject *)type); + type->_ht_id = (entry - pool->table); + UNLOCK_POOL(pool); +} + +void +_PyType_ReleaseId(PyHeapTypeObject *type) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _Py_type_id_pool *pool = &interp->type_ids; + + if (type->_ht_id < 0) { + // The type doesn't have an id assigned. + return; + } + + LOCK_POOL(pool); + + _Py_type_id_entry *entry = &pool->table[type->_ht_id]; + assert(entry->type == type); + entry->next = pool->freelist; + pool->freelist = entry; + + type->_ht_id = -1; + UNLOCK_POOL(pool); +} + +void +_PyType_IncrefSlow(PyHeapTypeObject *type) +{ + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); + if (type->_ht_id < 0 || resize_local_refcounts(tstate) < 0) { + // just incref the type directly. + Py_INCREF(type); + return; + } + + assert(type->_ht_id < tstate->types.size); + tstate->types.refcounts[type->_ht_id]++; +#ifdef Py_REF_DEBUG + _Py_IncRefTotal((PyThreadState *)tstate); +#endif + _Py_INCREF_STAT_INC(); +} + +void +_PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate) +{ + if (tstate->types.refcounts == NULL) { + return; + } + + struct _Py_type_id_pool *pool = &tstate->base.interp->type_ids; + + LOCK_POOL(pool); + for (Py_ssize_t i = 0, n = tstate->types.size; i < n; i++) { + Py_ssize_t refcnt = tstate->types.refcounts[i]; + if (refcnt != 0) { + PyObject *type = (PyObject *)pool->table[i].type; + assert(PyType_Check(type)); + + _Py_atomic_add_ssize(&type->ob_ref_shared, + refcnt << _Py_REF_SHARED_SHIFT); + tstate->types.refcounts[i] = 0; + } + } + UNLOCK_POOL(pool); +} + +void +_PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate) +{ + _PyType_MergeThreadLocalRefcounts(tstate); + + PyMem_Free(tstate->types.refcounts); + tstate->types.refcounts = NULL; + tstate->types.size = 0; + tstate->types.is_finalized = 1; +} + +void +_PyType_FinalizeIdPool(PyInterpreterState *interp) +{ + struct _Py_type_id_pool *pool = &interp->type_ids; + + // First, set the free-list to NULL values + while (pool->freelist) { + _Py_type_id_entry *next = pool->freelist->next; + pool->freelist->type = NULL; + pool->freelist = next; + } + + // Now everything non-NULL is a type. Set the type's id to -1 in case it + // outlives the interpreter. + for (Py_ssize_t i = 0; i < pool->size; i++) { + PyHeapTypeObject *ht = pool->table[i].type; + if (ht) { + ht->_ht_id = -1; + pool->table[i].type = NULL; + } + } + PyMem_Free(pool->table); + pool->table = NULL; + pool->freelist = NULL; + pool->size = 0; +} + +#endif /* Py_GIL_DISABLED */