Skip to content

Commit

Permalink
pythongh-122417: Implement per-thread heap type refcounts
Browse files Browse the repository at this point in the history
The free-threaded build partially stores heap type reference counts in
a distributed manner in per-thread arrays. This avoids reference count
contention when creating or destroying instances.

Co-authored-by: Ken Jin <kenjin@python.org>
  • Loading branch information
colesbury and Fidget-Spinner committed Jul 29, 2024
1 parent 490e0ad commit e976252
Show file tree
Hide file tree
Showing 13 changed files with 415 additions and 71 deletions.
3 changes: 3 additions & 0 deletions Include/cpython/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ typedef struct _heaptypeobject {
PyObject *ht_module;
char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec
struct _specialization_cache _spec_cache; // For use by the specializer.
#ifdef Py_GIL_DISABLED
Py_ssize_t _ht_id; // ID used for thread-local refcounting
#endif
/* here are optional user slots, followed by the members. */
} PyHeapTypeObject;

Expand Down
4 changes: 0 additions & 4 deletions Include/internal/pycore_gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,10 +381,6 @@ extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
extern void _Py_ScheduleGC(PyThreadState *tstate);
extern void _Py_RunGC(PyThreadState *tstate);

#ifdef Py_GIL_DISABLED
// gh-117783: Immortalize objects that use deferred reference counting
extern void _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp);
#endif

#ifdef __cplusplus
}
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ extern "C" {
#include "pycore_qsbr.h" // struct _qsbr_state
#include "pycore_tstate.h" // _PyThreadStateImpl
#include "pycore_tuple.h" // struct _Py_tuple_state
#include "pycore_typeid.h" // struct _Py_type_id_pool
#include "pycore_typeobject.h" // struct types_state
#include "pycore_unicodeobject.h" // struct _Py_unicode_state
#include "pycore_warnings.h" // struct _warnings_runtime_state
Expand Down Expand Up @@ -220,6 +221,7 @@ struct _is {
#if defined(Py_GIL_DISABLED)
struct _mimalloc_interp_state mimalloc;
struct _brc_state brc; // biased reference counting state
struct _Py_type_id_pool type_ids;
PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS];
#endif

Expand Down
69 changes: 68 additions & 1 deletion Include/internal/pycore_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,19 @@ extern "C" {
#include "pycore_interp.h" // PyInterpreterState.gc
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_typeid.h" // _PyType_IncrefSlow


#define _Py_IMMORTAL_REFCNT_LOOSE ((_Py_IMMORTAL_REFCNT >> 1) + 1)

// This value is added to `ob_ref_shared` for objects that use deferred
// reference counting so that they are not immediately deallocated when the
// non-deferred reference count drops to zero.
//
// The value is half the maximum shared refcount because the low two bits of
// `ob_ref_shared` are used for flags.
#define _Py_REF_DEFERRED (PY_SSIZE_T_MAX / 8)

// gh-121528, gh-118997: Similar to _Py_IsImmortal() but be more loose when
// comparing the reference count to stay compatible with C extensions built
// with the stable ABI 3.11 or older. Such extensions implement INCREF/DECREF
Expand Down Expand Up @@ -280,6 +289,64 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp);
extern void _PyObject_FiniState(PyInterpreterState *interp);
extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj);

// Takes a new reference to `type` on behalf of the calling thread.
//
// Default build: this is a plain Py_INCREF().
//
// Free-threaded build (Py_GIL_DISABLED):
//   * Non-heap types are left untouched; they are asserted to be immortal.
//   * Heap types whose `_ht_id` indexes into the calling thread's
//     `types.refcounts` array are counted there, without touching the
//     type object itself.
//   * Any other id (including the -1 "no id" value) is handed to
//     _PyType_IncrefSlow().
static inline void
_Py_INCREF_TYPE(PyTypeObject *type)
{
#ifdef Py_GIL_DISABLED
    if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
        // Nothing to count for a non-heap type.
        assert(_Py_IsImmortal(type));
        return;
    }

    PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)_PyThreadState_GET();

    // Compare as unsigned so that a negative id (-1) can never look like a
    // valid index into the per-thread array.
    size_t slot = (size_t)heap_type->_ht_id;
    if (slot < (size_t)ts->types.size) {
        // Fast path: bump this thread's private counter for the type.
# ifdef Py_REF_DEBUG
        _Py_INCREF_IncRefTotal();
# endif
        _Py_INCREF_STAT_INC();
        ts->types.refcounts[slot]++;
        return;
    }

    // The id is outside this thread's array: take the out-of-line path.
    _PyType_IncrefSlow(heap_type);
#else
    Py_INCREF(type);
#endif
}

// Drops a reference to `type` on behalf of the calling thread.
//
// Default build: this is a plain Py_DECREF().
//
// Free-threaded build (Py_GIL_DISABLED):
//   * Non-heap types are left untouched; they are asserted to be immortal.
//   * Heap types whose `_ht_id` indexes into the calling thread's
//     `types.refcounts` array are decremented there, without touching the
//     type object itself.
//   * Any other id (including the -1 "no id" value) falls back to a regular
//     Py_DECREF() on the type object.
static inline void
_Py_DECREF_TYPE(PyTypeObject *type)
{
#ifdef Py_GIL_DISABLED
    if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
        // Nothing to count for a non-heap type.
        assert(_Py_IsImmortal(type));
        return;
    }

    PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
    _PyThreadStateImpl *ts = (_PyThreadStateImpl *)_PyThreadState_GET();

    // Compare as unsigned so that a negative id (-1) can never look like a
    // valid index into the per-thread array.
    size_t slot = (size_t)heap_type->_ht_id;
    if (slot < (size_t)ts->types.size) {
        // Fast path: adjust this thread's private counter for the type.
# ifdef Py_REF_DEBUG
        _Py_DECREF_DecRefTotal();
# endif
        _Py_DECREF_STAT_INC();
        ts->types.refcounts[slot]--;
        return;
    }

    // The id is outside this thread's array: use the ordinary refcount.
    Py_DECREF(type);
#else
    Py_DECREF(type);
#endif
}

/* Inline functions trading binary compatibility for speed:
_PyObject_Init() is the fast version of PyObject_Init(), and
_PyObject_InitVar() is the fast version of PyObject_InitVar().
Expand All @@ -291,7 +358,7 @@ _PyObject_Init(PyObject *op, PyTypeObject *typeobj)
assert(op != NULL);
Py_SET_TYPE(op, typeobj);
assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortalLoose(typeobj));
Py_INCREF(typeobj);
_Py_INCREF_TYPE(typeobj);
_Py_NewReference(op);
}

Expand Down
10 changes: 10 additions & 0 deletions Include/internal/pycore_tstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ typedef struct _PyThreadStateImpl {
struct _mimalloc_thread_state mimalloc;
struct _Py_freelists freelists;
struct _brc_thread_state brc;
// Per-thread bookkeeping for reference counts of heap type objects.
struct {
// The thread-local refcounts for heap type objects.
// Indexed by the heap type's `_ht_id` (see _Py_INCREF_TYPE and
// _Py_DECREF_TYPE in pycore_object.h).
Py_ssize_t *refcounts;

// Size of the refcounts array.
// A type id that is not below this value is treated as out of range by
// _Py_INCREF_TYPE/_Py_DECREF_TYPE and is not counted in `refcounts`.
Py_ssize_t size;

// If set, don't use thread-local refcounts
// NOTE(review): presumably set once the thread's counts have been merged
// and freed during thread finalization -- confirm in Python/typeid.c.
int is_finalized;
} types;
#endif

#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
Expand Down
73 changes: 73 additions & 0 deletions Include/internal/pycore_typeid.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#ifndef Py_INTERNAL_TYPEID_H
#define Py_INTERNAL_TYPEID_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#ifdef Py_GIL_DISABLED

// This contains code for allocating unique ids to heap type objects
// and re-using those ids when the type is deallocated.
//
// The type ids are used to implement per-thread reference counts of
// heap type objects to avoid contention on the reference count fields
// of heap type objects. (Non-heap type objects are immortal, so contention
// is not an issue.)
//
// Type id of -1 is used to indicate a type doesn't use thread-local
// refcounting.
//
// Each entry implicitly represents a type id based on its offset in the
// table. Non-allocated entries form a free-list via the 'next' pointer.
// Allocated entries store the corresponding PyTypeObject.
typedef union _Py_type_id_entry {
// Free entry: points to the next free type id, when part of the freelist.
// NOTE(review): presumably NULL for the last free entry -- confirm in
// Python/typeid.c.
union _Py_type_id_entry *next;

// Allocated entry: stores the type object when the id is assigned.
// The id itself is not stored here; it is the entry's offset in the table.
PyHeapTypeObject *type;
} _Py_type_id_entry;

// Pool of type ids. One instance is embedded in the interpreter state
// (`struct _is`, member `type_ids`) in free-threaded builds.
struct _Py_type_id_pool {
// NOTE(review): presumably serializes access to the fields below --
// confirm against the implementation in Python/typeid.c.
PyMutex mutex;

// combined table of types with allocated type ids and unallocated
// type ids.
_Py_type_id_entry *table;

// Next entry to allocate inside 'table' or NULL
// (head of the free-list formed through _Py_type_id_entry.next)
_Py_type_id_entry *freelist;

// size of 'table'
// NOTE(review): presumably counted in entries, not bytes -- confirm.
Py_ssize_t size;
};

// Assigns the next id from the pool of type ids.
// Called when a heap type is created (type_new_alloc and
// _PyType_FromMetaclass_impl in Objects/typeobject.c).
// NOTE(review): presumably stores the id in `type->_ht_id` -- confirm.
extern void _PyType_AssignId(PyHeapTypeObject *type);

// Releases the allocated type id back to the pool.
// Called from type_dealloc() just before the type's memory is freed.
extern void _PyType_ReleaseId(PyHeapTypeObject *type);

// Merges the thread-local reference counts into the corresponding types.
extern void _PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate);

// Like _PyType_MergeThreadLocalRefcounts, but also frees the thread-local
// array of refcounts.
extern void _PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate);

// Frees the interpreter's pool of type ids.
extern void _PyType_FinalizeIdPool(PyInterpreterState *interp);

// Increfs the type, resizing the thread-local refcount array if necessary.
// This is the out-of-line path of _Py_INCREF_TYPE (pycore_object.h), taken
// when the type's id is not within the calling thread's refcount array.
// NOTE(review): declared with PyAPI_FUNC rather than plain `extern`,
// presumably because the inline caller lives in a header -- confirm.
PyAPI_FUNC(void) _PyType_IncrefSlow(PyHeapTypeObject *type);

#endif /* Py_GIL_DISABLED */

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_TYPEID_H */
4 changes: 3 additions & 1 deletion Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1710,6 +1710,7 @@ def delx(self): del self.__x
fmt = 'P2nPI13Pl4Pn9Pn12PIPc'
s = vsize(fmt)
check(int, s)
typeid = 'n' if support.Py_GIL_DISABLED else ''
# class
s = vsize(fmt + # PyTypeObject
'4P' # PyAsyncMethods
Expand All @@ -1718,7 +1719,8 @@ def delx(self): del self.__x
'10P' # PySequenceMethods
'2P' # PyBufferProcs
'6P'
'1PIP' # Specializer cache
'1PIP' # Specializer cache
+ typeid # heap type id (free-threaded only)
)
class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries
Expand Down
2 changes: 2 additions & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ PYTHON_OBJS= \
Python/structmember.o \
Python/symtable.o \
Python/sysmodule.o \
Python/typeid.o \
Python/thread.o \
Python/traceback.o \
Python/tracemalloc.o \
Expand Down Expand Up @@ -1257,6 +1258,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_tracemalloc.h \
$(srcdir)/Include/internal/pycore_tstate.h \
$(srcdir)/Include/internal/pycore_tuple.h \
$(srcdir)/Include/internal/pycore_typeid.h \
$(srcdir)/Include/internal/pycore_typeobject.h \
$(srcdir)/Include/internal/pycore_typevarobject.h \
$(srcdir)/Include/internal/pycore_ucnhash.h \
Expand Down
10 changes: 1 addition & 9 deletions Objects/object.c
Original file line number Diff line number Diff line change
Expand Up @@ -2470,15 +2470,7 @@ _PyObject_SetDeferredRefcount(PyObject *op)
assert(_Py_IsOwnedByCurrentThread(op));
assert(op->ob_ref_shared == 0);
_PyObject_SET_GC_BITS(op, _PyGC_BITS_DEFERRED);
PyInterpreterState *interp = _PyInterpreterState_GET();
if (_Py_atomic_load_int_relaxed(&interp->gc.immortalize) == 1) {
// gh-117696: immortalize objects instead of using deferred reference
// counting for now.
_Py_SetImmortal(op);
return;
}
op->ob_ref_local += 1;
op->ob_ref_shared = _Py_REF_QUEUED;
op->ob_ref_shared = _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
#endif
}

Expand Down
22 changes: 17 additions & 5 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2439,7 +2439,8 @@ subtype_dealloc(PyObject *self)
// Don't read type memory after calling basedealloc() since basedealloc()
// can deallocate the type and free its memory.
int type_needs_decref = (type->tp_flags & Py_TPFLAGS_HEAPTYPE
&& !(base->tp_flags & Py_TPFLAGS_HEAPTYPE));
&& !(base->tp_flags & Py_TPFLAGS_HEAPTYPE)
&& !_Py_IsImmortal(type));

assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0);

Expand All @@ -2452,7 +2453,7 @@ subtype_dealloc(PyObject *self)
reference counting. Only decref if the base type is not already a heap
allocated type. Otherwise, basedealloc should have decref'd it already */
if (type_needs_decref) {
Py_DECREF(type);
_Py_DECREF_TYPE(type);
}

/* Done */
Expand Down Expand Up @@ -2552,7 +2553,8 @@ subtype_dealloc(PyObject *self)
// Don't read type memory after calling basedealloc() since basedealloc()
// can deallocate the type and free its memory.
int type_needs_decref = (type->tp_flags & Py_TPFLAGS_HEAPTYPE
&& !(base->tp_flags & Py_TPFLAGS_HEAPTYPE));
&& !(base->tp_flags & Py_TPFLAGS_HEAPTYPE)
&& !(_Py_IsImmortal(type)));

assert(basedealloc);
basedealloc(self);
Expand All @@ -2562,7 +2564,7 @@ subtype_dealloc(PyObject *self)
reference counting. Only decref if the base type is not already a heap
allocated type. Otherwise, basedealloc should have decref'd it already */
if (type_needs_decref) {
Py_DECREF(type);
_Py_DECREF_TYPE(type);
}

endlabel:
Expand Down Expand Up @@ -3913,7 +3915,9 @@ type_new_alloc(type_new_ctx *ctx)
et->ht_module = NULL;
et->_ht_tpname = NULL;

_PyObject_SetDeferredRefcount((PyObject *)et);
#ifdef Py_GIL_DISABLED
_PyType_AssignId(et);
#endif

return type;
}
Expand Down Expand Up @@ -4965,6 +4969,11 @@ _PyType_FromMetaclass_impl(
type->tp_weaklistoffset = weaklistoffset;
type->tp_dictoffset = dictoffset;

#ifdef Py_GIL_DISABLED
// Assign a type id to enable thread-local refcounting
_PyType_AssignId(res);
#endif

/* Ready the type (which includes inheritance).
*
* After this call we should generally only touch up what's
Expand Down Expand Up @@ -5914,6 +5923,9 @@ type_dealloc(PyObject *self)
}
Py_XDECREF(et->ht_module);
PyMem_Free(et->_ht_tpname);
#ifdef Py_GIL_DISABLED
_PyType_ReleaseId(et);
#endif
Py_TYPE(type)->tp_free((PyObject *)type);
}

Expand Down
Loading

0 comments on commit e976252

Please sign in to comment.