Skip to content

Commit

Permalink
pythongh-122417: Implement per-thread heap type refcounts (python#122418
Browse files Browse the repository at this point in the history
)

The free-threaded build partially stores heap type reference counts in
distributed manner in per-thread arrays. This avoids reference count
contention when creating or destroying instances.

Co-authored-by: Ken Jin <kenjin@python.org>
  • Loading branch information
2 people authored and blhsing committed Aug 22, 2024
1 parent 64e891e commit 32134b0
Show file tree
Hide file tree
Showing 18 changed files with 427 additions and 69 deletions.
3 changes: 3 additions & 0 deletions Include/cpython/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ typedef struct _heaptypeobject {
PyObject *ht_module;
char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec
struct _specialization_cache _spec_cache; // For use by the specializer.
#ifdef Py_GIL_DISABLED
Py_ssize_t unique_id; // ID used for thread-local refcounting
#endif
/* here are optional user slots, followed by the members. */
} PyHeapTypeObject;

Expand Down
4 changes: 0 additions & 4 deletions Include/internal/pycore_gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,10 +381,6 @@ extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
extern void _Py_ScheduleGC(PyThreadState *tstate);
extern void _Py_RunGC(PyThreadState *tstate);

#ifdef Py_GIL_DISABLED
// gh-117783: Immortalize objects that use deferred reference counting
extern void _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp);
#endif

#ifdef __cplusplus
}
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ extern "C" {
#include "pycore_qsbr.h" // struct _qsbr_state
#include "pycore_tstate.h" // _PyThreadStateImpl
#include "pycore_tuple.h" // struct _Py_tuple_state
#include "pycore_typeid.h" // struct _Py_type_id_pool
#include "pycore_typeobject.h" // struct types_state
#include "pycore_unicodeobject.h" // struct _Py_unicode_state
#include "pycore_warnings.h" // struct _warnings_runtime_state
Expand Down Expand Up @@ -220,6 +221,7 @@ struct _is {
#if defined(Py_GIL_DISABLED)
struct _mimalloc_interp_state mimalloc;
struct _brc_state brc; // biased reference counting state
struct _Py_type_id_pool type_ids;
PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS];
#endif

Expand Down
72 changes: 71 additions & 1 deletion Include/internal/pycore_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,19 @@ extern "C" {
#include "pycore_interp.h" // PyInterpreterState.gc
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_typeid.h" // _PyType_IncrefSlow


#define _Py_IMMORTAL_REFCNT_LOOSE ((_Py_IMMORTAL_REFCNT >> 1) + 1)

// This value is added to `ob_ref_shared` for objects that use deferred
// reference counting so that they are not immediately deallocated when the
// non-deferred reference count drops to zero.
//
// The value is half the maximum shared refcount because the low two bits of
// `ob_ref_shared` are used for flags.
#define _Py_REF_DEFERRED (PY_SSIZE_T_MAX / 8)

// gh-121528, gh-118997: Similar to _Py_IsImmortal() but be more loose when
// comparing the reference count to stay compatible with C extensions built
// with the stable ABI 3.11 or older. Such extensions implement INCREF/DECREF
Expand Down Expand Up @@ -280,6 +289,67 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp);
extern void _PyObject_FiniState(PyInterpreterState *interp);
extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj);

#ifndef Py_GIL_DISABLED
# define _Py_INCREF_TYPE Py_INCREF
# define _Py_DECREF_TYPE Py_DECREF
#else
static inline void
_Py_INCREF_TYPE(PyTypeObject *type)
{
if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
assert(_Py_IsImmortal(type));
return;
}

_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
PyHeapTypeObject *ht = (PyHeapTypeObject *)type;

// Unsigned comparison so that `unique_id=-1`, which indicates that
// per-thread refcounting has been disabled on this type, is handled by
// the "else".
if ((size_t)ht->unique_id < (size_t)tstate->types.size) {
# ifdef Py_REF_DEBUG
_Py_INCREF_IncRefTotal();
# endif
_Py_INCREF_STAT_INC();
tstate->types.refcounts[ht->unique_id]++;
}
else {
// The slow path resizes the thread-local refcount array if necessary.
// It handles the unique_id=-1 case to keep the inlinable function smaller.
_PyType_IncrefSlow(ht);
}
}

static inline void
_Py_DECREF_TYPE(PyTypeObject *type)
{
if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
assert(_Py_IsImmortal(type));
return;
}

_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
PyHeapTypeObject *ht = (PyHeapTypeObject *)type;

// Unsigned comparison so that `unique_id=-1`, which indicates that
// per-thread refcounting has been disabled on this type, is handled by
// the "else".
if ((size_t)ht->unique_id < (size_t)tstate->types.size) {
# ifdef Py_REF_DEBUG
_Py_DECREF_DecRefTotal();
# endif
_Py_DECREF_STAT_INC();
tstate->types.refcounts[ht->unique_id]--;
}
else {
// Directly decref the type if the type id is not assigned or if
// per-thread refcounting has been disabled on this type.
Py_DECREF(type);
}
}
#endif

/* Inline functions trading binary compatibility for speed:
_PyObject_Init() is the fast version of PyObject_Init(), and
_PyObject_InitVar() is the fast version of PyObject_InitVar().
Expand All @@ -291,7 +361,7 @@ _PyObject_Init(PyObject *op, PyTypeObject *typeobj)
assert(op != NULL);
Py_SET_TYPE(op, typeobj);
assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortalLoose(typeobj));
Py_INCREF(typeobj);
_Py_INCREF_TYPE(typeobj);
_Py_NewReference(op);
}

Expand Down
10 changes: 10 additions & 0 deletions Include/internal/pycore_tstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ typedef struct _PyThreadStateImpl {
struct _mimalloc_thread_state mimalloc;
struct _Py_freelists freelists;
struct _brc_thread_state brc;
struct {
// The thread-local refcounts for heap type objects
Py_ssize_t *refcounts;

// Size of the refcounts array.
Py_ssize_t size;

// If set, don't use thread-local refcounts
int is_finalized;
} types;
#endif

#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
Expand Down
75 changes: 75 additions & 0 deletions Include/internal/pycore_typeid.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#ifndef Py_INTERNAL_TYPEID_H
#define Py_INTERNAL_TYPEID_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#ifdef Py_GIL_DISABLED

// This contains code for allocating unique ids to heap type objects
// and re-using those ids when the type is deallocated.
//
// The type ids are used to implement per-thread reference counts of
// heap type objects to avoid contention on the reference count fields
// of heap type objects. Static type objects are immortal, so contention
// is not an issue for those types.
//
// Type id of -1 is used to indicate a type doesn't use thread-local
// refcounting. This value is used when a type object is finalized by the GC
// and during interpreter shutdown to allow the type object to be
// deallocated promptly when the object's refcount reaches zero.
//
// Each entry implicitly represents a type id based on it's offset in the
// table. Non-allocated entries form a free-list via the 'next' pointer.
// Allocated entries store the corresponding PyTypeObject.
typedef union _Py_type_id_entry {
// Points to the next free type id, when part of the freelist
union _Py_type_id_entry *next;

// Stores the type object when the id is assigned
PyHeapTypeObject *type;
} _Py_type_id_entry;

struct _Py_type_id_pool {
PyMutex mutex;

// combined table of types with allocated type ids and unallocated
// type ids.
_Py_type_id_entry *table;

// Next entry to allocate inside 'table' or NULL
_Py_type_id_entry *freelist;

// size of 'table'
Py_ssize_t size;
};

// Assigns the next id from the pool of type ids.
extern void _PyType_AssignId(PyHeapTypeObject *type);

// Releases the allocated type id back to the pool.
extern void _PyType_ReleaseId(PyHeapTypeObject *type);

// Merges the thread-local reference counts into the corresponding types.
extern void _PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate);

// Like _PyType_MergeThreadLocalRefcounts, but also frees the thread-local
// array of refcounts.
extern void _PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate);

// Frees the interpreter's pool of type ids.
extern void _PyType_FinalizeIdPool(PyInterpreterState *interp);

// Increfs the type, resizing the thread-local refcount array if necessary.
PyAPI_FUNC(void) _PyType_IncrefSlow(PyHeapTypeObject *type);

#endif /* Py_GIL_DISABLED */

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_TYPEID_H */
4 changes: 3 additions & 1 deletion Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1710,6 +1710,7 @@ def delx(self): del self.__x
fmt = 'P2nPI13Pl4Pn9Pn12PIPc'
s = vsize(fmt)
check(int, s)
typeid = 'n' if support.Py_GIL_DISABLED else ''
# class
s = vsize(fmt + # PyTypeObject
'4P' # PyAsyncMethods
Expand All @@ -1718,7 +1719,8 @@ def delx(self): del self.__x
'10P' # PySequenceMethods
'2P' # PyBufferProcs
'6P'
'1PIP' # Specializer cache
'1PIP' # Specializer cache
+ typeid # heap type id (free-threaded only)
)
class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries
Expand Down
2 changes: 2 additions & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ PYTHON_OBJS= \
Python/thread.o \
Python/traceback.o \
Python/tracemalloc.o \
Python/typeid.o \
Python/getopt.o \
Python/pystrcmp.o \
Python/pystrtod.o \
Expand Down Expand Up @@ -1257,6 +1258,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_tracemalloc.h \
$(srcdir)/Include/internal/pycore_tstate.h \
$(srcdir)/Include/internal/pycore_tuple.h \
$(srcdir)/Include/internal/pycore_typeid.h \
$(srcdir)/Include/internal/pycore_typeobject.h \
$(srcdir)/Include/internal/pycore_typevarobject.h \
$(srcdir)/Include/internal/pycore_ucnhash.h \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
In the free-threaded build, the reference counts for heap type objects are now
partially stored in a distributed manner in per-thread arrays. This reduces
contention on the heap type's reference count fields when creating or
destroying instances of the same type from multiple threads concurrently.
10 changes: 1 addition & 9 deletions Objects/object.c
Original file line number Diff line number Diff line change
Expand Up @@ -2477,15 +2477,7 @@ _PyObject_SetDeferredRefcount(PyObject *op)
assert(_Py_IsOwnedByCurrentThread(op));
assert(op->ob_ref_shared == 0);
_PyObject_SET_GC_BITS(op, _PyGC_BITS_DEFERRED);
PyInterpreterState *interp = _PyInterpreterState_GET();
if (_Py_atomic_load_int_relaxed(&interp->gc.immortalize) == 1) {
// gh-117696: immortalize objects instead of using deferred reference
// counting for now.
_Py_SetImmortal(op);
return;
}
op->ob_ref_local += 1;
op->ob_ref_shared = _Py_REF_QUEUED;
op->ob_ref_shared = _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
#endif
}

Expand Down
16 changes: 13 additions & 3 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2452,7 +2452,7 @@ subtype_dealloc(PyObject *self)
reference counting. Only decref if the base type is not already a heap
allocated type. Otherwise, basedealloc should have decref'd it already */
if (type_needs_decref) {
Py_DECREF(type);
_Py_DECREF_TYPE(type);
}

/* Done */
Expand Down Expand Up @@ -2562,7 +2562,7 @@ subtype_dealloc(PyObject *self)
reference counting. Only decref if the base type is not already a heap
allocated type. Otherwise, basedealloc should have decref'd it already */
if (type_needs_decref) {
Py_DECREF(type);
_Py_DECREF_TYPE(type);
}

endlabel:
Expand Down Expand Up @@ -3913,7 +3913,9 @@ type_new_alloc(type_new_ctx *ctx)
et->ht_module = NULL;
et->_ht_tpname = NULL;

_PyObject_SetDeferredRefcount((PyObject *)et);
#ifdef Py_GIL_DISABLED
_PyType_AssignId(et);
#endif

return type;
}
Expand Down Expand Up @@ -4965,6 +4967,11 @@ _PyType_FromMetaclass_impl(
type->tp_weaklistoffset = weaklistoffset;
type->tp_dictoffset = dictoffset;

#ifdef Py_GIL_DISABLED
// Assign a type id to enable thread-local refcounting
_PyType_AssignId(res);
#endif

/* Ready the type (which includes inheritance).
*
* After this call we should generally only touch up what's
Expand Down Expand Up @@ -5914,6 +5921,9 @@ type_dealloc(PyObject *self)
}
Py_XDECREF(et->ht_module);
PyMem_Free(et->_ht_tpname);
#ifdef Py_GIL_DISABLED
_PyType_ReleaseId(et);
#endif
Py_TYPE(type)->tp_free((PyObject *)type);
}

Expand Down
1 change: 1 addition & 0 deletions PCbuild/_freeze_module.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@
<ClCompile Include="..\Python\thread.c" />
<ClCompile Include="..\Python\traceback.c" />
<ClCompile Include="..\Python\tracemalloc.c" />
<ClCompile Include="..\Python\typeid.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\PC\pyconfig.h.in" />
Expand Down
3 changes: 3 additions & 0 deletions PCbuild/_freeze_module.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,9 @@
<ClCompile Include="..\Python\tracemalloc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Python\typeid.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Objects\tupleobject.c">
<Filter>Source Files</Filter>
</ClCompile>
Expand Down
2 changes: 2 additions & 0 deletions PCbuild/pythoncore.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@
<ClInclude Include="..\Include\internal\pycore_tracemalloc.h" />
<ClInclude Include="..\Include\internal\pycore_tstate.h" />
<ClInclude Include="..\Include\internal\pycore_tuple.h" />
<ClInclude Include="..\Include\internal\pycore_typeid.h" />
<ClInclude Include="..\Include\internal\pycore_typeobject.h" />
<ClInclude Include="..\Include\internal\pycore_typevarobject.h" />
<ClInclude Include="..\Include\internal\pycore_ucnhash.h" />
Expand Down Expand Up @@ -643,6 +644,7 @@
<ClCompile Include="..\Python\thread.c" />
<ClCompile Include="..\Python\traceback.c" />
<ClCompile Include="..\Python\tracemalloc.c" />
<ClCompile Include="..\Python\typeid.c" />
</ItemGroup>
<ItemGroup Condition="$(IncludeExternals)">
<ClCompile Include="..\Modules\zlibmodule.c" />
Expand Down
6 changes: 6 additions & 0 deletions PCbuild/pythoncore.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,9 @@
<ClInclude Include="..\Include\internal\pycore_tuple.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_typeid.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_typeobject.h">
<Filter>Include\internal</Filter>
</ClInclude>
Expand Down Expand Up @@ -1493,6 +1496,9 @@
<ClCompile Include="..\Python\tracemalloc.c">
<Filter>Python</Filter>
</ClCompile>
<ClCompile Include="..\Python\typeid.c">
<Filter>Python</Filter>
</ClCompile>
<ClCompile Include="..\Python\bootstrap_hash.c">
<Filter>Python</Filter>
</ClCompile>
Expand Down
Loading

0 comments on commit 32134b0

Please sign in to comment.