Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-117139: A StackRef Debugging Mode #121134

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
14 changes: 14 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,19 @@ jobs:
# Cirrus used for upstream, macos-14 for forks.
os-matrix: '["ghcr.io/cirruslabs/macos-runner:sonoma", "macos-14"]'

build_macos_free_threading_with_stackref_debug:
name: 'macOS (free-threading) (StackRef Debug)'
needs: check_source
if: needs.check_source.outputs.run_tests == 'true'
uses: ./.github/workflows/reusable-macos.yml
with:
config_hash: ${{ needs.check_source.outputs.config_hash }}
free-threading: true
stackref-debug: true
# Cirrus and macos-14 are M1.
# Cirrus used for upstream, macos-14 for forks.
os-matrix: '["ghcr.io/cirruslabs/macos-runner:sonoma", "macos-14"]'

build_ubuntu:
name: 'Ubuntu'
needs: check_source
Expand Down Expand Up @@ -551,6 +564,7 @@ jobs:
- check_generated_files
- build_macos
- build_macos_free_threading
- build_macos_free_threading_with_stackref_debug
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we do this for normal builds as well?
The linux machines are cheapest, so we should probably use those.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't because normal builds require all the inline functions to be macros #121263. And these are too complicated to fit into a single macro.

- build_ubuntu
- build_ubuntu_free_threading
- build_ubuntu_ssltests
Expand Down
9 changes: 8 additions & 1 deletion .github/workflows/reusable-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ on:
required: false
type: boolean
default: false
stackref-debug:
required: false
type: boolean
default: false
os-matrix:
required: false
type: string
Expand Down Expand Up @@ -54,11 +58,14 @@ jobs:
--config-cache \
--with-pydebug \
${{ inputs.free-threading && '--disable-gil' || '' }} \
${{ inputs.stackref-debug && '--with-pystackrefdebug' || '' }} \
--prefix=/opt/python-dev \
--with-openssl="$(brew --prefix openssl@3.0)"
- name: Build CPython
run: make -j8
- name: Display build info
run: make pythoninfo
- name: Tests
run: make test
# Stackref debug is 3.5x slower than normal CPython,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about using the set of tests for profiling? It is about 40 tests instead of the usual ~400, so would give decent coverage without being too slow.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry how do I run those specific tests? What command with python -m test?

# so we only run it on a restricted subset of tests.
run: ${{ inputs.stackref-debug && './python.exe -m test test_typing' || 'make test' }}
4 changes: 4 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,10 @@ struct _is {
/* the initial PyInterpreterState.threads.head */
_PyThreadStateImpl _initial_thread;
Py_ssize_t _interactive_src_count;

#if defined(Py_STACKREF_DEBUG) && defined(Py_GIL_DISABLED)
struct _Py_stackref_state stackref_state;
#endif
};


Expand Down
88 changes: 84 additions & 4 deletions Include/internal/pycore_stackref.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ extern "C" {
#endif

#include "pycore_object_deferred.h"
#include "pycore_hashtable.h"

#include <stddef.h>

Expand Down Expand Up @@ -52,43 +53,91 @@ typedef union {
uintptr_t bits;
} _PyStackRef;

struct _Py_stackref_entry {
PyObject *obj;
int is_live;
Fidget-Spinner marked this conversation as resolved.
Show resolved Hide resolved
};

struct _Py_stackref_state {
PyMutex lock;
_Py_hashtable_t *entries;
size_t next_ref;
};

typedef enum _PyStackRef_OpKind {
BORROW,
STEAL,
NEW,
} _PyStackRef_OpKind;

PyAPI_FUNC(PyObject *)_Py_stackref_to_object_transition(_PyStackRef stackref, _PyStackRef_OpKind op);
PyAPI_FUNC(_PyStackRef) _Py_object_to_stackref_transition(PyObject *obj, char tag, _PyStackRef_OpKind op);
PyAPI_FUNC(int) _PyStackRef_IsLive(_PyStackRef stackref);
PyAPI_FUNC(void) _PyStackRef_Close(_PyStackRef stackref);
PyAPI_FUNC(_PyStackRef) _PyStackRef_Dup(_PyStackRef stackref);

#define Py_TAG_DEFERRED (1)

#define Py_TAG_PTR (0)
#define Py_TAG_BITS (1)

#define RESERVED_BITS 1

#ifdef Py_GIL_DISABLED
static const _PyStackRef PyStackRef_NULL = { .bits = 0 | Py_TAG_DEFERRED};
#else
static const _PyStackRef PyStackRef_NULL = { .bits = 0 };
#endif

#define PyStackRef_IsNull(stackref) ((stackref).bits == PyStackRef_NULL.bits)


#ifdef Py_GIL_DISABLED
# define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_DEFERRED })
# ifdef Py_STACKREF_DEBUG
// From pycore_init_stackrefs in pylifecycle.c
static const _PyStackRef PyStackRef_True = { .bits = 1 << RESERVED_BITS };
# else
# define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_DEFERRED })
# endif
#else
# define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) })
#endif

#ifdef Py_GIL_DISABLED
# define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_DEFERRED })
# ifdef Py_STACKREF_DEBUG
// From pycore_init_stackrefs in pylifecycle.c
static const _PyStackRef PyStackRef_False = { .bits = 2 << RESERVED_BITS };
# else
# define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_DEFERRED })
# endif
#else
# define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) })
#endif

#ifdef Py_GIL_DISABLED
# ifdef Py_STACKREF_DEBUG
// From pycore_init_stackrefs in pylifecycle.c
static const _PyStackRef PyStackRef_None = { .bits = 3 << RESERVED_BITS };
# else
# define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_DEFERRED })
#endif
#else
# define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) })
#endif


static inline int
PyStackRef_Is(_PyStackRef a, _PyStackRef b) {
#if defined(Py_STACKREF_DEBUG) && defined(Py_GIL_DISABLED)
// Note: stackrefs are unique. So even immortal objects will produce different stackrefs.
return _Py_stackref_to_object_transition(a, BORROW) == _Py_stackref_to_object_transition(b, BORROW);
#else
return a.bits == b.bits;
#endif
}

static inline int
PyStackRef_IsNull(_PyStackRef stackref)
{
return PyStackRef_Is(stackref, PyStackRef_NULL);
}

static inline int
Expand All @@ -102,8 +151,13 @@ static inline PyObject *
PyStackRef_AsPyObjectBorrow(_PyStackRef stackref)
{
#ifdef Py_GIL_DISABLED
# if defined(Py_STACKREF_DEBUG)
return _Py_stackref_to_object_transition(stackref, BORROW);
# else
PyObject *cleared = ((PyObject *)((stackref).bits & (~Py_TAG_BITS)));
return cleared;
# endif

#else
return ((PyObject *)(stackref).bits);
#endif
Expand All @@ -115,10 +169,14 @@ static inline PyObject *
PyStackRef_AsPyObjectSteal(_PyStackRef stackref)
{
#ifdef Py_GIL_DISABLED
# ifdef Py_STACKREF_DEBUG
return _Py_stackref_to_object_transition(stackref, STEAL);
# else
if (!PyStackRef_IsNull(stackref) && PyStackRef_IsDeferred(stackref)) {
return Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref));
}
return PyStackRef_AsPyObjectBorrow(stackref);
# endif
#else
return PyStackRef_AsPyObjectBorrow(stackref);
#endif
Expand Down Expand Up @@ -146,7 +204,11 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj)
// Make sure we don't take an already tagged value.
assert(((uintptr_t)obj & Py_TAG_BITS) == 0);
int tag = (obj == NULL || _Py_IsImmortal(obj)) ? (Py_TAG_DEFERRED) : Py_TAG_PTR;
# ifdef Py_STACKREF_DEBUG
return _Py_object_to_stackref_transition(obj, tag, STEAL);
# else
return ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag});
# endif
#else
return ((_PyStackRef){.bits = ((uintptr_t)(obj))});
#endif
Expand All @@ -165,10 +227,18 @@ PyStackRef_FromPyObjectNew(PyObject *obj)
assert(obj != NULL);
// TODO (gh-117139): Add deferred objects later.
if (_Py_IsImmortal(obj)) {
# ifdef Py_STACKREF_DEBUG
return _Py_object_to_stackref_transition(obj, Py_TAG_DEFERRED, NEW);
# else
return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_DEFERRED };
# endif
}
else {
# ifdef Py_STACKREF_DEBUG
return _Py_object_to_stackref_transition(Py_NewRef(obj), Py_TAG_PTR, NEW);
# else
return (_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) | Py_TAG_PTR };
# endif
}
#else
return ((_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) });
Expand All @@ -186,7 +256,11 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj)
assert(((uintptr_t)obj & Py_TAG_BITS) == 0);
assert(obj != NULL);
assert(_Py_IsImmortal(obj));
# ifdef Py_STACKREF_DEBUG
return _Py_object_to_stackref_transition(obj, Py_TAG_DEFERRED, NEW);
# else
return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_DEFERRED };
# endif
#else
assert(_Py_IsImmortal(obj));
return ((_PyStackRef){ .bits = (uintptr_t)(obj) });
Expand Down Expand Up @@ -216,6 +290,9 @@ PyStackRef_CLOSE(_PyStackRef stackref)
return;
}
Py_DECREF(PyStackRef_AsPyObjectBorrow(stackref));
# ifdef Py_STACKREF_DEBUG
_PyStackRef_Close(stackref);
# endif
#else
Py_DECREF(PyStackRef_AsPyObjectBorrow(stackref));
#endif
Expand All @@ -234,6 +311,9 @@ static inline _PyStackRef
PyStackRef_DUP(_PyStackRef stackref)
{
#ifdef Py_GIL_DISABLED
# ifdef Py_STACKREF_DEBUG
stackref = _PyStackRef_Dup(stackref);
# endif
if (PyStackRef_IsDeferred(stackref)) {
assert(PyStackRef_IsNull(stackref) ||
_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref)));
Expand Down
1 change: 1 addition & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,7 @@ PYTHON_OBJS= \
Python/qsbr.o \
Python/bootstrap_hash.o \
Python/specialize.o \
Python/stackref.o \
Python/structmember.o \
Python/symtable.o \
Python/sysmodule.o \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Added a new ``--with-pystackrefs`` configure build option for CPython. This
option is fully experimental and may be modified or removed without prior
notice. Enabling the option turns on internal handle-like debugging for
CPython's main interpreter loop.
12 changes: 12 additions & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,18 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch)
}
}

#if Py_STACKREF_DEBUG
static int
_PyEval_StackIsAllLive(_PyStackRef *stack_base, _PyStackRef *stack_pointer)
{
while (stack_pointer > stack_base) {
assert(_PyStackRef_IsLive(stack_pointer[-1]));
stack_pointer--;
}
return 1;
}
#endif

/* _PyEval_EvalFrameDefault() is a *big* function,
* so consume 3 units of C stack */
#define PY_EVAL_C_STACK_UNITS 2
Expand Down
15 changes: 14 additions & 1 deletion Python/ceval_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,17 @@
#define PRE_DISPATCH_GOTO() ((void)0)
#endif

#ifdef Py_STACKREF_DEBUG
#define STACKREF_CHECK() \
do { \
if (frame->f_executable && PyCode_Check(frame->f_executable)) { \
_PyEval_StackIsAllLive(_PyFrame_Stackbase(frame), stack_pointer); \
}; \
} while (0);
#else
#define STACKREF_CHECK() ((void)(0))
#endif

#if LLTRACE
#define LLTRACE_RESUME_FRAME() \
do { \
Expand All @@ -110,13 +121,15 @@ do { \
{ \
NEXTOPARG(); \
PRE_DISPATCH_GOTO(); \
STACKREF_CHECK(); \
DISPATCH_GOTO(); \
}

#define DISPATCH_SAME_OPARG() \
{ \
opcode = next_instr->op.code; \
PRE_DISPATCH_GOTO(); \
PRE_DISPATCH_GOTO(); \
STACKREF_CHECK(); \
DISPATCH_GOTO(); \
}

Expand Down
36 changes: 36 additions & 0 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,35 @@ pycore_init_builtins(PyThreadState *tstate)
return _PyStatus_ERR("can't initialize builtins module");
}

#if defined(Py_STACKREF_DEBUG) && defined(Py_GIL_DISABLED)
static PyStatus
pycore_init_stackrefs(PyInterpreterState *interp)
{
_Py_hashtable_allocator_t alloc = {
.malloc = PyMem_Malloc,
.free = PyMem_Free,
};
interp->stackref_state.entries = _Py_hashtable_new_full(
_Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct,
NULL,
NULL,
&alloc
);
if (interp->stackref_state.entries == NULL) {
goto error;
}
interp->stackref_state.next_ref = 1;
// Reserve True, False, None
_Py_object_to_stackref_transition(Py_True, Py_TAG_DEFERRED, STEAL);
_Py_object_to_stackref_transition(Py_False, Py_TAG_DEFERRED, STEAL);
_Py_object_to_stackref_transition(Py_None, Py_TAG_DEFERRED, STEAL);
return _PyStatus_OK();

error:
return _PyStatus_ERR("can't initialize stackrefs to pyobject mapping");
}
#endif

static PyStatus
pycore_interp_init(PyThreadState *tstate)
Expand All @@ -845,6 +874,13 @@ pycore_interp_init(PyThreadState *tstate)
PyStatus status;
PyObject *sysmod = NULL;

#if defined(Py_STACKREF_DEBUG) && defined(Py_GIL_DISABLED)
status = pycore_init_stackrefs(interp);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
#endif

// Create singletons before the first PyType_Ready() call, since
// PyType_Ready() uses singletons like the Unicode empty string (tp_doc)
// and the empty tuple singletons (tp_bases).
Expand Down
4 changes: 4 additions & 0 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
interp->code_watchers[i] = NULL;
}
interp->active_code_watchers = 0;

#if defined(Py_STACKREF_DEBUG) && defined(Py_GIL_DISABLED)
PyMem_Free(interp->stackref_state.entries);
#endif
// XXX Once we have one allocator per interpreter (i.e.
// per-interpreter GC) we must ensure that all of the interpreter's
// objects have been cleaned up at the point.
Expand Down
Loading
Loading