Skip to content

Commit

Permalink
pythongh-115999: Enable specialization of CALL instructions in free…
Browse files Browse the repository at this point in the history
…-threaded builds (python#127123)

The CALL family of instructions were mostly thread-safe already and only required a small number of changes, which are documented below.

A few changes were needed to make CALL_ALLOC_AND_ENTER_INIT thread-safe:

Added _PyType_LookupRefAndVersion, which returns the type version corresponding to the returned ref.

Added _PyType_CacheInitForSpecialization, which takes an init method and the corresponding type version and only populates the specialization cache if the current type version matches the supplied version. This prevents potentially caching a stale value in free-threaded builds if we race with an update to __init__.

Only cache __init__ functions that are deferred in free-threaded builds. This ensures that the reference to __init__ that is stored in the specialization cache is valid if the type version guard in _CHECK_AND_ALLOCATE_OBJECT passes.
Fix a bug in _CREATE_INIT_FRAME where the frame is pushed to the stack on failure.

A few other miscellaneous changes were also needed:

Use {LOCK,UNLOCK}_OBJECT in LIST_APPEND. This ensures that the list's per-object lock is held while we are appending to it.

Add missing co_tlbc for _Py_InitCleanup.

Stop/start the world around setting the eval frame hook. This allows us to read interp->eval_frame non-atomically and preserves the behavior of _CHECK_PEP_523 documented below.
  • Loading branch information
mpage authored and ebonnal committed Jan 10, 2025
1 parent a0cb339 commit 99d0bb0
Show file tree
Hide file tree
Showing 11 changed files with 220 additions and 92 deletions.
14 changes: 14 additions & 0 deletions Include/internal/pycore_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,20 @@ extern int _PyObject_StoreInstanceAttribute(PyObject *obj,
PyObject *name, PyObject *value);
extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name,
PyObject **attr);
extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *,
unsigned int *);

// Cache the provided init method in the specialization cache of type if the
// provided type version matches the current version of the type.
//
// The cached value is borrowed and is only valid if guarded by a type
// version check. In free-threaded builds the init method must also use
// deferred reference counting.
//
// Returns 1 if the value was cached or 0 otherwise.
extern int _PyType_CacheInitForSpecialization(PyHeapTypeObject *type,
PyObject *init,
unsigned int tp_version);

#ifdef Py_GIL_DISABLED
# define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1)
Expand Down
17 changes: 10 additions & 7 deletions Lib/test/test_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import unittest

import test.support
from test.support import requires_specialization, script_helper
from test.support import requires_specialization_ft, script_helper
from test.support.import_helper import import_module

_testcapi = test.support.import_helper.import_module("_testcapi")
Expand Down Expand Up @@ -850,6 +850,13 @@ def __init__(self, events):
def __call__(self, code, offset, val):
self.events.append(("return", code.co_name, val))

# gh-127274: CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that
# are deferred. We only defer functions defined at the top-level.
class ValueErrorRaiser:
def __init__(self):
raise ValueError()


class ExceptionMonitoringTest(CheckEvents):

exception_recorders = (
Expand Down Expand Up @@ -1045,16 +1052,12 @@ def func():
)
self.assertEqual(events[0], ("throw", IndexError))

@requires_specialization
@requires_specialization_ft
def test_no_unwind_for_shim_frame(self):

class B:
def __init__(self):
raise ValueError()

def f():
try:
return B()
return ValueErrorRaiser()
except ValueError:
pass

Expand Down
32 changes: 27 additions & 5 deletions Lib/test/test_opcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,18 @@ def f():
self.assertFalse(f())


# gh-127274: CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that
# are deferred. We only defer functions defined at the top-level.
class MyClass:
def __init__(self):
pass


class InitTakesArg:
def __init__(self, arg):
self.arg = arg


class TestCallCache(TestBase):
def test_too_many_defaults_0(self):
def f():
Expand Down Expand Up @@ -522,12 +534,8 @@ def f(x, y):
f()

@disabling_optimizer
@requires_specialization
@requires_specialization_ft
def test_assign_init_code(self):
class MyClass:
def __init__(self):
pass

def instantiate():
return MyClass()

Expand All @@ -544,6 +552,20 @@ def count_args(self, *args):
MyClass.__init__.__code__ = count_args.__code__
instantiate()

@disabling_optimizer
@requires_specialization_ft
def test_push_init_frame_fails(self):
def instantiate():
return InitTakesArg()

for _ in range(2):
with self.assertRaises(TypeError):
instantiate()
self.assert_specialized(instantiate, "CALL_ALLOC_AND_ENTER_INIT")

with self.assertRaises(TypeError):
instantiate()


@threading_helper.requires_working_threading()
class TestRacesDoNotCrash(TestBase):
Expand Down
9 changes: 7 additions & 2 deletions Lib/test/test_type_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import unittest
import dis
from test import support
from test.support import import_helper, requires_specialization
from test.support import import_helper, requires_specialization, requires_specialization_ft
try:
from sys import _clear_type_cache
except ImportError:
Expand Down Expand Up @@ -110,7 +110,6 @@ class HolderSub(Holder):
HolderSub.value

@support.cpython_only
@requires_specialization
class TypeCacheWithSpecializationTests(unittest.TestCase):
def tearDown(self):
_clear_type_cache()
Expand Down Expand Up @@ -140,6 +139,7 @@ def _check_specialization(self, func, arg, opname, *, should_specialize):
else:
self.assertIn(opname, self._all_opnames(func))

@requires_specialization
def test_class_load_attr_specialization_user_type(self):
class A:
def foo(self):
Expand All @@ -160,6 +160,7 @@ def load_foo_2(type_):

self._check_specialization(load_foo_2, A, "LOAD_ATTR", should_specialize=False)

@requires_specialization
def test_class_load_attr_specialization_static_type(self):
self.assertNotEqual(type_get_version(str), 0)
self.assertNotEqual(type_get_version(bytes), 0)
Expand All @@ -171,6 +172,7 @@ def get_capitalize_1(type_):
self.assertEqual(get_capitalize_1(str)('hello'), 'Hello')
self.assertEqual(get_capitalize_1(bytes)(b'hello'), b'Hello')

@requires_specialization
def test_property_load_attr_specialization_user_type(self):
class G:
@property
Expand All @@ -192,6 +194,7 @@ def load_x_2(instance):

self._check_specialization(load_x_2, G(), "LOAD_ATTR", should_specialize=False)

@requires_specialization
def test_store_attr_specialization_user_type(self):
class B:
__slots__ = ("bar",)
Expand All @@ -211,6 +214,7 @@ def store_bar_2(type_):

self._check_specialization(store_bar_2, B(), "STORE_ATTR", should_specialize=False)

@requires_specialization_ft
def test_class_call_specialization_user_type(self):
class F:
def __init__(self):
Expand All @@ -231,6 +235,7 @@ def call_class_2(type_):

self._check_specialization(call_class_2, F, "CALL", should_specialize=False)

@requires_specialization
def test_to_bool_specialization_user_type(self):
class H:
pass
Expand Down
62 changes: 55 additions & 7 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5528,9 +5528,12 @@ _PyTypes_AfterFork(void)
}

/* Internal API to look for a name through the MRO.
This returns a borrowed reference, and doesn't set an exception! */
This returns a strong reference, and doesn't set an exception!
If nonzero, version is set to the value of type->tp_version at the time of
the lookup.
*/
PyObject *
_PyType_LookupRef(PyTypeObject *type, PyObject *name)
_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version)
{
PyObject *res;
int error;
Expand All @@ -5553,6 +5556,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
// If the sequence is still valid then we're done
if (value == NULL || _Py_TryIncref(value)) {
if (_PySeqLock_EndRead(&entry->sequence, sequence)) {
if (version != NULL) {
*version = entry_version;
}
return value;
}
Py_XDECREF(value);
Expand All @@ -5574,6 +5580,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name));
OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name));
Py_XINCREF(entry->value);
if (version != NULL) {
*version = entry->version;
}
return entry->value;
}
#endif
Expand All @@ -5587,12 +5596,12 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
// anyone else can modify our mro or mutate the type.

int has_version = 0;
int version = 0;
unsigned int assigned_version = 0;
BEGIN_TYPE_LOCK();
res = find_name_in_mro(type, name, &error);
if (MCACHE_CACHEABLE_NAME(name)) {
has_version = assign_version_tag(interp, type);
version = type->tp_version_tag;
assigned_version = type->tp_version_tag;
}
END_TYPE_LOCK();

Expand All @@ -5609,28 +5618,67 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
if (error == -1) {
PyErr_Clear();
}
if (version != NULL) {
// 0 is not a valid version
*version = 0;
}
return NULL;
}

if (has_version) {
#if Py_GIL_DISABLED
update_cache_gil_disabled(entry, name, version, res);
update_cache_gil_disabled(entry, name, assigned_version, res);
#else
PyObject *old_value = update_cache(entry, name, version, res);
PyObject *old_value = update_cache(entry, name, assigned_version, res);
Py_DECREF(old_value);
#endif
}
if (version != NULL) {
// 0 is not a valid version
*version = has_version ? assigned_version : 0;
}
return res;
}

/* Internal API to look for a name through the MRO.
This returns a strong reference, and doesn't set an exception!
*/
PyObject *
_PyType_LookupRef(PyTypeObject *type, PyObject *name)
{
return _PyType_LookupRefAndVersion(type, name, NULL);
}

/* Internal API to look for a name through the MRO.
This returns a borrowed reference, and doesn't set an exception! */
PyObject *
_PyType_Lookup(PyTypeObject *type, PyObject *name)
{
PyObject *res = _PyType_LookupRef(type, name);
PyObject *res = _PyType_LookupRefAndVersion(type, name, NULL);
Py_XDECREF(res);
return res;
}

int
_PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init,
unsigned int tp_version)
{
if (!init || !tp_version) {
return 0;
}
int can_cache;
BEGIN_TYPE_LOCK();
can_cache = ((PyTypeObject*)type)->tp_version_tag == tp_version;
#ifdef Py_GIL_DISABLED
can_cache = can_cache && _PyObject_HasDeferredRefcount(init);
#endif
if (can_cache) {
FT_ATOMIC_STORE_PTR_RELEASE(type->_spec_cache.init, init);
}
END_TYPE_LOCK();
return can_cache;
}

static void
set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags)
{
Expand Down
16 changes: 10 additions & 6 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -3329,15 +3329,15 @@ dummy_func(
};

specializing op(_SPECIALIZE_CALL, (counter/1, callable[1], self_or_null[1], args[oparg] -- callable[1], self_or_null[1], args[oparg])) {
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Call(callable[0], next_instr, oparg + !PyStackRef_IsNull(self_or_null[0]));
DISPATCH_SAME_OPARG();
}
OPCODE_DEFERRED_INC(CALL);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}

op(_MAYBE_EXPAND_METHOD, (callable[1], self_or_null[1], args[oparg] -- func[1], maybe_self[1], args[oparg])) {
Expand Down Expand Up @@ -3722,10 +3722,10 @@ dummy_func(
DEOPT_IF(!PyStackRef_IsNull(null[0]));
DEOPT_IF(!PyType_Check(callable_o));
PyTypeObject *tp = (PyTypeObject *)callable_o;
DEOPT_IF(tp->tp_version_tag != type_version);
DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version);
assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES);
PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init;
PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
PyCodeObject *code = (PyCodeObject *)init_func->func_code;
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize));
STAT_INC(CALL, hit);
Expand All @@ -3743,17 +3743,19 @@ dummy_func(
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE);
/* Push self onto stack of shim */
shim->localsplus[0] = PyStackRef_DUP(self[0]);
DEAD(init);
DEAD(self);
init_frame = _PyEvalFramePushAndInit(
_PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
tstate, init[0], NULL, args-1, oparg+1, NULL, shim);
SYNC_SP();
if (init_frame == NULL) {
if (temp == NULL) {
_PyEval_FrameClearAndPop(tstate, shim);
ERROR_NO_POP();
}
init_frame = temp;
frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL;
/* Account for pushing the extra frame.
* We don't check recursion depth here,
Expand Down Expand Up @@ -4000,8 +4002,10 @@ dummy_func(
DEOPT_IF(callable_o != interp->callable_cache.list_append);
assert(self_o != NULL);
DEOPT_IF(!PyList_Check(self_o));
DEOPT_IF(!LOCK_OBJECT(self_o));
STAT_INC(CALL, hit);
int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg));
UNLOCK_OBJECT(self_o);
PyStackRef_CLOSE(self);
PyStackRef_CLOSE(callable);
ERROR_IF(err, error);
Expand Down
Loading

0 comments on commit 99d0bb0

Please sign in to comment.