Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-115999: Enable specialization of CALL instructions in free-threaded builds #127123

Merged
merged 29 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
6d9b6a2
Refactor specialize_c_call to use helpers
mpage Oct 16, 2024
30e25d2
Refactor specialize_py_call to use helpers
mpage Oct 16, 2024
92160de
Refactor specialize_class_call to use helpers
mpage Oct 16, 2024
4754921
Refactor specialize_method_descriptor to use helpers
mpage Oct 16, 2024
6a20bb0
Remove unneeded code
mpage Oct 16, 2024
ea7206d
Enable almost all specializations of CALL
mpage Oct 17, 2024
d5375f1
Regen files
mpage Oct 18, 2024
a2ca192
Fix implementation of CALL_LIST_APPEND in free-threaded builds
mpage Oct 18, 2024
2ab08f2
Regenerate interpreter and friends
mpage Oct 18, 2024
3534246
Refactor PyType_LookupRef to return version
mpage Oct 22, 2024
acda1c6
Make CALL_ALLOC_AND_ENTER_INIT thread-safe
mpage Oct 24, 2024
fdfa678
Regenerate files
mpage Oct 24, 2024
ad2e15c
Stop the world around assignments to `tstate->eval_frame`
mpage Nov 19, 2024
0003d00
Document restriction on _Py_InitCleanup bytecode
mpage Nov 20, 2024
8ebd331
Undo refactor
mpage Nov 20, 2024
4c1ad6c
Undo workaround for now-fixed cases_generator bug
mpage Nov 20, 2024
57ba52d
Document _PyType_CacheInitForSpecialization
mpage Nov 21, 2024
8ebd73d
Remove unused define
mpage Nov 21, 2024
8651ebe
Enable tests
mpage Nov 21, 2024
4c7837f
Fix warning about unused function on macos
mpage Nov 22, 2024
4b0a850
Merge branch 'main' into gh-115999-tlbc-call
mpage Nov 25, 2024
d8a67c2
Tag workarounds for not deferring nested functions on classes with gh…
mpage Nov 25, 2024
3e8d85e
Use `_PyInterpreterState_SetEvalFrameFunc` when setting / clearing pe…
mpage Nov 25, 2024
de0e2ee
Fix issue with `_CREATE_INIT_FRAME`
mpage Nov 26, 2024
1cb6260
Merge branch 'main' into gh-115999-tlbc-call
mpage Dec 3, 2024
b3aa63c
Use release/acquire for the specialization cache
mpage Dec 3, 2024
6b591c3
Use locking macros instead of helper function
mpage Dec 3, 2024
a5fdc59
Merge branch 'main' into gh-115999-tlbc-call
mpage Dec 3, 2024
bc65932
Merge branch 'main' into gh-115999-tlbc-call
mpage Dec 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Include/internal/pycore_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,20 @@ extern int _PyObject_StoreInstanceAttribute(PyObject *obj,
PyObject *name, PyObject *value);
extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name,
PyObject **attr);
extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *,
unsigned int *);

// Cache the provided init method in the specialization cache of type if the
// provided type version matches the current version of the type.
//
// The cached value is borrowed and is only valid if guarded by a type
// version check. In free-threaded builds the init method must also use
// deferred reference counting.
//
// Returns 1 if the value was cached or 0 otherwise.
extern int _PyType_CacheInitForSpecialization(PyHeapTypeObject *type,
PyObject *init,
unsigned int tp_version);

#ifdef Py_GIL_DISABLED
# define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1)
Expand Down
17 changes: 10 additions & 7 deletions Lib/test/test_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import unittest

import test.support
from test.support import requires_specialization, script_helper
from test.support import requires_specialization_ft, script_helper
from test.support.import_helper import import_module

_testcapi = test.support.import_helper.import_module("_testcapi")
Expand Down Expand Up @@ -850,6 +850,13 @@ def __init__(self, events):
def __call__(self, code, offset, val):
self.events.append(("return", code.co_name, val))

# gh-127274: CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that
# are deferred. We only defer functions defined at the top-level.
class ValueErrorRaiser:
def __init__(self):
raise ValueError()


class ExceptionMonitoringTest(CheckEvents):

exception_recorders = (
Expand Down Expand Up @@ -1045,16 +1052,12 @@ def func():
)
self.assertEqual(events[0], ("throw", IndexError))

@requires_specialization
@requires_specialization_ft
def test_no_unwind_for_shim_frame(self):

class B:
def __init__(self):
raise ValueError()

def f():
try:
return B()
return ValueErrorRaiser()
except ValueError:
pass

Expand Down
32 changes: 27 additions & 5 deletions Lib/test/test_opcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,18 @@ def f():
self.assertFalse(f())


# gh-127274: CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that
# are deferred. We only defer functions defined at the top-level.
class MyClass:
def __init__(self):
pass


class InitTakesArg:
def __init__(self, arg):
self.arg = arg


class TestCallCache(TestBase):
def test_too_many_defaults_0(self):
def f():
Expand Down Expand Up @@ -522,12 +534,8 @@ def f(x, y):
f()

@disabling_optimizer
@requires_specialization
@requires_specialization_ft
def test_assign_init_code(self):
class MyClass:
def __init__(self):
pass

def instantiate():
return MyClass()

Expand All @@ -544,6 +552,20 @@ def count_args(self, *args):
MyClass.__init__.__code__ = count_args.__code__
instantiate()

@disabling_optimizer
@requires_specialization_ft
def test_push_init_frame_fails(self):
def instantiate():
return InitTakesArg()

for _ in range(2):
with self.assertRaises(TypeError):
instantiate()
self.assert_specialized(instantiate, "CALL_ALLOC_AND_ENTER_INIT")

with self.assertRaises(TypeError):
instantiate()


@threading_helper.requires_working_threading()
class TestRacesDoNotCrash(TestBase):
Expand Down
9 changes: 7 additions & 2 deletions Lib/test/test_type_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import unittest
import dis
from test import support
from test.support import import_helper, requires_specialization
from test.support import import_helper, requires_specialization, requires_specialization_ft
try:
from sys import _clear_type_cache
except ImportError:
Expand Down Expand Up @@ -110,7 +110,6 @@ class HolderSub(Holder):
HolderSub.value

@support.cpython_only
@requires_specialization
class TypeCacheWithSpecializationTests(unittest.TestCase):
def tearDown(self):
_clear_type_cache()
Expand Down Expand Up @@ -140,6 +139,7 @@ def _check_specialization(self, func, arg, opname, *, should_specialize):
else:
self.assertIn(opname, self._all_opnames(func))

@requires_specialization
def test_class_load_attr_specialization_user_type(self):
class A:
def foo(self):
Expand All @@ -160,6 +160,7 @@ def load_foo_2(type_):

self._check_specialization(load_foo_2, A, "LOAD_ATTR", should_specialize=False)

@requires_specialization
def test_class_load_attr_specialization_static_type(self):
self.assertNotEqual(type_get_version(str), 0)
self.assertNotEqual(type_get_version(bytes), 0)
Expand All @@ -171,6 +172,7 @@ def get_capitalize_1(type_):
self.assertEqual(get_capitalize_1(str)('hello'), 'Hello')
self.assertEqual(get_capitalize_1(bytes)(b'hello'), b'Hello')

@requires_specialization
def test_property_load_attr_specialization_user_type(self):
class G:
@property
Expand All @@ -192,6 +194,7 @@ def load_x_2(instance):

self._check_specialization(load_x_2, G(), "LOAD_ATTR", should_specialize=False)

@requires_specialization
def test_store_attr_specialization_user_type(self):
class B:
__slots__ = ("bar",)
Expand All @@ -211,6 +214,7 @@ def store_bar_2(type_):

self._check_specialization(store_bar_2, B(), "STORE_ATTR", should_specialize=False)

@requires_specialization_ft
def test_class_call_specialization_user_type(self):
class F:
def __init__(self):
Expand All @@ -231,6 +235,7 @@ def call_class_2(type_):

self._check_specialization(call_class_2, F, "CALL", should_specialize=False)

@requires_specialization
def test_to_bool_specialization_user_type(self):
class H:
pass
Expand Down
62 changes: 55 additions & 7 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5527,9 +5527,12 @@ _PyTypes_AfterFork(void)
}

/* Internal API to look for a name through the MRO.
This returns a borrowed reference, and doesn't set an exception! */
This returns a strong reference, and doesn't set an exception!
If nonzero, version is set to the value of type->tp_version at the time of
the lookup.
*/
PyObject *
_PyType_LookupRef(PyTypeObject *type, PyObject *name)
_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version)
{
PyObject *res;
int error;
Expand All @@ -5552,6 +5555,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
// If the sequence is still valid then we're done
if (value == NULL || _Py_TryIncref(value)) {
if (_PySeqLock_EndRead(&entry->sequence, sequence)) {
if (version != NULL) {
*version = entry_version;
}
return value;
}
Py_XDECREF(value);
Expand All @@ -5573,6 +5579,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name));
OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name));
Py_XINCREF(entry->value);
if (version != NULL) {
*version = entry->version;
}
return entry->value;
}
#endif
Expand All @@ -5586,12 +5595,12 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
// anyone else can modify our mro or mutate the type.

int has_version = 0;
int version = 0;
unsigned int assigned_version = 0;
BEGIN_TYPE_LOCK();
res = find_name_in_mro(type, name, &error);
if (MCACHE_CACHEABLE_NAME(name)) {
has_version = assign_version_tag(interp, type);
version = type->tp_version_tag;
assigned_version = type->tp_version_tag;
}
END_TYPE_LOCK();

Expand All @@ -5608,28 +5617,67 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
if (error == -1) {
PyErr_Clear();
}
if (version != NULL) {
// 0 is not a valid version
*version = 0;
}
return NULL;
}

if (has_version) {
#if Py_GIL_DISABLED
update_cache_gil_disabled(entry, name, version, res);
update_cache_gil_disabled(entry, name, assigned_version, res);
#else
PyObject *old_value = update_cache(entry, name, version, res);
PyObject *old_value = update_cache(entry, name, assigned_version, res);
Py_DECREF(old_value);
#endif
}
if (version != NULL) {
// 0 is not a valid version
*version = has_version ? assigned_version : 0;
}
return res;
}

/* Internal API to look for a name through the MRO.
This returns a strong reference, and doesn't set an exception!
*/
PyObject *
_PyType_LookupRef(PyTypeObject *type, PyObject *name)
{
return _PyType_LookupRefAndVersion(type, name, NULL);
}

/* Internal API to look for a name through the MRO.
This returns a borrowed reference, and doesn't set an exception! */
PyObject *
_PyType_Lookup(PyTypeObject *type, PyObject *name)
{
PyObject *res = _PyType_LookupRef(type, name);
PyObject *res = _PyType_LookupRefAndVersion(type, name, NULL);
Py_XDECREF(res);
return res;
}

int
_PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init,
unsigned int tp_version)
{
if (!init || !tp_version) {
return 0;
}
int can_cache;
BEGIN_TYPE_LOCK();
can_cache = ((PyTypeObject*)type)->tp_version_tag == tp_version;
#ifdef Py_GIL_DISABLED
can_cache = can_cache && _PyObject_HasDeferredRefcount(init);
#endif
if (can_cache) {
FT_ATOMIC_STORE_PTR_RELEASE(type->_spec_cache.init, init);
}
END_TYPE_LOCK();
return can_cache;
}

static void
set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags)
{
Expand Down
16 changes: 10 additions & 6 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -3329,15 +3329,15 @@ dummy_func(
};

specializing op(_SPECIALIZE_CALL, (counter/1, callable[1], self_or_null[1], args[oparg] -- callable[1], self_or_null[1], args[oparg])) {
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Call(callable[0], next_instr, oparg + !PyStackRef_IsNull(self_or_null[0]));
DISPATCH_SAME_OPARG();
}
OPCODE_DEFERRED_INC(CALL);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}

op(_MAYBE_EXPAND_METHOD, (callable[1], self_or_null[1], args[oparg] -- func[1], maybe_self[1], args[oparg])) {
Expand Down Expand Up @@ -3722,10 +3722,10 @@ dummy_func(
DEOPT_IF(!PyStackRef_IsNull(null[0]));
DEOPT_IF(!PyType_Check(callable_o));
PyTypeObject *tp = (PyTypeObject *)callable_o;
DEOPT_IF(tp->tp_version_tag != type_version);
DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version);
assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES);
PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init;
PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
PyCodeObject *code = (PyCodeObject *)init_func->func_code;
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize));
STAT_INC(CALL, hit);
Expand All @@ -3743,17 +3743,19 @@ dummy_func(
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE);
/* Push self onto stack of shim */
shim->localsplus[0] = PyStackRef_DUP(self[0]);
DEAD(init);
DEAD(self);
init_frame = _PyEvalFramePushAndInit(
_PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
tstate, init[0], NULL, args-1, oparg+1, NULL, shim);
SYNC_SP();
if (init_frame == NULL) {
if (temp == NULL) {
_PyEval_FrameClearAndPop(tstate, shim);
ERROR_NO_POP();
}
init_frame = temp;
frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL;
/* Account for pushing the extra frame.
* We don't check recursion depth here,
Expand Down Expand Up @@ -4000,8 +4002,10 @@ dummy_func(
DEOPT_IF(callable_o != interp->callable_cache.list_append);
assert(self_o != NULL);
DEOPT_IF(!PyList_Check(self_o));
DEOPT_IF(!LOCK_OBJECT(self_o));
STAT_INC(CALL, hit);
int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg));
UNLOCK_OBJECT(self_o);
PyStackRef_CLOSE(self);
PyStackRef_CLOSE(callable);
ERROR_IF(err, error);
Expand Down
Loading
Loading