From 575eeecb05cf74aca22090e1c71bb43aed968879 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Mon, 27 May 2024 18:41:47 +0200 Subject: [PATCH 01/50] src: wip supporting Python 3.13 --- src/bytecode/concrete.py | 14 ++++++++++++-- src/bytecode/instr.py | 36 +++++++++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 5472212a..36c50e12 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -1046,7 +1046,9 @@ def to_bytecode( arg = FreeVar(name) elif c_instr.opcode in _opcode.hascompare: arg = Compare( - (c_arg >> 4) if sys.version_info >= (3, 12) else c_arg + (c_arg >> 5) + (c_arg & 16) << 4 + if sys.version_info >= (3, 13) + else ((c_arg >> 4) if sys.version_info >= (3, 12) else c_arg) ) elif c_instr.opcode in INTRINSIC_1OP: arg = Intrinsic1Op(c_arg) @@ -1273,9 +1275,17 @@ def concrete_instructions(self) -> None: arg = self.bytecode.freevars.index(arg.name) elif instr.opcode in _opcode.hascompare: if isinstance(arg, Compare): + # In Python 3.13 the 4 lowest bits are used for caching + # and the 5th one indicate a cast to bool + if sys.version_info >= (3, 13): + arg = ( + arg._get_mask() + + ((arg.value & 0b1111) << 4) + + (arg.value & 16) + ) # In Python 3.12 the 4 lowest bits are used for caching # See compare_masks in compile.c - if sys.version_info >= (3, 12): + elif sys.version_info >= (3, 12): arg = arg._get_mask() + (arg.value << 4) else: arg = arg.value diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index f5e22c20..9af507e3 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -39,6 +39,9 @@ ) INTRINSIC = INTRINSIC_1OP + INTRINSIC_2OP +HASJABS = () if sys.version_info >= (3, 13) else _opcode.hasjabs +HASJREL = _opcode.hasjump if sys.version_info >= (3, 13) else _opcode.hasjrel + # Used for COMPARE_OP opcode argument @enum.unique @@ -59,19 +62,28 @@ class Compare(enum.IntEnum): if sys.version_info >= (3, 12): def 
_get_mask(self): - if self == Compare.EQ: + v = self & 0b1111 + if v == Compare.EQ: return 8 - elif self == Compare.NE: + elif v == Compare.NE: return 1 + 2 + 4 - elif self == Compare.LT: + elif v == Compare.LT: return 2 - elif self == Compare.LE: + elif v == Compare.LE: return 2 + 8 - elif self == Compare.GT: + elif v == Compare.GT: return 4 - elif self == Compare.GE: + elif v == Compare.GE: return 4 + 8 + if sys.version_info >= (3, 13): + LT_CAST = 0 + 16 + LE_CAST = 1 + 16 + EQ_CAST = 2 + 16 + NE_CAST = 3 + 16 + GT_CAST = 4 + 16 + GE_CAST = 5 + 16 + # Used for BINARY_OP under Python 3.11+ @enum.unique @@ -326,6 +338,10 @@ def opcode_has_argument(opcode: int) -> bool: "LOAD_FROM_DICT_OR_DEREF": (-1, 1), "LOAD_INTRISIC_1": (-1, 1), "LOAD_INTRISIC_2": (-2, 1), + "SET_FUNCTION_ATTRIBUTE": (-2, 1), # new in 3.13 + "CONVERT_VALUE": (-1, 1), # new in 3.13 + "FORMAT_SIMPLE": (-1, 1), # new in 3.13 + "FORMAT_SPEC": (-2, 1), # new in 3.13 } @@ -340,6 +356,8 @@ def opcode_has_argument(opcode: int) -> bool: -2 - arg if sys.version_info >= (3, 12) else -2, 1, ), + # 3.13 only + "CALL_KW": lambda effect, arg, jump: (-2 - arg, 1), # 3.12 changed the behavior of LOAD_ATTR "LOAD_ATTR": lambda effect, arg, jump: (-1, 1 + effect), "LOAD_SUPER_ATTR": lambda effect, arg, jump: (-3, 3 + effect), @@ -683,15 +701,15 @@ def is_uncond_jump(self) -> bool: def is_abs_jump(self) -> bool: """Is an absolute jump.""" - return self._opcode in _opcode.hasjabs + return self._opcode in HASJABS def is_forward_rel_jump(self) -> bool: """Is a forward relative jump.""" - return self._opcode in _opcode.hasjrel and "BACKWARD" not in self._name + return self._opcode in HASJREL and "BACKWARD" not in self._name def is_backward_rel_jump(self) -> bool: """Is a backward relative jump.""" - return self._opcode in _opcode.hasjrel and "BACKWARD" in self._name + return self._opcode in HASJREL and "BACKWARD" in self._name def is_final(self) -> bool: if self._name in { From 0bd3c710dd591af07c0543b993c3e1df770ef239 
Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Mon, 27 May 2024 18:42:10 +0200 Subject: [PATCH 02/50] cis: test on 3.13 --- .github/workflows/cis.yml | 2 ++ tox.ini | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cis.yml b/.github/workflows/cis.yml index 74b7f9e9..6ddd6ac5 100644 --- a/.github/workflows/cis.yml +++ b/.github/workflows/cis.yml @@ -52,6 +52,8 @@ jobs: toxenv: py311 - python-version: "3.12" toxenv: py312 + - python-version: "3.13-dev" + toxenv: py313 steps: - uses: actions/checkout@v4 - name: Get history and tags for SCM versioning to work diff --git a/tox.ini b/tox.ini index 7fba64ae..05b91507 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py3, py38, py39, py310, py311, py312, fmt, docs +envlist = py3, py38, py39, py310, py311, py312, py313, fmt, docs isolated_build = true [testenv] From 160ae8a9eb823ef91df02c116ee25c4232004ea8 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Mon, 27 May 2024 18:49:24 +0200 Subject: [PATCH 03/50] src: pre-evaluate python version comparisons --- src/bytecode/bytecode.py | 5 ++--- src/bytecode/cfg.py | 5 +++-- src/bytecode/concrete.py | 41 ++++++++++++++++++++-------------------- src/bytecode/instr.py | 39 ++++++++++++++++---------------------- src/bytecode/utils.py | 6 ++++++ 5 files changed, 47 insertions(+), 49 deletions(-) create mode 100644 src/bytecode/utils.py diff --git a/src/bytecode/bytecode.py b/src/bytecode/bytecode.py index 757d5247..312d7bd3 100644 --- a/src/bytecode/bytecode.py +++ b/src/bytecode/bytecode.py @@ -28,6 +28,7 @@ TryBegin, TryEnd, ) +from bytecode.utils import PY311 class BaseBytecode: @@ -297,9 +298,7 @@ def to_code( ) -> types.CodeType: # Prevent reconverting the concrete bytecode to bytecode and cfg to do the # calculation if we need to do it. 
- if stacksize is None or ( - sys.version_info >= (3, 11) and compute_exception_stack_depths - ): + if stacksize is None or (PY311 and compute_exception_stack_depths): cfg = _bytecode.ControlFlowGraph.from_bytecode(self) stacksize = cfg.compute_stacksize( check_pre_and_post=check_pre_and_post, diff --git a/src/bytecode/cfg.py b/src/bytecode/cfg.py index f6fa708b..9060b69f 100644 --- a/src/bytecode/cfg.py +++ b/src/bytecode/cfg.py @@ -23,6 +23,7 @@ from bytecode.concrete import ConcreteInstr from bytecode.flags import CompilerFlags from bytecode.instr import UNSET, Instr, Label, SetLineno, TryBegin, TryEnd +from bytecode.utils import PY311, PY310 T = TypeVar("T", bound="BasicBlock") U = TypeVar("U", bound="ControlFlowGraph") @@ -443,7 +444,7 @@ def _compute_exception_handler_stack_usage( def _is_stacksize_computation_relevant( self, block_id: int, fingerprint: Tuple[int, Optional[bool]] ) -> bool: - if sys.version_info >= (3, 11): + if PY311: # The computation is relevant if the block was not visited previously # with the same starting size and exception handler status than the # one in use @@ -519,7 +520,7 @@ def compute_stacksize( # Starting with Python 3.10, generator and coroutines start with one object # on the stack (None, anything is an error). 
initial_stack_size = 0 - if sys.version_info >= (3, 10) and self.flags & ( + if PY310 and self.flags & ( CompilerFlags.GENERATOR | CompilerFlags.COROUTINE | CompilerFlags.ASYNC_GENERATOR diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 36c50e12..8ee7c90d 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -49,11 +49,12 @@ const_key, opcode_has_argument, ) +from bytecode.utils import PY310, PY311, PY312, PY313 # - jumps use instruction # - lineno use bytes (dis.findlinestarts(code)) # - dis displays bytes -OFFSET_AS_INSTRUCTION = sys.version_info >= (3, 10) +OFFSET_AS_INSTRUCTION = PY310 def _set_docstring(code: _bytecode.BaseBytecode, consts: Sequence) -> None: @@ -180,7 +181,7 @@ def use_cache_opcodes(self) -> int: return ( # Not supposed to be used but we need it dis._inline_cache_entries[self._opcode] # type: ignore - if sys.version_info >= (3, 11) + if PY311 else 0 ) @@ -320,7 +321,7 @@ def from_code( instructions: MutableSequence[Union[SetLineno, ConcreteInstr]] # For Python 3.11+ we use dis to extract the detailed location information at # reduced maintenance cost. 
- if sys.version_info >= (3, 11): + if PY311: instructions = [ # dis.get_instructions automatically handle extended arg which # we do not want, so we fold back arguments to be between 0 and 255 @@ -334,7 +335,7 @@ def from_code( for i in dis.get_instructions(code, show_caches=True) ] else: - if sys.version_info >= (3, 10): + if PY310: line_starts = {offset: lineno for offset, _, lineno in code.co_lines()} else: line_starts = dict(dis.findlinestarts(code)) @@ -377,7 +378,7 @@ def from_code( bytecode.freevars = list(code.co_freevars) bytecode.cellvars = list(code.co_cellvars) _set_docstring(bytecode, code.co_consts) - if sys.version_info >= (3, 11): + if PY311: bytecode.exception_table = bytecode._parse_exception_table( code.co_exceptiontable ) @@ -792,7 +793,7 @@ def _parse_varint(except_table_iterator: Iterator[int]) -> int: def _parse_exception_table( self, exception_table: bytes ) -> List[ExceptionTableEntry]: - assert sys.version_info >= (3, 11) + assert PY311 table = [] iterator = iter(exception_table) try: @@ -848,9 +849,7 @@ def to_code( ) -> types.CodeType: # Prevent reconverting the concrete bytecode to bytecode and cfg to do the # calculation if we need to do it. 
- if stacksize is None or ( - sys.version_info >= (3, 11) and compute_exception_stack_depths - ): + if stacksize is None or (PY311 and compute_exception_stack_depths): cfg = _bytecode.ControlFlowGraph.from_bytecode(self.to_bytecode()) stacksize = cfg.compute_stacksize( check_pre_and_post=check_pre_and_post, @@ -865,16 +864,16 @@ def to_code( lnotab = ( self._assemble_locations(self.first_lineno, linenos) - if sys.version_info >= (3, 11) + if PY311 else ( self._assemble_linestable(self.first_lineno, linenos) - if sys.version_info >= (3, 10) + if PY310 else self._assemble_lnotab(self.first_lineno, linenos) ) ) nlocals = len(self.varnames) - if sys.version_info >= (3, 11): + if PY311: return types.CodeType( self.argcount, self.posonlyargcount, @@ -969,7 +968,7 @@ def to_bytecode( # Free vars are never shared and correspond to index larger than the # largest cell var. # See PyCode_NewWithPosOnlyArgs - if sys.version_info >= (3, 11): + if PY311: cells_lookup = self.varnames + [ n for n in self.cellvars if n not in self.varnames ] @@ -1047,8 +1046,8 @@ def to_bytecode( elif c_instr.opcode in _opcode.hascompare: arg = Compare( (c_arg >> 5) + (c_arg & 16) << 4 - if sys.version_info >= (3, 13) - else ((c_arg >> 4) if sys.version_info >= (3, 12) else c_arg) + if PY313 + else ((c_arg >> 4) if PY312 else c_arg) ) elif c_instr.opcode in INTRINSIC_1OP: arg = Intrinsic1Op(c_arg) @@ -1277,7 +1276,7 @@ def concrete_instructions(self) -> None: if isinstance(arg, Compare): # In Python 3.13 the 4 lowest bits are used for caching # and the 5th one indicate a cast to bool - if sys.version_info >= (3, 13): + if PY313: arg = ( arg._get_mask() + ((arg.value & 0b1111) << 4) @@ -1285,7 +1284,7 @@ def concrete_instructions(self) -> None: ) # In Python 3.12 the 4 lowest bits are used for caching # See compare_masks in compile.c - elif sys.version_info >= (3, 12): + elif PY312: arg = arg._get_mask() + (arg.value << 4) else: arg = arg.value @@ -1300,7 +1299,7 @@ def concrete_instructions(self) 
-> None: self.jumps.append((len(self.instructions), label, c_instr)) # If the instruction expect some cache - if sys.version_info >= (3, 11): + if PY311: self.required_caches = c_instr.use_cache_opcodes() self.seen_manual_cache = False @@ -1310,7 +1309,7 @@ def concrete_instructions(self) -> None: # names and update the arg argument of instructions using cell vars. # We also track by how much to offset free vars which are stored in a # contiguous array after the cell vars - if sys.version_info >= (3, 11): + if PY311: # Map naive cell index to shared index shared_name_indexes: Dict[int, int] = {} n_shared = 0 @@ -1365,7 +1364,7 @@ def compute_jumps(self) -> bool: # FIXME use opcode # Under 3.12+, FOR_ITER, SEND jump is increased by 1 implicitely # to skip over END_FOR, END_SEND see Python/instrumentation.c - if sys.version_info >= (3, 12) and instr.name in ("FOR_ITER", "SEND"): + if PY312 and instr.name in ("FOR_ITER", "SEND"): target_offset -= 1 if instr.is_forward_rel_jump(): @@ -1413,7 +1412,7 @@ def to_concrete_bytecode( compute_jumps_passes: Optional[int] = None, compute_exception_stack_depths: bool = True, ) -> ConcreteBytecode: - if sys.version_info >= (3, 11) and compute_exception_stack_depths: + if PY311 and compute_exception_stack_depths: cfg = _bytecode.ControlFlowGraph.from_bytecode(self.bytecode) cfg.compute_stacksize(compute_exception_stack_depths=True) self.bytecode = cfg.to_bytecode() diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 9af507e3..4cf7fc78 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -13,6 +13,7 @@ from typing_extensions import TypeGuard # type: ignore import bytecode as _bytecode +from bytecode.utils import PY312, PY311, PY313 # --- Instruction argument tools and @@ -21,26 +22,18 @@ # Instructions relying on a bit to modify its behavior. # The lowest bit is used to encode custom behavior. 
BITFLAG_INSTRUCTIONS = ( - ("LOAD_GLOBAL", "LOAD_ATTR") - if sys.version_info >= (3, 12) - else ("LOAD_GLOBAL",) - if sys.version_info >= (3, 11) - else () + ("LOAD_GLOBAL", "LOAD_ATTR") if PY312 else ("LOAD_GLOBAL",) if PY311 else () ) -BITFLAG2_INSTRUCTIONS = ("LOAD_SUPER_ATTR",) if sys.version_info >= (3, 12) else () +BITFLAG2_INSTRUCTIONS = ("LOAD_SUPER_ATTR",) if PY312 else () # Intrinsic related opcodes -INTRINSIC_1OP = ( - (_opcode.opmap["CALL_INTRINSIC_1"],) if sys.version_info >= (3, 12) else () -) -INTRINSIC_2OP = ( - (_opcode.opmap["CALL_INTRINSIC_2"],) if sys.version_info >= (3, 12) else () -) +INTRINSIC_1OP = (_opcode.opmap["CALL_INTRINSIC_1"],) if PY312 else () +INTRINSIC_2OP = (_opcode.opmap["CALL_INTRINSIC_2"],) if PY312 else () INTRINSIC = INTRINSIC_1OP + INTRINSIC_2OP -HASJABS = () if sys.version_info >= (3, 13) else _opcode.hasjabs -HASJREL = _opcode.hasjump if sys.version_info >= (3, 13) else _opcode.hasjrel +HASJABS = () if PY313 else _opcode.hasjabs +HASJREL = _opcode.hasjump if PY313 else _opcode.hasjrel # Used for COMPARE_OP opcode argument @@ -59,7 +52,7 @@ class Compare(enum.IntEnum): IS_NOT = 9 EXC_MATCH = 10 - if sys.version_info >= (3, 12): + if PY312: def _get_mask(self): v = self & 0b1111 @@ -76,7 +69,7 @@ def _get_mask(self): elif v == Compare.GE: return 4 + 8 - if sys.version_info >= (3, 13): + if PY313: LT_CAST = 0 + 16 LE_CAST = 1 + 16 EQ_CAST = 2 + 16 @@ -266,7 +259,7 @@ def _check_arg_int(arg: Any, name: str) -> TypeGuard[int]: return True -if sys.version_info >= (3, 12): +if PY312: def opcode_has_argument(opcode: int) -> bool: return opcode in dis.hasarg @@ -313,12 +306,12 @@ def opcode_has_argument(opcode: int) -> bool: "IMPORT_FROM": (-1, 2), "COPY_DICT_WITHOUT_KEYS": (-2, 2), # Call a function at position 7 (4 3.11+) on the stack and push the return value - "WITH_EXCEPT_START": (-4, 5) if sys.version_info >= (3, 11) else (-7, 8), + "WITH_EXCEPT_START": (-4, 5) if PY311 else (-7, 8), # Starting with Python 3.11 MATCH_CLASS 
does not push a boolean anymore - "MATCH_CLASS": (-3, 1 if sys.version_info >= (3, 11) else 2), + "MATCH_CLASS": (-3, 1 if PY311 else 2), "MATCH_MAPPING": (-1, 2), "MATCH_SEQUENCE": (-1, 2), - "MATCH_KEYS": (-2, 3 if sys.version_info >= (3, 11) else 4), + "MATCH_KEYS": (-2, 3 if PY311 else 4), "CHECK_EXC_MATCH": (-2, 2), # (TOS1, TOS) -> (TOS1, bool) "CHECK_EG_MATCH": (-2, 2), # (TOS, TOS1) -> non-matched, matched or TOS1, None) "PREP_RERAISE_STAR": (-2, 1), # (TOS1, TOS) -> new exception group) @@ -353,7 +346,7 @@ def opcode_has_argument(opcode: int) -> bool: # CALL pops the 2 above items and push the return # (when PRECALL does not exist it pops more as encoded by the effect) "CALL": lambda effect, arg, jump: ( - -2 - arg if sys.version_info >= (3, 12) else -2, + -2 - arg if PY312 else -2, 1, ), # 3.13 only @@ -440,7 +433,7 @@ def __init__( object.__setattr__(self, "col_offset", col_offset) object.__setattr__(self, "end_col_offset", end_col_offset) # In Python 3.11 0 is a valid lineno for some instructions (RESUME for example) - _check_location(lineno, "lineno", 0 if sys.version_info >= (3, 11) else 1) + _check_location(lineno, "lineno", 0 if PY311 else 1) _check_location(end_lineno, "end_lineno", 1) _check_location(col_offset, "col_offset", 0) _check_location(end_col_offset, "end_col_offset", 0) @@ -490,7 +483,7 @@ class SetLineno: def __init__(self, lineno: int) -> None: # In Python 3.11 0 is a valid lineno for some instructions (RESUME for example) - _check_location(lineno, "lineno", 0 if sys.version_info >= (3, 11) else 1) + _check_location(lineno, "lineno", 0 if PY311 else 1) self._lineno: int = lineno @property diff --git a/src/bytecode/utils.py b/src/bytecode/utils.py new file mode 100644 index 00000000..b9b7f52b --- /dev/null +++ b/src/bytecode/utils.py @@ -0,0 +1,6 @@ +import sys + +PY310 = sys.version_info >= (3, 10) +PY311 = sys.version_info >= (3, 11) +PY312 = sys.version_info >= (3, 12) +PY313 = sys.version_info >= (3, 13) From 
13f8ffe5b4df9eeaffcfeec237cf69cc7a1789e8 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Mon, 27 May 2024 19:00:16 +0200 Subject: [PATCH 04/50] fix linting errors --- src/bytecode/cfg.py | 2 +- src/bytecode/instr.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bytecode/cfg.py b/src/bytecode/cfg.py index 9060b69f..97329980 100644 --- a/src/bytecode/cfg.py +++ b/src/bytecode/cfg.py @@ -23,7 +23,7 @@ from bytecode.concrete import ConcreteInstr from bytecode.flags import CompilerFlags from bytecode.instr import UNSET, Instr, Label, SetLineno, TryBegin, TryEnd -from bytecode.utils import PY311, PY310 +from bytecode.utils import PY310, PY311 T = TypeVar("T", bound="BasicBlock") U = TypeVar("U", bound="ControlFlowGraph") diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 4cf7fc78..28f65476 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -13,7 +13,7 @@ from typing_extensions import TypeGuard # type: ignore import bytecode as _bytecode -from bytecode.utils import PY312, PY311, PY313 +from bytecode.utils import PY311, PY312, PY313 # --- Instruction argument tools and From 251175f1c10187dffec8eebdeb7156e7f881026e Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Mon, 27 May 2024 19:00:40 +0200 Subject: [PATCH 05/50] concrete: fix access to inline cache entries number --- src/bytecode/concrete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 8ee7c90d..db40a17f 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -181,7 +181,7 @@ def use_cache_opcodes(self) -> int: return ( # Not supposed to be used but we need it dis._inline_cache_entries[self._opcode] # type: ignore - if PY311 + if PY311 and self._opcode in dis._inline_cache_entries else 0 ) From ed82e0ab69fd4d1e2660ec26b50939c781e84dd2 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 28 May 2024 08:52:44 +0200 Subject: [PATCH 06/50] concrete: fix 
handling of cache on 3.11 and 3.12 --- src/bytecode/concrete.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index db40a17f..af27140b 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -181,8 +181,8 @@ def use_cache_opcodes(self) -> int: return ( # Not supposed to be used but we need it dis._inline_cache_entries[self._opcode] # type: ignore - if PY311 and self._opcode in dis._inline_cache_entries - else 0 + if PY313 and self._opcode in dis._inline_cache_entries + else (dis._inline_cache_entries[self._opcode] if PY311 else 0) ) From 449ad0421d5e7ba3cc5ca183b407cd26a30ae717 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 28 May 2024 09:06:43 +0200 Subject: [PATCH 07/50] tox: fix ruff invocation --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 05b91507..1a0e0567 100644 --- a/tox.ini +++ b/tox.ini @@ -12,7 +12,7 @@ commands = pytest --cov bytecode --cov-report=xml -v tests [testenv:fmt] basepython = python3 deps= - ruff + ruff check commands = ruff src/bytecode tests ruff format --check src/bytecode tests From 9da230e18e4d70d18f774ef0dffbe4135d1450cb Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 28 May 2024 09:07:17 +0200 Subject: [PATCH 08/50] src: use dynamic version check to satisfy Mypy --- src/bytecode/concrete.py | 18 +++++++++++------- src/bytecode/instr.py | 9 ++++++--- src/bytecode/utils.py | 9 +++++---- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index af27140b..dbb7da19 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -178,12 +178,16 @@ def disassemble(cls: Type[T], lineno: Optional[int], code: bytes, offset: int) - return cls(name, arg, lineno=lineno) def use_cache_opcodes(self) -> int: - return ( - # Not supposed to be used but we need it - dis._inline_cache_entries[self._opcode] # type: 
ignore - if PY313 and self._opcode in dis._inline_cache_entries - else (dis._inline_cache_entries[self._opcode] if PY311 else 0) - ) + if sys.version_info >= (3, 13): + return ( + dis._inline_cache_entries[self._opcode] + if self._opcode in dis._inline_cache_entries + else 0 + ) + elif sys.version_info >= (3, 11): + return dis._inline_cache_entries[self._opcode] # type: ignore + else: + return 0 class ExceptionTableEntry: @@ -873,7 +877,7 @@ def to_code( ) nlocals = len(self.varnames) - if PY311: + if sys.version_info >= (3, 11): return types.CodeType( self.argcount, self.posonlyargcount, diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 28f65476..8d09cccc 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -33,7 +33,10 @@ INTRINSIC = INTRINSIC_1OP + INTRINSIC_2OP HASJABS = () if PY313 else _opcode.hasjabs -HASJREL = _opcode.hasjump if PY313 else _opcode.hasjrel +if sys.version_info >= (3, 13): + HASJREL = _opcode.hasjump +else: + HASJREL = _opcode.hasjrel # Used for COMPARE_OP opcode argument @@ -137,7 +140,7 @@ class Intrinsic2Op(enum.IntEnum): # This make type checking happy but means it won't catch attempt to manipulate an unset # statically. 
We would need guard on object attribute narrowed down through methods class _UNSET(int): - instance = None + instance: Optional["_UNSET"] = None def __new__(cls): if cls.instance is None: @@ -259,7 +262,7 @@ def _check_arg_int(arg: Any, name: str) -> TypeGuard[int]: return True -if PY312: +if sys.version_info >= (3, 12): def opcode_has_argument(opcode: int) -> bool: return opcode in dis.hasarg diff --git a/src/bytecode/utils.py b/src/bytecode/utils.py index b9b7f52b..98c0d373 100644 --- a/src/bytecode/utils.py +++ b/src/bytecode/utils.py @@ -1,6 +1,7 @@ import sys +from typing import Final -PY310 = sys.version_info >= (3, 10) -PY311 = sys.version_info >= (3, 11) -PY312 = sys.version_info >= (3, 12) -PY313 = sys.version_info >= (3, 13) +PY310: Final[bool] = sys.version_info >= (3, 10) +PY311: Final[bool] = sys.version_info >= (3, 11) +PY312: Final[bool] = sys.version_info >= (3, 12) +PY313: Final[bool] = sys.version_info >= (3, 13) From c30d5deea4e7e5058fb0ab9cd1f8e65079dc2fdb Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 28 May 2024 09:10:21 +0200 Subject: [PATCH 09/50] cis: run lint on 3.11 --- .github/workflows/cis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cis.yml b/.github/workflows/cis.yml index 6ddd6ac5..df1524b9 100644 --- a/.github/workflows/cis.yml +++ b/.github/workflows/cis.yml @@ -24,7 +24,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.11" - name: Install tools run: | python -m pip install --upgrade pip From f76abe0197ee726262eb975ebd8801bb82297e93 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Wed, 7 Aug 2024 17:13:24 +0200 Subject: [PATCH 10/50] add TO_BOOL instruction stack effect computation --- src/bytecode/instr.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 8d09cccc..796f0a94 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -338,6 +338,7 @@ 
def opcode_has_argument(opcode: int) -> bool: "CONVERT_VALUE": (-1, 1), # new in 3.13 "FORMAT_SIMPLE": (-1, 1), # new in 3.13 "FORMAT_SPEC": (-2, 1), # new in 3.13 + "TO_BOOL": (-1, 1), # new in 3.13 } From 8eae13745c4fe418109be004cae05f87939307b0 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Wed, 7 Aug 2024 17:19:30 +0200 Subject: [PATCH 11/50] mark python 3.13 as supported --- pyproject.toml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 789c5a02..633a83f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Software Development :: Libraries :: Python Modules", ] dependencies = ["typing_extensions;python_version<'3.10'"] @@ -25,14 +26,14 @@ [project.urls] - homepage = "https://github.com/MatthieuDartiailh/bytecode" + homepage = "https://github.com/MatthieuDartiailh/bytecode" documentation = "https://bytecode.readthedocs.io/en/latest/" - repository = "https://github.com/MatthieuDartiailh/bytecode" - changelog = "https://github.com/MatthieuDartiailh/bytecode/blob/main/doc/changelog.rst" + repository = "https://github.com/MatthieuDartiailh/bytecode" + changelog = "https://github.com/MatthieuDartiailh/bytecode/blob/main/doc/changelog.rst" [build-system] - requires = ["setuptools>=61.2", "wheel", "setuptools_scm[toml]>=3.4.3"] + requires = ["setuptools>=61.2", "wheel", "setuptools_scm[toml]>=3.4.3"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] @@ -60,23 +61,23 @@ __version__ = "{version}" """ [tool.ruff] - src = ["src"] + src = ["src"] extend-exclude = ["tests/instruments/hardware/nifpga/scope_based"] - line-length = 88 + line-length = 88 [tool.ruff.lint] - select = ["B", "C", "E", "F", "W", "B9", "I", "C90", "RUF"] + select = ["B", "C", "E", "F", "W", "B9", "I", "C90", 
"RUF"] extend-ignore = ["E203", "E266", "E501", "F403", "F401", "RUF012"] [tool.ruff.lint.isort] - combine-as-imports = true + combine-as-imports = true extra-standard-library = ["opcode"] [tool.ruff.lint.mccabe] max-complexity = 42 [tool.mypy] - follow_imports = "normal" + follow_imports = "normal" strict_optional = true [tool.pytest.ini_options] From f645255b902d18f3a280d828074eee9d8543ee89 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Thu, 8 Aug 2024 19:10:25 +0200 Subject: [PATCH 12/50] fix ruff invocation --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 1a0e0567..6d786990 100644 --- a/tox.ini +++ b/tox.ini @@ -23,7 +23,7 @@ deps= ruff mypy commands = - ruff src/bytecode tests + ruff check src/bytecode tests ruff format --check src/bytecode tests mypy src tests From 7cae2f8ccc1d5928ddd9acd21c57cd8551183533 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Thu, 8 Aug 2024 19:11:27 +0200 Subject: [PATCH 13/50] concrete: fix compare creation from int --- src/bytecode/concrete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index dbb7da19..b379e30a 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -1049,7 +1049,7 @@ def to_bytecode( arg = FreeVar(name) elif c_instr.opcode in _opcode.hascompare: arg = Compare( - (c_arg >> 5) + (c_arg & 16) << 4 + (c_arg >> 5) + ((1 << 4) if (c_arg & 16) else 0) if PY313 else ((c_arg >> 4) if PY312 else c_arg) ) From 3db0ae32451bdfc0a51490ab7f6c6c1affa4f3b5 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Thu, 8 Aug 2024 19:12:17 +0200 Subject: [PATCH 14/50] tests: fix tests affected by opcode renaming --- tests/test_concrete.py | 68 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/tests/test_concrete.py b/tests/test_concrete.py index 3b75ba9a..76098679 100644 --- a/tests/test_concrete.py +++ 
b/tests/test_concrete.py @@ -60,7 +60,7 @@ def test_constructor(self): def test_attr(self): instr = ConcreteInstr("LOAD_CONST", 5, lineno=12) self.assertEqual(instr.name, "LOAD_CONST") - self.assertEqual(instr.opcode, 100) + self.assertEqual(instr.opcode, opcode.opmap["LOAD_CONST"]) self.assertEqual(instr.arg, 5) self.assertEqual(instr.lineno, 12) self.assertEqual(instr.size, 2) @@ -90,14 +90,14 @@ def test_set_attr(self): # operator name instr.name = "LOAD_FAST" self.assertEqual(instr.name, "LOAD_FAST") - self.assertEqual(instr.opcode, 124) + self.assertEqual(instr.opcode, opcode.opmap["LOAD_FAST"]) self.assertRaises(TypeError, setattr, instr, "name", 3) self.assertRaises(ValueError, setattr, instr, "name", "xxx") # operator code - instr.opcode = 100 + instr.opcode = opcode.opmap["LOAD_CONST"] self.assertEqual(instr.name, "LOAD_CONST") - self.assertEqual(instr.opcode, 100) + self.assertEqual(instr.opcode, opcode.opmap["LOAD_CONST"]) self.assertRaises(ValueError, setattr, instr, "opcode", -12) self.assertRaises(TypeError, setattr, instr, "opcode", "abc") @@ -129,35 +129,57 @@ def test_size(self): self.assertEqual(ConcreteInstr("LOAD_CONST", 0x1234ABCD).size, 8) def test_disassemble(self): - code = b"\t\x00d\x03" + code = bytes((opcode.opmap["NOP"], 0, opcode.opmap["LOAD_CONST"], 3)) instr = ConcreteInstr.disassemble(1, code, 0) self.assertEqual(instr, ConcreteInstr("NOP", lineno=1)) instr = ConcreteInstr.disassemble(2, code, 1 if OFFSET_AS_INSTRUCTION else 2) self.assertEqual(instr, ConcreteInstr("LOAD_CONST", 3, lineno=2)) - code = b"\x90\x12\x904\x90\xabd\xcd" + code = bytes( + ( + opcode.EXTENDED_ARG, + 0x12, + opcode.EXTENDED_ARG, + 0x34, + opcode.EXTENDED_ARG, + 0xAB, + instr.opcode, + 0xCD, + ) + ) instr = ConcreteInstr.disassemble(3, code, 0) self.assertEqual(instr, ConcreteInstr("EXTENDED_ARG", 0x12, lineno=3)) def test_assemble(self): instr = ConcreteInstr("NOP") - self.assertEqual(instr.assemble(), b"\t\x00") + self.assertEqual(instr.assemble(), 
bytes((instr.opcode, 0))) instr = ConcreteInstr("LOAD_CONST", 3) - self.assertEqual(instr.assemble(), b"d\x03") + self.assertEqual(instr.assemble(), bytes((instr.opcode, 3))) instr = ConcreteInstr("LOAD_CONST", 0x1234ABCD) self.assertEqual( instr.assemble(), - (b"\x90\x12\x904\x90\xabd\xcd"), + bytes( + ( + opcode.EXTENDED_ARG, + 0x12, + opcode.EXTENDED_ARG, + 0x34, + opcode.EXTENDED_ARG, + 0xAB, + instr.opcode, + 0xCD, + ) + ), ) instr = ConcreteInstr("LOAD_CONST", 3, extended_args=1) self.assertEqual( instr.assemble(), - (b"\x90\x00d\x03"), + bytes((opcode.EXTENDED_ARG, 0, instr.opcode, 3)), ) def test_get_jump_target(self): @@ -335,7 +357,18 @@ def test_negative_lnotab(self): concrete.first_lineno = 5 code = concrete.to_code() - expected = b"d\x00Z\x00d\x01Z\x01" + expected = bytes( + ( + opcode.opmap["LOAD_CONST"], + 0, + opcode.opmap["STORE_NAME"], + 0, + opcode.opmap["LOAD_CONST"], + 1, + opcode.opmap["STORE_NAME"], + 1, + ) + ) self.assertEqual(code.co_code, expected) self.assertEqual(code.co_firstlineno, 5) if sys.version_info >= (3, 12): @@ -363,7 +396,18 @@ def test_extended_lnotab(self): concrete.first_lineno = 1 code = concrete.to_code() - expected = b"d\x00Z\x00d\x01Z\x01" + expected = bytes( + ( + opcode.opmap["LOAD_CONST"], + 0, + opcode.opmap["STORE_NAME"], + 0, + opcode.opmap["LOAD_CONST"], + 1, + opcode.opmap["STORE_NAME"], + 1, + ) + ) self.assertEqual(code.co_code, expected) self.assertEqual(code.co_firstlineno, 1) if sys.version_info >= (3, 11): From c41d46bc3f9cd738b68104f795bf9fd92a01ef11 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Fri, 9 Aug 2024 16:13:09 +0200 Subject: [PATCH 15/50] instr: add support for dual argument opcode --- src/bytecode/concrete.py | 62 ++++++++++++++++++++++++++-------------- src/bytecode/instr.py | 41 ++++++++++++++++++++------ tests/test_concrete.py | 24 ++++++++++++++-- tests/test_instr.py | 18 ++++++++---- 4 files changed, 108 insertions(+), 37 deletions(-) diff --git a/src/bytecode/concrete.py 
b/src/bytecode/concrete.py index b379e30a..f9924b29 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -25,8 +25,9 @@ from bytecode.flags import CompilerFlags from bytecode.instr import ( _UNSET, - BITFLAG2_INSTRUCTIONS, - BITFLAG_INSTRUCTIONS, + BITFLAG2_OPCODES, + BITFLAG_OPCODES, + DUAL_ARG_OPCODES, INTRINSIC, INTRINSIC_1OP, INTRINSIC_2OP, @@ -962,6 +963,7 @@ def to_bytecode( labels = {} tb_instrs: Dict[ExceptionTableEntry, TryBegin] = {} offset = 0 + # In Python 3.11+ cell and varnames can be shared and are indexed in a single # array. # As a consequence, the instruction argument can be either: @@ -1026,36 +1028,40 @@ def to_bytecode( # We may need to insert a TryEnd after a CACHE so we need to run the # through the last block. else: + opcode = c_instr._opcode arg: InstrArg c_arg = c_instr.arg # FIXME: better error reporting - if c_instr.opcode in _opcode.hasconst: + if opcode in _opcode.hasconst: arg = self.consts[c_arg] - elif c_instr.opcode in _opcode.haslocal: - arg = self.varnames[c_arg] - elif c_instr.opcode in _opcode.hasname: - if c_instr.name in BITFLAG_INSTRUCTIONS: + elif opcode in _opcode.haslocal: + if opcode in DUAL_ARG_OPCODES: + arg = (locals_lookup[c_arg >> 4], locals_lookup[c_arg & 15]) + else: + arg = locals_lookup[c_arg] + elif opcode in _opcode.hasname: + if opcode in BITFLAG_OPCODES: arg = (bool(c_arg & 1), self.names[c_arg >> 1]) - elif c_instr.name in BITFLAG2_INSTRUCTIONS: + elif opcode in BITFLAG2_OPCODES: arg = (bool(c_arg & 1), bool(c_arg & 2), self.names[c_arg >> 2]) else: arg = self.names[c_arg] - elif c_instr.opcode in _opcode.hasfree: + elif opcode in _opcode.hasfree: if c_arg < ncells: name = cells_lookup[c_arg] arg = CellVar(name) else: name = self.freevars[c_arg - ncells] arg = FreeVar(name) - elif c_instr.opcode in _opcode.hascompare: + elif opcode in _opcode.hascompare: arg = Compare( (c_arg >> 5) + ((1 << 4) if (c_arg & 16) else 0) if PY313 else ((c_arg >> 4) if PY312 else c_arg) ) - elif c_instr.opcode 
in INTRINSIC_1OP: + elif opcode in INTRINSIC_1OP: arg = Intrinsic1Op(c_arg) - elif c_instr.opcode in INTRINSIC_2OP: + elif opcode in INTRINSIC_2OP: arg = Intrinsic2Op(c_arg) else: arg = c_arg @@ -1233,6 +1239,7 @@ def concrete_instructions(self) -> None: elif instr.lineno is UNSET: instr.lineno = lineno + opcode = instr._opcode arg = instr.arg is_jump = False if isinstance(arg, Label): @@ -1240,13 +1247,24 @@ def concrete_instructions(self) -> None: # fake value, real value is set in compute_jumps() arg = 0 is_jump = True - elif instr.opcode in _opcode.hasconst: + elif opcode in _opcode.hasconst: arg = self.add_const(arg) - elif instr.opcode in _opcode.haslocal: - assert isinstance(arg, str) - arg = self.add(self.varnames, arg) - elif instr.opcode in _opcode.hasname: - if instr.name in BITFLAG_INSTRUCTIONS: + elif opcode in _opcode.haslocal: + if opcode in DUAL_ARG_OPCODES: + assert ( + isinstance(arg, tuple) + and len(arg) == 2 + and isinstance(arg[0], str) + and isinstance(arg[1], str) + ) + arg = (self.add(self.varnames, arg[0]) << 4) + self.add( + self.varnames, arg[1] + ) + else: + assert isinstance(arg, str) + arg = self.add(self.varnames, arg) + elif opcode in _opcode.hasname: + if opcode in BITFLAG_OPCODES: assert ( isinstance(arg, tuple) and len(arg) == 2 @@ -1255,7 +1273,7 @@ def concrete_instructions(self) -> None: ), arg index = self.add(self.names, arg[1]) arg = int(arg[0]) + (index << 1) - elif instr.name in BITFLAG2_INSTRUCTIONS: + elif opcode in BITFLAG2_OPCODES: assert ( isinstance(arg, tuple) and len(arg) == 3 @@ -1268,7 +1286,7 @@ def concrete_instructions(self) -> None: else: assert isinstance(arg, str), f"Got {arg}, expected a str" arg = self.add(self.names, arg) - elif instr.opcode in _opcode.hasfree: + elif opcode in _opcode.hasfree: if isinstance(arg, CellVar): cell_instrs.append(len(self.instructions)) arg = self.bytecode.cellvars.index(arg.name) @@ -1276,7 +1294,7 @@ def concrete_instructions(self) -> None: assert isinstance(arg, FreeVar) 
free_instrs.append(len(self.instructions)) arg = self.bytecode.freevars.index(arg.name) - elif instr.opcode in _opcode.hascompare: + elif opcode in _opcode.hascompare: if isinstance(arg, Compare): # In Python 3.13 the 4 lowest bits are used for caching # and the 5th one indicate a cast to bool @@ -1292,7 +1310,7 @@ def concrete_instructions(self) -> None: arg = arg._get_mask() + (arg.value << 4) else: arg = arg.value - elif instr.opcode in INTRINSIC: + elif opcode in INTRINSIC: if isinstance(arg, (Intrinsic1Op, Intrinsic2Op)): arg = arg.value diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 796f0a94..cef67e57 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -21,11 +21,15 @@ # Instructions relying on a bit to modify its behavior. # The lowest bit is used to encode custom behavior. -BITFLAG_INSTRUCTIONS = ( - ("LOAD_GLOBAL", "LOAD_ATTR") if PY312 else ("LOAD_GLOBAL",) if PY311 else () +BITFLAG_OPCODES = ( + (_opcode.opmap["LOAD_GLOBAL"], _opcode.opmap["LOAD_ATTR"]) + if PY312 + else (_opcode.opmap["LOAD_GLOBAL"],) + if PY311 + else () ) -BITFLAG2_INSTRUCTIONS = ("LOAD_SUPER_ATTR",) if PY312 else () +BITFLAG2_OPCODES = (_opcode.opmap["LOAD_SUPER_ATTR"],) if PY312 else () # Intrinsic related opcodes INTRINSIC_1OP = (_opcode.opmap["CALL_INTRINSIC_1"],) if PY312 else () @@ -38,6 +42,15 @@ else: HASJREL = _opcode.hasjrel +#: Opcodes taking 2 arguments (highest 4 bits and lowest 4 bits) +DUAL_ARG_OPCODES = () +if PY313: + DUAL_ARG_OPCODES = ( + _opcode.opmap["LOAD_FAST_LOAD_FAST"], + _opcode.opmap["STORE_FAST_LOAD_FAST"], + _opcode.opmap["STORE_FAST_STORE_FAST"], + ) + # Used for COMPARE_OP opcode argument @enum.unique @@ -354,7 +367,7 @@ def opcode_has_argument(opcode: int) -> bool: 1, ), # 3.13 only - "CALL_KW": lambda effect, arg, jump: (-2 - arg, 1), + "CALL_KW": lambda effect, arg, jump: (-3 - arg, 1), # 3.12 changed the behavior of LOAD_ATTR "LOAD_ATTR": lambda effect, arg, jump: (-1, 1 + effect), "LOAD_SUPER_ATTR": lambda effect, 
arg, jump: (-3, 3 + effect), @@ -642,11 +655,11 @@ def stack_effect(self, jump: Optional[bool] = None) -> int: arg = None # 3.11 where LOAD_GLOBAL arg encode whether or we push a null # 3.12 does the same for LOAD_ATTR - elif self.name in BITFLAG_INSTRUCTIONS and isinstance(self._arg, tuple): + elif self._opcode in BITFLAG_OPCODES and isinstance(self._arg, tuple): assert len(self._arg) == 2 arg = self._arg[0] # 3.12 does a similar trick for LOAD_SUPER_ATTR - elif self.name in BITFLAG2_INSTRUCTIONS and isinstance(self._arg, tuple): + elif self._opcode in BITFLAG2_OPCODES and isinstance(self._arg, tuple): assert len(self._arg) == 3 arg = self._arg[0] elif not isinstance(self._arg, int) or self._opcode in _opcode.hasconst: @@ -827,7 +840,7 @@ def _check_arg(self, name: str, opcode: int, arg: InstrArg) -> None: ) elif opcode in _opcode.haslocal or opcode in _opcode.hasname: - if name in BITFLAG_INSTRUCTIONS: + if opcode in BITFLAG_OPCODES: if not ( isinstance(arg, tuple) and len(arg) == 2 @@ -839,7 +852,7 @@ def _check_arg(self, name: str, opcode: int, arg: InstrArg) -> None: "got %s (value=%s)" % (name, type(arg).__name__, str(arg)) ) - elif name in BITFLAG2_INSTRUCTIONS: + elif opcode in BITFLAG2_OPCODES: if not ( isinstance(arg, tuple) and len(arg) == 3 @@ -852,6 +865,18 @@ def _check_arg(self, name: str, opcode: int, arg: InstrArg) -> None: "got %s (value=%s)" % (name, type(arg).__name__, str(arg)) ) + elif opcode in DUAL_ARG_OPCODES: + if not ( + isinstance(arg, tuple) + and len(arg) == 2 + and isinstance(arg[0], str) + and isinstance(arg[1], str) + ): + raise TypeError( + "operation %s argument must be a tuple[str, str], " + "got %s (value=%s)" % (name, type(arg).__name__, str(arg)) + ) + elif not isinstance(arg, str): raise TypeError( "operation %s argument must be a str, " diff --git a/tests/test_concrete.py b/tests/test_concrete.py index 76098679..b3045149 100644 --- a/tests/test_concrete.py +++ b/tests/test_concrete.py @@ -857,7 +857,14 @@ def foo(x: int, y: 
int): + [ ConcreteInstr("LOAD_CONST", 1 + const_offset, lineno=1), ConcreteInstr("LOAD_CONST", 2 + const_offset, lineno=1), - ConcreteInstr("MAKE_FUNCTION", 4, lineno=1), + *( + [ + ConcreteInstr("MAKE_FUNCTION", lineno=1), + ConcreteInstr("SET_FUNCTION_ATTRIBUTE", 4, lineno=1), + ] + if PY313 + else [ConcreteInstr("MAKE_FUNCTION", 4, lineno=1)] + ), ConcreteInstr("STORE_NAME", name_offset, lineno=1), ] + ( @@ -903,7 +910,20 @@ def test_extended_arg_nop(self): constants[0x000129] = "Arbitrary String" # EXTENDED_ARG 0x01, NOP 0xFF, EXTENDED_ARG 0x01, # LOAD_CONST 0x29, RETURN_VALUE 0x00 - codestring = bytes([0x90, 0x01, 0x09, 0xFF, 0x90, 0x01, 0x64, 0x29, 0x53, 0x00]) + codestring = bytes( + [ + opcode.EXTENDED_ARG, + 0x01, + opcode.opmap["NOP"], + 0xFF, + opcode.EXTENDED_ARG, + 0x01, + opcode.opmap["LOAD_CONST"], + 0x29, + opcode.opmap["RETURN_VALUE"], + 0x00, + ] + ) codetype_list = [ 0, 0, diff --git a/tests/test_instr.py b/tests/test_instr.py index 1437b87a..5d41c0ce 100644 --- a/tests/test_instr.py +++ b/tests/test_instr.py @@ -14,8 +14,9 @@ SetLineno, ) from bytecode.instr import ( - BITFLAG2_INSTRUCTIONS, - BITFLAG_INSTRUCTIONS, + BITFLAG2_OPCODES, + BITFLAG_OPCODES, + DUAL_ARG_OPCODES, INTRINSIC_1OP, INTRINSIC_2OP, InstrLocation, @@ -196,7 +197,7 @@ def test_invalid_arg(self): Instr("NOP") # Instructions using a bitflag in their oparg - for name in BITFLAG_INSTRUCTIONS: + for name in (opcode.opname[op] for op in BITFLAG_OPCODES): self.assertRaises(TypeError, Instr, name, "arg") self.assertRaises(TypeError, Instr, name, ("arg",)) self.assertRaises(TypeError, Instr, name, ("", "arg")) @@ -204,7 +205,7 @@ def test_invalid_arg(self): Instr(name, (True, "arg")) # Instructions using 2 bitflag in their oparg - for name in BITFLAG2_INSTRUCTIONS: + for name in (opcode.opname[op] for op in BITFLAG2_OPCODES): self.assertRaises(TypeError, Instr, name, "arg") self.assertRaises(TypeError, Instr, name, ("arg",)) self.assertRaises(TypeError, Instr, name, ("", True, "arg")) 
@@ -212,6 +213,13 @@ def test_invalid_arg(self): self.assertRaises(TypeError, Instr, name, (False, True, 1)) Instr(name, (False, True, "arg")) + # Instructions packing 2 args in their oparg + for name in (opcode.opname[op] for op in DUAL_ARG_OPCODES): + self.assertRaises(TypeError, Instr, name, "arg") + self.assertRaises(TypeError, Instr, name, ("arg",)) + self.assertRaises(TypeError, Instr, name, ("", True)) + Instr(name, ("arg1", "arg2")) + for name in [opcode.opname[i] for i in INTRINSIC_1OP]: self.assertRaises(TypeError, Instr, name, 1) Instr(name, Intrinsic1Op.INTRINSIC_PRINT) @@ -229,7 +237,7 @@ def test_require_arg(self): def test_attr(self): instr = Instr("LOAD_CONST", 3, lineno=5) self.assertEqual(instr.name, "LOAD_CONST") - self.assertEqual(instr.opcode, 100) + self.assertEqual(instr.opcode, opcode.opmap["LOAD_CONST"]) self.assertEqual(instr.arg, 3) self.assertEqual(instr.lineno, 5) From 29f3d8b1ba070474064a42081f123285d4b551c8 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Sun, 11 Aug 2024 20:02:10 +0200 Subject: [PATCH 16/50] instr: fix handling of COMPARE_OP argument conversion --- src/bytecode/concrete.py | 2 +- src/bytecode/instr.py | 2 +- tests/test_instr.py | 28 ++++++++++++++++------------ 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index f9924b29..1235cf70 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -1301,7 +1301,7 @@ def concrete_instructions(self) -> None: if PY313: arg = ( arg._get_mask() - + ((arg.value & 0b1111) << 4) + + ((arg.value & 0b1111) << 5) + (arg.value & 16) ) # In Python 3.12 the 4 lowest bits are used for caching diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index cef67e57..09b9b53f 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -43,7 +43,7 @@ HASJREL = _opcode.hasjrel #: Opcodes taking 2 arguments (highest 4 bits and lowest 4 bits) -DUAL_ARG_OPCODES = () +DUAL_ARG_OPCODES: tuple[int, ...] 
= () if PY313: DUAL_ARG_OPCODES = ( _opcode.opmap["LOAD_FAST_LOAD_FAST"], diff --git a/tests/test_instr.py b/tests/test_instr.py index 5d41c0ce..12efa42a 100644 --- a/tests/test_instr.py +++ b/tests/test_instr.py @@ -474,18 +474,22 @@ def f(): params = zip(iter(Compare), (True, True, False, True, False, False)) for cmp, expected in params: - with self.subTest(cmp): - bcode = Bytecode( - ([Instr("RESUME", 0)] if sys.version_info >= (3, 11) else []) - + [ - Instr("LOAD_CONST", 24), - Instr("LOAD_CONST", 42), - Instr("COMPARE_OP", cmp), - Instr("RETURN_VALUE"), - ] - ) - f.__code__ = bcode.to_code() - self.assertIs(f(), expected) + for cast in (False, True): + with self.subTest(cmp): + operation = Compare(cmp + (16 if cast else 0)) + print(f"Subtest: {operation.name}") + bcode = Bytecode( + ([Instr("RESUME", 0)] if sys.version_info >= (3, 11) else []) + + [ + Instr("LOAD_CONST", 24), + Instr("LOAD_CONST", 42), + Instr("COMPARE_OP", operation), + Instr("RETURN_VALUE"), + ] + ) + bcode.update_flags() + f.__code__ = bcode.to_code() + self.assertIs(f(), expected) if __name__ == "__main__": From 09937e472ee454c8699b540714b882d7f9c638de Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Sun, 11 Aug 2024 20:03:08 +0200 Subject: [PATCH 17/50] concrete: fix lookup of cache entry numbers (by name in 3.13 by opcode in 3.12) --- src/bytecode/concrete.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 1235cf70..17dfc4dd 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -181,8 +181,8 @@ def disassemble(cls: Type[T], lineno: Optional[int], code: bytes, offset: int) - def use_cache_opcodes(self) -> int: if sys.version_info >= (3, 13): return ( - dis._inline_cache_entries[self._opcode] - if self._opcode in dis._inline_cache_entries + dis._inline_cache_entries[self._name] + if self._name in dis._inline_cache_entries else 0 ) elif sys.version_info >= (3, 11): From 
1408b4133dd0bf0b4f47851300207c8d80c23672 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Mon, 12 Aug 2024 17:59:30 +0200 Subject: [PATCH 18/50] concrete: properly extract cache entries when disassembling --- src/bytecode/concrete.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 17dfc4dd..af38c353 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -327,18 +327,20 @@ def from_code( # For Python 3.11+ we use dis to extract the detailed location information at # reduced maintenance cost. if PY311: - instructions = [ + instructions = [] + for i in dis.get_instructions(code, show_caches=True): + loc = InstrLocation.from_positions(i.positions) if i.positions else None # dis.get_instructions automatically handle extended arg which # we do not want, so we fold back arguments to be between 0 and 255 - ConcreteInstr( - i.opname, - i.arg % 256 if i.arg is not None else UNSET, - location=InstrLocation.from_positions(i.positions) - if i.positions - else None, + instructions.append( + ConcreteInstr( + i.opname, + i.arg % 256 if i.arg is not None else UNSET, + location=loc, + ) ) - for i in dis.get_instructions(code, show_caches=True) - ] + for _ in (i.cache_info or ()) if PY313 else (): + instructions.append(ConcreteInstr("CACHE", 0, location=loc)) else: if PY310: line_starts = {offset: lineno for offset, _, lineno in code.co_lines()} From deea4534c19437afe9c87118825a86a46b2cbbfd Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 13 Aug 2024 23:06:47 +0200 Subject: [PATCH 19/50] tests: fix misc tests --- tests/test_misc.py | 49 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/tests/test_misc.py b/tests/test_misc.py index 12bcb585..f249ab83 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -8,6 +8,7 @@ import bytecode from bytecode import BasicBlock, Bytecode, ControlFlowGraph, 
Instr, Label from bytecode.concrete import OFFSET_AS_INSTRUCTION +from bytecode.utils import PY313 from . import disassemble @@ -37,7 +38,7 @@ def func(test): code = disassemble(source, function=True) # without line numbers - enum_repr = "" + enum_repr = "" if PY313 else "" if sys.version_info >= (3, 12): expected = f""" RESUME 0 @@ -207,7 +208,7 @@ def func(test): code = ControlFlowGraph.from_bytecode(code) # without line numbers - enum_repr = "" + enum_repr = "" if PY313 else "" if sys.version_info >= (3, 12): expected = textwrap.dedent( f""" @@ -410,7 +411,28 @@ def func(test): code = code.to_concrete_bytecode() # without line numbers - if sys.version_info >= (3, 12): + if sys.version_info >= (3, 13): + # COMPARE_OP use the 4 lowest bits as a cache + expected = """ + 0 RESUME 0 + 2 LOAD_FAST 0 + 4 LOAD_CONST 1 + 6 COMPARE_OP 88 + 8 CACHE 0 + 10 POP_JUMP_IF_FALSE 2 + 12 CACHE 0 + 14 RETURN_CONST 1 + 16 LOAD_FAST 0 + 18 LOAD_CONST 2 + 20 COMPARE_OP 88 + 22 CACHE 0 + 24 POP_JUMP_IF_FALSE 2 + 26 CACHE 0 + 28 RETURN_CONST 2 + 30 RETURN_CONST 3 +""" + + elif sys.version_info >= (3, 12): # COMPARE_OP use the 4 lowest bits as a cache expected = """ 0 RESUME 0 @@ -470,7 +492,26 @@ def func(test): self.check_dump_bytecode(code, expected.lstrip("\n")) # with line numbers - if sys.version_info >= (3, 12): + if sys.version_info >= (3, 13): + expected = """ +L. 1 0: RESUME 0 +L. 2 2: LOAD_FAST 0 + 4: LOAD_CONST 1 + 6: COMPARE_OP 88 + 8: CACHE 0 + 10: POP_JUMP_IF_FALSE 2 + 12: CACHE 0 +L. 3 14: RETURN_CONST 1 +L. 4 16: LOAD_FAST 0 + 18: LOAD_CONST 2 + 20: COMPARE_OP 88 + 22: CACHE 0 + 24: POP_JUMP_IF_FALSE 2 + 26: CACHE 0 +L. 5 28: RETURN_CONST 2 +L. 6 30: RETURN_CONST 3 +""" + elif sys.version_info >= (3, 12): expected = """ L. 1 0: RESUME 0 L. 
2 2: LOAD_FAST 0 From efdacb2f437eede82ed216cea45cdfb6fb0b0975 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 20 Aug 2024 10:37:38 +0200 Subject: [PATCH 20/50] tests: fix 3.13 specific bytecode change requiring TO_BOOL cast --- tests/test_bytecode.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_bytecode.py b/tests/test_bytecode.py index 2982d8a9..d8952d12 100644 --- a/tests/test_bytecode.py +++ b/tests/test_bytecode.py @@ -220,12 +220,29 @@ def test_from_code(self): Instr("RETURN_VALUE", lineno=4), ], ) + elif sys.version_info < (3, 13): + self.assertInstructionListEqual( + bytecode, + [ + Instr("RESUME", 0, lineno=0), + Instr("LOAD_NAME", "test", lineno=1), + Instr("POP_JUMP_IF_FALSE", label_else, lineno=1), + Instr("LOAD_CONST", 1, lineno=2), + Instr("STORE_NAME", "x", lineno=2), + Instr("RETURN_CONST", None, lineno=2), + label_else, + Instr("LOAD_CONST", 2, lineno=4), + Instr("STORE_NAME", "x", lineno=4), + Instr("RETURN_CONST", None, lineno=4), + ], + ) else: self.assertInstructionListEqual( bytecode, [ Instr("RESUME", 0, lineno=0), Instr("LOAD_NAME", "test", lineno=1), + Instr("TO_BOOL", lineno=1), Instr("POP_JUMP_IF_FALSE", label_else, lineno=1), Instr("LOAD_CONST", 1, lineno=2), Instr("STORE_NAME", "x", lineno=2), From ae7730af18e7d7d02d1848544262d4e6702941cb Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 20 Aug 2024 10:38:40 +0200 Subject: [PATCH 21/50] allow LOAD_FAST and friends to access CellVar --- src/bytecode/concrete.py | 21 +++++++++++++++++---- src/bytecode/instr.py | 8 ++++++-- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index af38c353..07d9b1d4 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -978,12 +978,18 @@ def to_bytecode( # See PyCode_NewWithPosOnlyArgs if PY311: cells_lookup = self.varnames + [ - n for n in self.cellvars if n not in self.varnames + CellVar(n) for n in self.cellvars if n 
not in self.varnames ] ncells = len(cells_lookup) else: ncells = len(self.cellvars) - cells_lookup = self.cellvars + cells_lookup = [CellVar(n) for n in self.cellvars] + + # In Python 3.13+ LOAD_FAST can be used to retrieve cell values + if PY313: + locals_lookup = cells_lookup + else: + locals_lookup = self.varnames for lineno, c_instr in self._normalize_lineno( c_instructions, self.first_lineno @@ -1050,8 +1056,12 @@ def to_bytecode( arg = self.names[c_arg] elif opcode in _opcode.hasfree: if c_arg < ncells: - name = cells_lookup[c_arg] - arg = CellVar(name) + n_or_cell = cells_lookup[c_arg] + arg = ( + n_or_cell + if isinstance(n_or_cell, CellVar) + else CellVar(n_or_cell) + ) else: name = self.freevars[c_arg - ncells] arg = FreeVar(name) @@ -1262,6 +1272,9 @@ def concrete_instructions(self) -> None: arg = (self.add(self.varnames, arg[0]) << 4) + self.add( self.varnames, arg[1] ) + elif PY313 and isinstance(arg, CellVar): + cell_instrs.append(len(self.instructions)) + arg = self.bytecode.cellvars.index(arg.name) else: assert isinstance(arg, str) arg = self.add(self.varnames, arg) diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 09b9b53f..2773d1df 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -869,14 +869,18 @@ def _check_arg(self, name: str, opcode: int, arg: InstrArg) -> None: if not ( isinstance(arg, tuple) and len(arg) == 2 - and isinstance(arg[0], str) - and isinstance(arg[1], str) + and isinstance(arg[0], (str, CellVar)) + and isinstance(arg[1], (str, CellVar)) ): raise TypeError( "operation %s argument must be a tuple[str, str], " "got %s (value=%s)" % (name, type(arg).__name__, str(arg)) ) + elif PY313 and opcode in _opcode.haslocal and isinstance(arg, CellVar): + # Cell vars can be accessed using locals in Python 3.13+ + pass + elif not isinstance(arg, str): raise TypeError( "operation %s argument must be a str, " From 0e7280b893958766482aa7d6b9127a37dc4cb0d5 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 
20 Aug 2024 10:39:12 +0200 Subject: [PATCH 22/50] fix computation of jump offset in the presence of CACHE following a jump instruction --- src/bytecode/concrete.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 07d9b1d4..6ee7e8a6 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -1391,8 +1391,7 @@ def compute_jumps(self) -> bool: # needed if a label is at the end label_offsets.append(offset) - # FIXME may need some extra check to validate jump forward vs jump backward - # fix argument of jump instructions: resolve labels + # Fix argument of jump instructions: resolve labels modified = False for index, label, instr in self.jumps: target_index = self.labels[label] @@ -1404,13 +1403,14 @@ def compute_jumps(self) -> bool: if PY312 and instr.name in ("FOR_ITER", "SEND"): target_offset -= 1 + # For jump using cache opcodes, an argument of 0 jumps to the + # first non cache instructions right after the jump instruction + instr_offset = label_offsets[index] + instr.use_cache_opcodes() if instr.is_forward_rel_jump(): - instr_offset = label_offsets[index] target_offset -= instr_offset + ( instr.size // 2 if OFFSET_AS_INSTRUCTION else instr.size ) elif instr.is_backward_rel_jump(): - instr_offset = label_offsets[index] target_offset = ( instr_offset + (instr.size // 2 if OFFSET_AS_INSTRUCTION else instr.size) From a6fd52c63988a01bee096878045de9fca5337ddf Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 20 Aug 2024 11:26:28 +0200 Subject: [PATCH 23/50] tests: fix tests broken on <3.13 --- src/bytecode/cfg.py | 4 ++-- src/bytecode/instr.py | 2 +- tests/test_concrete.py | 1 + tests/test_instr.py | 7 ++++--- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/bytecode/cfg.py b/src/bytecode/cfg.py index 97329980..32a116ab 100644 --- a/src/bytecode/cfg.py +++ b/src/bytecode/cfg.py @@ -952,8 +952,8 @@ def to_bytecode(self) -> _bytecode.Bytecode: # 
TryEnd/TryBegin pair which share the same target. # In each case, we store the value found in the CFG and the value # inserted in the bytecode. - last_try_begin: tuple[TryBegin, TryBegin] | None = None - last_try_end: tuple[TryEnd, TryEnd] | None = None + last_try_begin: Tuple[TryBegin, TryBegin] | None = None + last_try_end: Tuple[TryEnd, TryEnd] | None = None for block in self: if id(block) in used_blocks: diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 2773d1df..1ab0e3ad 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -43,7 +43,7 @@ HASJREL = _opcode.hasjrel #: Opcodes taking 2 arguments (highest 4 bits and lowest 4 bits) -DUAL_ARG_OPCODES: tuple[int, ...] = () +DUAL_ARG_OPCODES: Tuple[int, ...] = () if PY313: DUAL_ARG_OPCODES = ( _opcode.opmap["LOAD_FAST_LOAD_FAST"], diff --git a/tests/test_concrete.py b/tests/test_concrete.py index b3045149..9737cc3b 100644 --- a/tests/test_concrete.py +++ b/tests/test_concrete.py @@ -21,6 +21,7 @@ SetLineno, ) from bytecode.concrete import OFFSET_AS_INSTRUCTION, ExceptionTableEntry +from bytecode.utils import PY313 from . import TestCase, get_code diff --git a/tests/test_instr.py b/tests/test_instr.py index 12efa42a..00befeda 100644 --- a/tests/test_instr.py +++ b/tests/test_instr.py @@ -24,6 +24,7 @@ Intrinsic2Op, opcode_has_argument, ) +from bytecode.utils import PY311, PY313 from . import TestCase @@ -31,13 +32,13 @@ # Starting with Python 3.11 jump opcode have changed quite a bit. 
We define here # opcode useful to test for both Python < 3.11 and Python >= 3.11 -UNCONDITIONAL_JUMP = "JUMP_FORWARD" if sys.version_info >= (3, 11) else "JUMP_ABSOLUTE" +UNCONDITIONAL_JUMP = "JUMP_FORWARD" if PY311 else "JUMP_ABSOLUTE" CONDITIONAL_JUMP = ( "POP_JUMP_FORWARD_IF_TRUE" if (3, 12) > sys.version_info >= (3, 11) else "POP_JUMP_IF_TRUE" ) -CALL = "CALL" if sys.version_info >= (3, 11) else "CALL_FUNCTION" +CALL = "CALL" if PY311 else "CALL_FUNCTION" class SetLinenoTests(TestCase): @@ -474,7 +475,7 @@ def f(): params = zip(iter(Compare), (True, True, False, True, False, False)) for cmp, expected in params: - for cast in (False, True): + for cast in (False, True) if PY313 else (False,): with self.subTest(cmp): operation = Compare(cmp + (16 if cast else 0)) print(f"Subtest: {operation.name}") From 2d0487991458afabfe404b4a9f6e8153a245a23c Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 20 Aug 2024 12:32:41 +0200 Subject: [PATCH 24/50] fix typing issues --- src/bytecode/concrete.py | 4 +++- src/bytecode/instr.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 6ee7e8a6..2884a022 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -339,7 +339,8 @@ def from_code( location=loc, ) ) - for _ in (i.cache_info or ()) if PY313 else (): + # cache_info only exist on 3.13+ + for _ in (i.cache_info or ()) if PY313 else (): # type: ignore instructions.append(ConcreteInstr("CACHE", 0, location=loc)) else: if PY310: @@ -986,6 +987,7 @@ def to_bytecode( cells_lookup = [CellVar(n) for n in self.cellvars] # In Python 3.13+ LOAD_FAST can be used to retrieve cell values + locals_lookup: Sequence[Union[str, CellVar]] if PY313: locals_lookup = cells_lookup else: diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 1ab0e3ad..af55876f 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -799,6 +799,7 @@ def _cmp_key(self) -> 
Tuple[Optional[InstrLocation], str, Any]: Compare, Tuple[bool, str], Tuple[bool, bool, str], + Tuple[Union[str, CellVar], Union[str, CellVar]], ] From c815b96587c8282efb80091d8d1e2bda195003d0 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 20 Aug 2024 14:16:43 +0200 Subject: [PATCH 25/50] remove FOR_ITER and SEND special case in handling jump now that CACHE handling is more generic --- src/bytecode/concrete.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 2884a022..bf339a1c 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -1399,12 +1399,6 @@ def compute_jumps(self) -> bool: target_index = self.labels[label] target_offset = label_offsets[target_index] - # FIXME use opcode - # Under 3.12+, FOR_ITER, SEND jump is increased by 1 implicitely - # to skip over END_FOR, END_SEND see Python/instrumentation.c - if PY312 and instr.name in ("FOR_ITER", "SEND"): - target_offset -= 1 - # For jump using cache opcodes, an argument of 0 jumps to the # first non cache instructions right after the jump instruction instr_offset = label_offsets[index] + instr.use_cache_opcodes() From 5a424a2e974174a5cb0622975f2262c162e206c7 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 20 Aug 2024 16:25:08 +0200 Subject: [PATCH 26/50] concrete: fix generation of CACHE entries under Python 3.13 --- src/bytecode/concrete.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index bf339a1c..cc68feb6 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -340,8 +340,9 @@ def from_code( ) ) # cache_info only exist on 3.13+ - for _ in (i.cache_info or ()) if PY313 else (): # type: ignore - instructions.append(ConcreteInstr("CACHE", 0, location=loc)) + for _, size, _ in (i.cache_info or ()) if PY313 else (): # type: ignore + for _ in range(size): + instructions.append(ConcreteInstr("CACHE", 0, location=loc)) 
else: if PY310: line_starts = {offset: lineno for offset, _, lineno in code.co_lines()} From 869d7daba8587a835cdf2faea18f9cafad67613f Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 20 Aug 2024 16:26:00 +0200 Subject: [PATCH 27/50] cfg: update stack depth computation for generators which have no implicit initial stack size anymore --- src/bytecode/cfg.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/bytecode/cfg.py b/src/bytecode/cfg.py index 32a116ab..f13ac403 100644 --- a/src/bytecode/cfg.py +++ b/src/bytecode/cfg.py @@ -23,7 +23,7 @@ from bytecode.concrete import ConcreteInstr from bytecode.flags import CompilerFlags from bytecode.instr import UNSET, Instr, Label, SetLineno, TryBegin, TryEnd -from bytecode.utils import PY310, PY311 +from bytecode.utils import PY310, PY311, PY313 T = TypeVar("T", bound="BasicBlock") U = TypeVar("U", bound="ControlFlowGraph") @@ -520,10 +520,15 @@ def compute_stacksize( # Starting with Python 3.10, generator and coroutines start with one object # on the stack (None, anything is an error).
initial_stack_size = 0 - if PY310 and self.flags & ( - CompilerFlags.GENERATOR - | CompilerFlags.COROUTINE - | CompilerFlags.ASYNC_GENERATOR + if ( + not PY313 # under 3.13+ RETURN_GENERATOR make this explicit + and PY310 + and self.flags + & ( + CompilerFlags.GENERATOR + | CompilerFlags.COROUTINE + | CompilerFlags.ASYNC_GENERATOR + ) ): initial_stack_size = 1 From a4d9fc026ef7b9fa16a89f17a9c20bd157e339bb Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 20 Aug 2024 16:26:23 +0200 Subject: [PATCH 28/50] flags: update inference to use RETURN_GENERATOR opcode --- src/bytecode/flags.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bytecode/flags.py b/src/bytecode/flags.py index d67bf887..a11b411d 100644 --- a/src/bytecode/flags.py +++ b/src/bytecode/flags.py @@ -111,7 +111,7 @@ def infer_flags( ) sure_generator = instr_names & {"YIELD_VALUE"} - maybe_generator = instr_names & {"YIELD_VALUE", "YIELD_FROM"} + maybe_generator = instr_names & {"YIELD_VALUE", "YIELD_FROM", "RETURN_GENERATOR"} sure_async = instr_names & { "GET_AWAITABLE", From 4e98f9213333081a3c599cc1f736804c5e07d110 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 20 Aug 2024 16:26:36 +0200 Subject: [PATCH 29/50] tests: fix typo --- tests/test_misc.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_misc.py b/tests/test_misc.py index f249ab83..11f1d894 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -419,14 +419,14 @@ def func(test): 4 LOAD_CONST 1 6 COMPARE_OP 88 8 CACHE 0 - 10 POP_JUMP_IF_FALSE 2 + 10 POP_JUMP_IF_FALSE 1 12 CACHE 0 14 RETURN_CONST 1 16 LOAD_FAST 0 18 LOAD_CONST 2 20 COMPARE_OP 88 22 CACHE 0 - 24 POP_JUMP_IF_FALSE 2 + 24 POP_JUMP_IF_FALSE 1 26 CACHE 0 28 RETURN_CONST 2 30 RETURN_CONST 3 @@ -458,7 +458,7 @@ def func(test): 6 COMPARE_OP 2 8 CACHE 0 10 CACHE 0 - 12 POP_JUMP_FORWARD_IF_FALSE 2 + 12 POP_JUMP_FORWARD_IF_FALSE 1 14 LOAD_CONST 1 16 RETURN_VALUE 18 LOAD_FAST 0 @@ -466,7 +466,7 @@
def func(test): 22 COMPARE_OP 2 24 CACHE 0 26 CACHE 0 - 28 POP_JUMP_FORWARD_IF_FALSE 2 + 28 POP_JUMP_FORWARD_IF_FALSE 1 30 LOAD_CONST 2 32 RETURN_VALUE 34 LOAD_CONST 3 @@ -499,14 +499,14 @@ def func(test): 4: LOAD_CONST 1 6: COMPARE_OP 88 8: CACHE 0 - 10: POP_JUMP_IF_FALSE 2 + 10: POP_JUMP_IF_FALSE 1 12: CACHE 0 L. 3 14: RETURN_CONST 1 L. 4 16: LOAD_FAST 0 18: LOAD_CONST 2 20: COMPARE_OP 88 22: CACHE 0 - 24: POP_JUMP_IF_FALSE 2 + 24: POP_JUMP_IF_FALSE 1 26: CACHE 0 L. 5 28: RETURN_CONST 2 L. 6 30: RETURN_CONST 3 @@ -536,7 +536,7 @@ def func(test): 6: COMPARE_OP 2 8: CACHE 0 10: CACHE 0 - 12: POP_JUMP_FORWARD_IF_FALSE 2 + 12: POP_JUMP_FORWARD_IF_FALSE 1 L. 3 14: LOAD_CONST 1 16: RETURN_VALUE L. 4 18: LOAD_FAST 0 @@ -544,7 +544,7 @@ def func(test): 22: COMPARE_OP 2 24: CACHE 0 26: CACHE 0 - 28: POP_JUMP_FORWARD_IF_FALSE 2 + 28: POP_JUMP_FORWARD_IF_FALSE 1 L. 5 30: LOAD_CONST 2 32: RETURN_VALUE L. 6 34: LOAD_CONST 3 From c8bd9ab46cdd9d7d6695cf4fb8fce0ce1cd5c27a Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Wed, 21 Aug 2024 08:19:59 +0200 Subject: [PATCH 30/50] concrete: properly generate CACHE for the last instruction --- src/bytecode/concrete.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index cc68feb6..ac542987 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -1,5 +1,6 @@ import dis import inspect +import itertools import opcode as _opcode import struct import sys @@ -1188,7 +1189,9 @@ def concrete_instructions(self) -> None: cell_instrs: List[int] = [] free_instrs: List[int] = [] - for instr in self.bytecode: + # We use None as a sentinel to ensure caches for the last instruction are + # properly generated. + for instr in itertools.chain(self.bytecode, [None]): # Enforce proper use of CACHE opcode on Python 3.11+ by checking we get the # number we expect or directly generate the needed ones. 
if isinstance(instr, Instr) and instr.name == "CACHE": @@ -1213,10 +1216,13 @@ def concrete_instructions(self) -> None: self.seen_manual_cache = False else: raise RuntimeError( - "Found some manual opcode but less than expected. " + "Found some manual CACHE opcode but less than expected. " f"Missing {self.required_caches} CACHE opcodes." ) + if instr is None: + continue + if isinstance(instr, Label): self.labels[instr] = len(self.instructions) continue From 8d312e5bcb2e565012888d23d242e7129e5c4190 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Wed, 21 Aug 2024 08:24:37 +0200 Subject: [PATCH 31/50] tests: fix last tests --- tests/test_bytecode.py | 5 ++++- tests/test_cfg.py | 36 +++++++++++++++++++++++++++++++++++- tests/test_concrete.py | 25 +++++++++++++++++++------ 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/tests/test_bytecode.py b/tests/test_bytecode.py index d8952d12..59f8bcfd 100644 --- a/tests/test_bytecode.py +++ b/tests/test_bytecode.py @@ -8,6 +8,7 @@ from bytecode import Bytecode, ConcreteInstr, FreeVar, Instr, Label, SetLineno from bytecode.instr import BinaryOp +from bytecode.utils import PY313 from . import TestCase, get_code @@ -422,7 +423,9 @@ def test_negative_size_unary_with_disable_check_of_pre_and_post(self): code.first_lineno = 1 code.extend([Instr(opname)]) co = code.to_code(check_pre_and_post=False) - self.assertEqual(co.co_stacksize, 0) + # In 3.13 the code object constructor fixes the stacksize for us... + if not PY313: + self.assertEqual(co.co_stacksize, 0) def test_negative_size_binary(self): operations = ( diff --git a/tests/test_cfg.py b/tests/test_cfg.py index 84dbb309..24c19e41 100644 --- a/tests/test_cfg.py +++ b/tests/test_cfg.py @@ -3,6 +3,7 @@ import contextlib import inspect import io +import opcode import sys import textwrap import types @@ -19,6 +20,7 @@ dump_bytecode, ) from bytecode.concrete import OFFSET_AS_INSTRUCTION +from bytecode.utils import PY313, PY311 from . 
import TestCase, disassemble as _disassemble @@ -611,6 +613,7 @@ def test_to_code(self): block0.extend( [ Instr("LOAD_FAST", "x", lineno=4), + *([Instr("TO_BOOL", lineno=4)] if PY313 else []), Instr( "POP_JUMP_FORWARD_IF_FALSE" if (3, 12) > sys.version_info >= (3, 11) @@ -632,7 +635,38 @@ def test_to_code(self): ] ) - if sys.version_info >= (3, 11): + if PY313: + expected = bytes( + ( + opcode.opmap["LOAD_FAST"], + 5, + opcode.opmap["TO_BOOL"], + 0, + 0, + 0, + 0, + 0, + 0, + 0, + opcode.opmap["POP_JUMP_IF_FALSE"], + 2, + 0, + 0, + opcode.opmap["LOAD_FAST"], + 0, + opcode.opmap["STORE_FAST"], + 5, + opcode.opmap["LOAD_CONST"], + 1, + opcode.opmap["STORE_FAST"], + 5, + opcode.opmap["LOAD_FAST"], + 5, + opcode.opmap["RETURN_VALUE"], + 0, + ) + ) + elif PY311: # jump is relative not absolute expected = ( b"|\x05" b"r\x02" b"|\x00" b"}\x05" b"d\x01" b"}\x05" b"|\x05" b"S\x00" diff --git a/tests/test_concrete.py b/tests/test_concrete.py index 9737cc3b..3f50615a 100644 --- a/tests/test_concrete.py +++ b/tests/test_concrete.py @@ -652,7 +652,7 @@ def test_explicit_stacksize(self): # Then with something bogus. We probably don't want to advertise this # in the documentation. If this fails then decide if it's for good # reason, and remove if so. 
- explicit_stacksize = 0 + explicit_stacksize = code_obj.co_stacksize - 1 new_code_obj = concrete.to_code( stacksize=explicit_stacksize, compute_exception_stack_depths=False ) @@ -1445,6 +1445,7 @@ def test_label2(self): 7 if OFFSET_AS_INSTRUCTION else 14, lineno=1, ), + *([ConcreteInstr("CACHE")] if PY313 else []), ConcreteInstr("LOAD_CONST", 0, lineno=2), ConcreteInstr("STORE_NAME", 1, lineno=2), ConcreteInstr("JUMP_FORWARD", 2 if OFFSET_AS_INSTRUCTION else 4, lineno=2), @@ -1559,8 +1560,6 @@ def test_setlineno(self): ) def test_extended_jump(self): - NOP = bytes((opcode.opmap["NOP"], 0)) - # code using jumps > 0xffff to test extended arg nb_nop = 2**16 if OFFSET_AS_INSTRUCTION else 2**15 # The length of the jump is independent of the number of instruction @@ -1577,9 +1576,22 @@ def test_extended_jump(self): ) code_obj = code.to_code() - # We use 2 extended args (0x90) out of the maximum 3 which are allowed - i_code = opcode.opmap["JUMP_FORWARD"].to_bytes(1, "little") - expected = b"\x90\x01\x90\x00" + i_code + b"\x00" + NOP * nb_nop + b"d\x00S\x00" + # We use 2 extended args out of the maximum 3 which are allowed + expected = bytes( + ( + opcode.EXTENDED_ARG, + 1, + opcode.EXTENDED_ARG, + 0, + opcode.opmap["JUMP_FORWARD"], + 0, + *([opcode.opmap["NOP"], 0] * nb_nop), + opcode.opmap["LOAD_CONST"], + 0, + opcode.opmap["RETURN_VALUE"], + 0, + ) + ) self.assertSequenceEqual(code_obj.co_code, expected) def test_jumps(self): @@ -1621,6 +1633,7 @@ def test_jumps(self): 5 if OFFSET_AS_INSTRUCTION else 10, lineno=1, ), + *([ConcreteInstr("CACHE")] if PY313 else []), ConcreteInstr("LOAD_CONST", 0, lineno=2), ConcreteInstr("STORE_NAME", 1, lineno=2), ConcreteInstr("JUMP_FORWARD", 2 if OFFSET_AS_INSTRUCTION else 4, lineno=2), From 5497da2b19e8e703dcc69129d2290ea9e1be5c97 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Wed, 21 Aug 2024 08:25:50 +0200 Subject: [PATCH 32/50] cis: start running framework tests on 3.13 --- .github/workflows/frameworks.yml | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/frameworks.yml b/.github/workflows/frameworks.yml index 3b924df7..980e702c 100644 --- a/.github/workflows/frameworks.yml +++ b/.github/workflows/frameworks.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13-dev"] steps: - uses: actions/checkout@v4 From a1573231311860819efb3f5328f3593f076734f2 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Wed, 21 Aug 2024 08:28:12 +0200 Subject: [PATCH 33/50] tests: fix broken test on 3.11 --- tests/test_misc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_misc.py b/tests/test_misc.py index 11f1d894..033b59a8 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -458,7 +458,7 @@ def func(test): 6 COMPARE_OP 2 8 CACHE 0 10 CACHE 0 - 12 POP_JUMP_FORWARD_IF_FALSE 1 + 12 POP_JUMP_FORWARD_IF_FALSE 2 14 LOAD_CONST 1 16 RETURN_VALUE 18 LOAD_FAST 0 @@ -466,7 +466,7 @@ def func(test): 22 COMPARE_OP 2 24 CACHE 0 26 CACHE 0 - 28 POP_JUMP_FORWARD_IF_FALSE 1 + 28 POP_JUMP_FORWARD_IF_FALSE 2 30 LOAD_CONST 2 32 RETURN_VALUE 34 LOAD_CONST 3 @@ -536,7 +536,7 @@ def func(test): 6: COMPARE_OP 2 8: CACHE 0 10: CACHE 0 - 12: POP_JUMP_FORWARD_IF_FALSE 1 + 12: POP_JUMP_FORWARD_IF_FALSE 2 L. 3 14: LOAD_CONST 1 16: RETURN_VALUE L. 4 18: LOAD_FAST 0 @@ -544,7 +544,7 @@ def func(test): 22: COMPARE_OP 2 24: CACHE 0 26: CACHE 0 - 28: POP_JUMP_FORWARD_IF_FALSE 1 + 28: POP_JUMP_FORWARD_IF_FALSE 2 L. 5 30: LOAD_CONST 2 32: RETURN_VALUE L. 
6 34: LOAD_CONST 3 From ab4a15a1bbacb127b3602d0bd2a822c48878eebc Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Wed, 21 Aug 2024 08:35:12 +0200 Subject: [PATCH 34/50] appease linters --- tests/test_cfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cfg.py b/tests/test_cfg.py index 24c19e41..75e92d25 100644 --- a/tests/test_cfg.py +++ b/tests/test_cfg.py @@ -20,7 +20,7 @@ dump_bytecode, ) from bytecode.concrete import OFFSET_AS_INSTRUCTION -from bytecode.utils import PY313, PY311 +from bytecode.utils import PY311, PY313 from . import TestCase, disassemble as _disassemble From 3fa8f8492e4f1860199215b423623724acb52108 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Wed, 21 Aug 2024 08:37:13 +0200 Subject: [PATCH 35/50] cis: typo --- .github/workflows/frameworks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/frameworks.yml b/.github/workflows/frameworks.yml index 980e702c..84135f67 100644 --- a/.github/workflows/frameworks.yml +++ b/.github/workflows/frameworks.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13-dev"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 From e780c34d9392a90751fe1972e0f0070465b4facf Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Tue, 3 Sep 2024 19:33:05 +0200 Subject: [PATCH 36/50] concrete: optimize packing lines for multiple instructions at line 0 --- src/bytecode/concrete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index ac542987..4b7ffb4b 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -726,7 +726,7 @@ def _assemble_locations( location = location or InstrLocation(new_lineno, None, None, None) # Group together instruction with equivalent locations - if old_location.lineno and old_location == location: + if 
old_location.lineno is not None and old_location == location: size += i_size continue From d92390c0c5f24d8be60f94727ecbdaca32f7e988 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Thu, 5 Sep 2024 12:50:51 +0200 Subject: [PATCH 37/50] concrete: preserve complete location rather than lineno when converting from Bytecode to ConcreteBytecode --- src/bytecode/concrete.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 4b7ffb4b..7e79bd70 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -722,8 +722,8 @@ def _assemble_locations( # We track the last set lineno to be able to compute deltas for _, i_size, new_lineno, location in iter_in: - # Infer the line if location is None - location = location or InstrLocation(new_lineno, None, None, None) + # Infer the location if location is None + location = location or old_location # Group together instruction with equivalent locations if old_location.lineno is not None and old_location == location: @@ -1183,7 +1183,7 @@ def add(names: List[str], name: str) -> int: return index def concrete_instructions(self) -> None: - lineno = self.bytecode.first_lineno + location = InstrLocation(self.bytecode.first_lineno, None, None, None) # Track instruction (index) using cell vars and free vars to be able to update # the index used once all the names are known. 
cell_instrs: List[int] = [] @@ -1228,7 +1228,7 @@ def concrete_instructions(self) -> None: continue if isinstance(instr, SetLineno): - lineno = instr.lineno + location = InstrLocation(instr.lineno, None, None, None) continue if isinstance(instr, TryBegin): @@ -1255,10 +1255,10 @@ def concrete_instructions(self) -> None: assert isinstance(instr, Instr) - if instr.lineno is not UNSET and instr.lineno is not None: - lineno = instr.lineno - elif instr.lineno is UNSET: - instr.lineno = lineno + if instr.location is not UNSET and instr.location is not None: + location = instr.location + elif instr.location is UNSET: + instr.location = location opcode = instr._opcode arg = instr.arg From a97ae31b55c1bc86a8d95fbc428973e157ec54f0 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Mon, 9 Sep 2024 20:59:24 +0200 Subject: [PATCH 38/50] tests: fix broken test following SetLineno handling update --- tests/test_bytecode.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/tests/test_bytecode.py b/tests/test_bytecode.py index 59f8bcfd..2f27d64c 100644 --- a/tests/test_bytecode.py +++ b/tests/test_bytecode.py @@ -7,7 +7,7 @@ import unittest from bytecode import Bytecode, ConcreteInstr, FreeVar, Instr, Label, SetLineno -from bytecode.instr import BinaryOp +from bytecode.instr import BinaryOp, InstrLocation from bytecode.utils import PY313 from . 
import TestCase, get_code @@ -351,12 +351,24 @@ def test_setlineno(self): self.assertListEqual( list(concrete), [ - ConcreteInstr("LOAD_CONST", 0, lineno=3), - ConcreteInstr("STORE_NAME", 0, lineno=3), - ConcreteInstr("LOAD_CONST", 1, lineno=4), - ConcreteInstr("STORE_NAME", 1, lineno=4), - ConcreteInstr("LOAD_CONST", 2, lineno=5), - ConcreteInstr("STORE_NAME", 2, lineno=5), + ConcreteInstr( + "LOAD_CONST", 0, location=InstrLocation(3, None, None, None) + ), + ConcreteInstr( + "STORE_NAME", 0, location=InstrLocation(3, None, None, None) + ), + ConcreteInstr( + "LOAD_CONST", 1, location=InstrLocation(4, None, None, None) + ), + ConcreteInstr( + "STORE_NAME", 1, location=InstrLocation(4, None, None, None) + ), + ConcreteInstr( + "LOAD_CONST", 2, location=InstrLocation(5, None, None, None) + ), + ConcreteInstr( + "STORE_NAME", 2, location=InstrLocation(5, None, None, None) + ), ], ) From a379c3eedf80e083a0fe03bc98ba2bd80255a0ef Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Mon, 9 Sep 2024 21:03:05 +0200 Subject: [PATCH 39/50] concrete: fix broken inference of locations --- src/bytecode/concrete.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 7e79bd70..2a6d49bf 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -1257,8 +1257,6 @@ def concrete_instructions(self) -> None: if instr.location is not UNSET and instr.location is not None: location = instr.location - elif instr.location is UNSET: - instr.location = location opcode = instr._opcode arg = instr.arg @@ -1340,7 +1338,7 @@ def concrete_instructions(self) -> None: # The above should have performed all the necessary conversion assert isinstance(arg, int) - c_instr = ConcreteInstr(instr.name, arg, location=instr.location) + c_instr = ConcreteInstr(instr.name, arg, location=location) if is_jump: self.jumps.append((len(self.instructions), label, c_instr)) From b14092a743ac55926ee33a794351ccf3e0729ffd Mon Sep 17 
00:00:00 2001 From: MatthieuDartiailh Date: Mon, 9 Sep 2024 21:08:45 +0200 Subject: [PATCH 40/50] fix linting issue --- src/bytecode/concrete.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 2a6d49bf..ebda1960 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -721,7 +721,7 @@ def _assemble_locations( lineno = first_lineno # We track the last set lineno to be able to compute deltas - for _, i_size, new_lineno, location in iter_in: + for _, i_size, _, location in iter_in: # Infer the location if location is None location = location or old_location From 4fb730f88202abf560b3bf6ec428f7220175f43c Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Thu, 17 Oct 2024 12:50:41 +0200 Subject: [PATCH 41/50] instr: allow LOAD_FAST to refer to FreeVar in Python 3.13 --- src/bytecode/concrete.py | 9 +++++++-- src/bytecode/instr.py | 6 +++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index ebda1960..8b1e6f14 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -989,9 +989,11 @@ def to_bytecode( cells_lookup = [CellVar(n) for n in self.cellvars] # In Python 3.13+ LOAD_FAST can be used to retrieve cell values - locals_lookup: Sequence[Union[str, CellVar]] + locals_lookup: Sequence[Union[str, CellVar, FreeVar]] if PY313: - locals_lookup = cells_lookup + locals_lookup = cells_lookup + [ + FreeVar(n) for n in self.freevars if n not in self.varnames + ] else: locals_lookup = self.varnames @@ -1282,6 +1284,9 @@ def concrete_instructions(self) -> None: elif PY313 and isinstance(arg, CellVar): cell_instrs.append(len(self.instructions)) arg = self.bytecode.cellvars.index(arg.name) + elif PY313 and isinstance(arg, FreeVar): + free_instrs.append(len(self.instructions)) + arg = self.bytecode.freevars.index(arg.name) else: assert isinstance(arg, str) arg = self.add(self.varnames, arg) diff --git 
a/src/bytecode/instr.py b/src/bytecode/instr.py index af55876f..e150e064 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -878,7 +878,11 @@ def _check_arg(self, name: str, opcode: int, arg: InstrArg) -> None: "got %s (value=%s)" % (name, type(arg).__name__, str(arg)) ) - elif PY313 and opcode in _opcode.haslocal and isinstance(arg, CellVar): + elif ( + PY313 + and opcode in _opcode.haslocal + and isinstance(arg, (CellVar, FreeVar)) + ): # Cell vars can be accessed using locals in Python 3.13+ pass From 6d2844106959d9b301490a46da9fc0595c8803b6 Mon Sep 17 00:00:00 2001 From: Matthieu Dartiailh Date: Thu, 17 Oct 2024 13:07:37 +0200 Subject: [PATCH 42/50] Update src/bytecode/instr.py Co-authored-by: Gabriele N. Tornetta --- src/bytecode/instr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index e150e064..4a947822 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -883,7 +883,7 @@ def _check_arg(self, name: str, opcode: int, arg: InstrArg) -> None: and opcode in _opcode.haslocal and isinstance(arg, (CellVar, FreeVar)) ): - # Cell vars can be accessed using locals in Python 3.13+ + # Cell and free vars can be accessed using locals in Python 3.13+ pass elif not isinstance(arg, str): From 2f40b98b124b68ef11c23557b6b0490cb27df7c2 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Thu, 17 Oct 2024 13:35:06 +0200 Subject: [PATCH 43/50] test: add a test about free var handling --- tests/cell_free_vars_cases.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/cell_free_vars_cases.py b/tests/cell_free_vars_cases.py index a16096c3..2bc4b623 100644 --- a/tests/cell_free_vars_cases.py +++ b/tests/cell_free_vars_cases.py @@ -56,6 +56,20 @@ def f(self): return B().f +def test_freevar(): + class Foo: + r = 0 + + @classmethod + def bar(cls, k): + class Snafu(k): + def do_debug(self, arg): + cls.r += 1 + return super().d(arg) + + return Snafu + + # NOTE this is 
not really a cell var case but it ensures proper # placements of CACHE vs labels _localedirs = {} @@ -76,6 +90,7 @@ def bindtextdomain(domain="", localedir=None): class_super, class_loadderef, bindtextdomain, + test_freevar, ] if __name__ == "__main__": From 260c78e3f7328234dcfd19a10313a7868032c738 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Thu, 17 Oct 2024 22:38:14 +0200 Subject: [PATCH 44/50] fix typing --- src/bytecode/instr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 4a947822..f575d255 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -799,7 +799,7 @@ def _cmp_key(self) -> Tuple[Optional[InstrLocation], str, Any]: Compare, Tuple[bool, str], Tuple[bool, bool, str], - Tuple[Union[str, CellVar], Union[str, CellVar]], + Tuple[Union[str, CellVar, FreeVar], Union[str, CellVar, FreeVar]], ] From 4307726a4e2034b87509c84c7e4ab574c05d1c80 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Thu, 17 Oct 2024 22:39:08 +0200 Subject: [PATCH 45/50] concrete: fix handling of dual opcode when one of the arg is too large --- src/bytecode/concrete.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 8b1e6f14..0749bcb8 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -1191,6 +1191,16 @@ def concrete_instructions(self) -> None: cell_instrs: List[int] = [] free_instrs: List[int] = [] + # On 3.13+, try to use small indexes for names used in dual arg opcode + # to improve the chances to be able to use them (since we cannot use + # only the 15 first names. + if PY313: + for instr in self.bytecode: + if isinstance(instr, Instr) and instr._opcode in DUAL_ARG_OPCODES: + assert isinstance(instr.arg, tuple) + for arg in instr.arg: + self.add(self.varnames, arg) + # We use None as a sentinel to ensure caches for the last instruction are # properly generated. 
for instr in itertools.chain(self.bytecode, [None]): @@ -1260,6 +1270,7 @@ def concrete_instructions(self) -> None: if instr.location is not UNSET and instr.location is not None: location = instr.location + instr_name = instr.name opcode = instr._opcode arg = instr.arg is_jump = False @@ -1278,9 +1289,18 @@ def concrete_instructions(self) -> None: and isinstance(arg[0], str) and isinstance(arg[1], str) ) - arg = (self.add(self.varnames, arg[0]) << 4) + self.add( - self.varnames, arg[1] - ) + arg1_index = self.add(self.varnames, arg[0]) + arg2_index = self.add(self.varnames, arg[1]) + if arg1_index > 16 or arg2_index > 16: + parts = instr.name.split("_") + n1 = "_".join(parts[:2]) + n2 = "_".join(parts[2:]) + c_instr = ConcreteInstr(n1, arg1_index, location=location) + self.instructions.append(c_instr) + instr_name = n2 + arg = arg2_index + else: + arg = (arg1_index << 4) + arg2_index elif PY313 and isinstance(arg, CellVar): cell_instrs.append(len(self.instructions)) arg = self.bytecode.cellvars.index(arg.name) @@ -1343,7 +1363,7 @@ def concrete_instructions(self) -> None: # The above should have performed all the necessary conversion assert isinstance(arg, int) - c_instr = ConcreteInstr(instr.name, arg, location=location) + c_instr = ConcreteInstr(instr_name, arg, location=location) if is_jump: self.jumps.append((len(self.instructions), label, c_instr)) From 5d5c1d7c33832ee2399aa9432f155991825d5295 Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Thu, 17 Oct 2024 22:54:17 +0200 Subject: [PATCH 46/50] fix typing issues --- src/bytecode/concrete.py | 49 ++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 0749bcb8..eeb1c0c3 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -182,8 +182,8 @@ def disassemble(cls: Type[T], lineno: Optional[int], code: bytes, offset: int) - def use_cache_opcodes(self) -> int: if sys.version_info >= (3, 
13): return ( - dis._inline_cache_entries[self._name] - if self._name in dis._inline_cache_entries + dis._inline_cache_entries[self._name] # type: ignore[attr-defined] + if self._name in dis._inline_cache_entries # type: ignore[attr-defined] else 0 ) elif sys.version_info >= (3, 11): @@ -1195,11 +1195,12 @@ def concrete_instructions(self) -> None: # to improve the chances to be able to use them (since we cannot use # only the 15 first names. if PY313: - for instr in self.bytecode: - if isinstance(instr, Instr) and instr._opcode in DUAL_ARG_OPCODES: - assert isinstance(instr.arg, tuple) - for arg in instr.arg: - self.add(self.varnames, arg) + for binstr in self.bytecode: + if isinstance(binstr, Instr) and binstr._opcode in DUAL_ARG_OPCODES: + assert isinstance(binstr.arg, tuple) + for parg in binstr.arg: + assert isinstance(parg, str) + self.add(self.varnames, parg) # We use None as a sentinel to ensure caches for the last instruction are # properly generated. @@ -1277,10 +1278,10 @@ def concrete_instructions(self) -> None: if isinstance(arg, Label): label = arg # fake value, real value is set in compute_jumps() - arg = 0 + c_arg = 0 is_jump = True elif opcode in _opcode.hasconst: - arg = self.add_const(arg) + c_arg = self.add_const(arg) elif opcode in _opcode.haslocal: if opcode in DUAL_ARG_OPCODES: assert ( @@ -1298,18 +1299,18 @@ def concrete_instructions(self) -> None: c_instr = ConcreteInstr(n1, arg1_index, location=location) self.instructions.append(c_instr) instr_name = n2 - arg = arg2_index + c_arg = arg2_index else: - arg = (arg1_index << 4) + arg2_index + c_arg = (arg1_index << 4) + arg2_index elif PY313 and isinstance(arg, CellVar): cell_instrs.append(len(self.instructions)) - arg = self.bytecode.cellvars.index(arg.name) + c_arg = self.bytecode.cellvars.index(arg.name) elif PY313 and isinstance(arg, FreeVar): free_instrs.append(len(self.instructions)) - arg = self.bytecode.freevars.index(arg.name) + c_arg = self.bytecode.freevars.index(arg.name) else: 
assert isinstance(arg, str) - arg = self.add(self.varnames, arg) + c_arg = self.add(self.varnames, arg) elif opcode in _opcode.hasname: if opcode in BITFLAG_OPCODES: assert ( @@ -1319,7 +1320,7 @@ def concrete_instructions(self) -> None: and isinstance(arg[1], str) ), arg index = self.add(self.names, arg[1]) - arg = int(arg[0]) + (index << 1) + c_arg = int(arg[0]) + (index << 1) elif opcode in BITFLAG2_OPCODES: assert ( isinstance(arg, tuple) @@ -1329,24 +1330,24 @@ def concrete_instructions(self) -> None: and isinstance(arg[2], str) ), arg index = self.add(self.names, arg[2]) - arg = int(arg[0]) + 2 * int(arg[1]) + (index << 2) + c_arg = int(arg[0]) + 2 * int(arg[1]) + (index << 2) else: assert isinstance(arg, str), f"Got {arg}, expected a str" - arg = self.add(self.names, arg) + c_arg = self.add(self.names, arg) elif opcode in _opcode.hasfree: if isinstance(arg, CellVar): cell_instrs.append(len(self.instructions)) - arg = self.bytecode.cellvars.index(arg.name) + c_arg = self.bytecode.cellvars.index(arg.name) else: assert isinstance(arg, FreeVar) free_instrs.append(len(self.instructions)) - arg = self.bytecode.freevars.index(arg.name) + c_arg = self.bytecode.freevars.index(arg.name) elif opcode in _opcode.hascompare: if isinstance(arg, Compare): # In Python 3.13 the 4 lowest bits are used for caching # and the 5th one indicate a cast to bool if PY313: - arg = ( + c_arg = ( arg._get_mask() + ((arg.value & 0b1111) << 5) + (arg.value & 16) @@ -1354,16 +1355,16 @@ def concrete_instructions(self) -> None: # In Python 3.12 the 4 lowest bits are used for caching # See compare_masks in compile.c elif PY312: - arg = arg._get_mask() + (arg.value << 4) + c_arg = arg._get_mask() + (arg.value << 4) else: - arg = arg.value + c_arg = arg.value elif opcode in INTRINSIC: if isinstance(arg, (Intrinsic1Op, Intrinsic2Op)): - arg = arg.value + c_arg = arg.value # The above should have performed all the necessary conversion assert isinstance(arg, int) - c_instr = 
ConcreteInstr(instr_name, arg, location=location) + c_instr = ConcreteInstr(instr_name, c_arg, location=location) if is_jump: self.jumps.append((len(self.instructions), label, c_instr)) From 040f082ae21a52df4490916a151fd1fce9997d2d Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Thu, 17 Oct 2024 23:37:23 +0200 Subject: [PATCH 47/50] concrete: fix bad argument conversion --- src/bytecode/concrete.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index eeb1c0c3..e6471080 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -1361,9 +1361,11 @@ def concrete_instructions(self) -> None: elif opcode in INTRINSIC: if isinstance(arg, (Intrinsic1Op, Intrinsic2Op)): c_arg = arg.value + else: + assert isinstance(arg, int) + c_arg = arg # The above should have performed all the necessary conversion - assert isinstance(arg, int) c_instr = ConcreteInstr(instr_name, c_arg, location=location) if is_jump: self.jumps.append((len(self.instructions), label, c_instr)) From 41315e74573233080b3fadb9cd76e639001507df Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Sun, 27 Oct 2024 17:55:32 +0100 Subject: [PATCH 48/50] instr: use a lookup table when we need to convert dual opcodes to single --- src/bytecode/concrete.py | 5 ++--- src/bytecode/instr.py | 6 ++++++ tests/test_concrete.py | 19 +++++++++++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index e6471080..52108cc0 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -29,6 +29,7 @@ BITFLAG2_OPCODES, BITFLAG_OPCODES, DUAL_ARG_OPCODES, + DUAL_ARG_OPCODES_SINGLE_OPS, INTRINSIC, INTRINSIC_1OP, INTRINSIC_2OP, @@ -1293,9 +1294,7 @@ def concrete_instructions(self) -> None: arg1_index = self.add(self.varnames, arg[0]) arg2_index = self.add(self.varnames, arg[1]) if arg1_index > 16 or arg2_index > 16: - parts = instr.name.split("_") - n1 = 
"_".join(parts[:2]) - n2 = "_".join(parts[2:]) + n1, n2 = DUAL_ARG_OPCODES_SINGLE_OPS[opcode] c_instr = ConcreteInstr(n1, arg1_index, location=location) self.instructions.append(c_instr) instr_name = n2 diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index f575d255..0158e560 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -44,12 +44,18 @@ #: Opcodes taking 2 arguments (highest 4 bits and lowest 4 bits) DUAL_ARG_OPCODES: Tuple[int, ...] = () +DUAL_ARG_OPCODES_SINGLE_OPS: dict[int, tuple[str, str]] = {} if PY313: DUAL_ARG_OPCODES = ( _opcode.opmap["LOAD_FAST_LOAD_FAST"], _opcode.opmap["STORE_FAST_LOAD_FAST"], _opcode.opmap["STORE_FAST_STORE_FAST"], ) + DUAL_ARG_OPCODES_SINGLE_OPS = { + _opcode.opmap["LOAD_FAST_LOAD_FAST"]: ("LOAD_FAST", "LOAD_FAST"), + _opcode.opmap["STORE_FAST_LOAD_FAST"]: ("STORE_FAST", "LOAD_FAST"), + _opcode.opmap["STORE_FAST_STORE_FAST"]: ("STORE_FAST", "STORE_FAST"), + } # Used for COMPARE_OP opcode argument diff --git a/tests/test_concrete.py b/tests/test_concrete.py index 3f50615a..90067d7e 100644 --- a/tests/test_concrete.py +++ b/tests/test_concrete.py @@ -1830,6 +1830,25 @@ def f(): # FIXME test more cases for line encoding in particular with extended args + @unittest.skipIf(sys.version_info < (3, 13), "Apply only to 3.13+") + def test_handling_dual_opcodes(self): + code = Bytecode() + code.extend( + [ + Instr("LOAD_FAST_LOAD_FAST", ("a", "b"), lineno=1), + Instr("LOAD_FAST_LOAD_FAST", ("c", "d"), lineno=1), + Instr("LOAD_FAST_LOAD_FAST", ("e", "f"), lineno=1), + Instr("LOAD_FAST_LOAD_FAST", ("g", "h"), lineno=1), + Instr("LOAD_FAST_LOAD_FAST", ("i", "j"), lineno=1), + Instr("LOAD_FAST_LOAD_FAST", ("k", "l"), lineno=1), + Instr("LOAD_FAST_LOAD_FAST", ("m", "n"), lineno=1), + Instr("LOAD_FAST_LOAD_FAST", ("o", "p"), lineno=1), + Instr("LOAD_FAST_LOAD_FAST", ("q", "r"), lineno=1), + ] + ) + concrete = code.to_concrete_bytecode() + assert len(concrete) == 10 + if __name__ == "__main__": unittest.main() # pragma: 
no cover From d1c77b058205ad96d1fe4b7cecdd12e49a8eab6b Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Sun, 27 Oct 2024 18:03:51 +0100 Subject: [PATCH 49/50] ci: do not run framework tests on 3.8 --- .github/workflows/frameworks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/frameworks.yml b/.github/workflows/frameworks.yml index 84135f67..f156fa23 100644 --- a/.github/workflows/frameworks.yml +++ b/.github/workflows/frameworks.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 From b50cc68aac543309e939d9642509fb1204ac447e Mon Sep 17 00:00:00 2001 From: MatthieuDartiailh Date: Sun, 27 Oct 2024 18:07:16 +0100 Subject: [PATCH 50/50] allow to run on 3.8 --- .github/workflows/frameworks.yml | 2 +- src/bytecode/instr.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/frameworks.yml b/.github/workflows/frameworks.yml index f156fa23..84135f67 100644 --- a/.github/workflows/frameworks.yml +++ b/.github/workflows/frameworks.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index 0158e560..e6bbcde3 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -44,7 +44,7 @@ #: Opcodes taking 2 arguments (highest 4 bits and lowest 4 bits) DUAL_ARG_OPCODES: Tuple[int, ...] = () -DUAL_ARG_OPCODES_SINGLE_OPS: dict[int, tuple[str, str]] = {} +DUAL_ARG_OPCODES_SINGLE_OPS: Dict[int, Tuple[str, str]] = {} if PY313: DUAL_ARG_OPCODES = ( _opcode.opmap["LOAD_FAST_LOAD_FAST"],