From 0aa21d6433e4e8361af587ddd6598d277a565ec6 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 4 Sep 2023 15:46:00 -0400 Subject: [PATCH 1/4] feat: add runtime code layout to initcode this commit adds the runtime code layout to the initcode payload (as a suffix), so that the runtime code can be analyzed without source code. this is particularly important for disassemblers, which need demarcations for where the data section starts as distinct from the runtime code segment itself. the layout is: CBOR-encoded list: runtime code length [<length of data section> for data section in runtime data sections] immutable section length {"vyper": (major, minor, patch)} length of CBOR-encoded list + 2, encoded as two big-endian bytes. note the specific format for the CBOR payload was chosen to avoid changing the last 13 bytes of the signature. that is, the last 13 bytes still look like b"\xa1evyper\x83...", this is because, as the last item in a list, its encoding does not change compared to being the only dict in the payload. this commit also changes the meaning of the two footer bytes: they now indicate the length of the entire footer (including the two bytes indicating the footer length). the sole purpose of this is to be more intuitive as the two footer bytes indicate offset-from-the-end where the CBOR-encoded metadata starts, rather than the length of the CBOR payload (without the two length bytes). lastly, this commit renames the internal `insert_vyper_signature=` kwarg to `insert_compiler_metadata=` as the metadata includes more than just the vyper version now.
--- setup.py | 1 + vyper/compiler/output.py | 2 +- vyper/compiler/phases.py | 12 ++++---- vyper/ir/compile_ir.py | 64 +++++++++++++++++++++++++++------------- 4 files changed, 53 insertions(+), 26 deletions(-) diff --git a/setup.py b/setup.py index bbf6e60f55..c251071229 100644 --- a/setup.py +++ b/setup.py @@ -92,6 +92,7 @@ def _global_version(version): python_requires=">=3.10,<4", py_modules=["vyper"], install_requires=[ + "cbor2>=5.4.6,<6", "asttokens>=2.0.5,<3", "pycryptodome>=3.5.1,<4", "semantic-version>=2.10,<3", diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 69fcbf1f1f..1f1d56af78 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -218,7 +218,7 @@ def _build_asm(asm_list): def build_source_map_output(compiler_data: CompilerData) -> OrderedDict: _, line_number_map = compile_ir.assembly_to_evm( - compiler_data.assembly_runtime, insert_vyper_signature=False + compiler_data.assembly_runtime, insert_compiler_signature=False ) # Sort line_number_map out = OrderedDict() diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 526d2f3253..a1c7342320 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -184,12 +184,12 @@ def assembly_runtime(self) -> list: @cached_property def bytecode(self) -> bytes: - insert_vyper_signature = not self.no_bytecode_metadata - return generate_bytecode(self.assembly, insert_vyper_signature=insert_vyper_signature) + insert_compiler_metadata = not self.no_bytecode_metadata + return generate_bytecode(self.assembly, insert_compiler_metadata=insert_compiler_metadata) @cached_property def bytecode_runtime(self) -> bytes: - return generate_bytecode(self.assembly_runtime, insert_vyper_signature=False) + return generate_bytecode(self.assembly_runtime, insert_compiler_metadata=False) @cached_property def blueprint_bytecode(self) -> bytes: @@ -331,7 +331,7 @@ def _find_nested_opcode(assembly, key): return any(_find_nested_opcode(x, key) for x in sublists) -def 
generate_bytecode(assembly: list, insert_vyper_signature: bool) -> bytes: +def generate_bytecode(assembly: list, insert_compiler_metadata: bool) -> bytes: """ Generate bytecode from assembly instructions. @@ -345,4 +345,6 @@ def generate_bytecode(assembly: list, insert_vyper_signature: bool) -> bytes: bytes Final compiled bytecode. """ - return compile_ir.assembly_to_evm(assembly, insert_vyper_signature=insert_vyper_signature)[0] + return compile_ir.assembly_to_evm(assembly, insert_compiler_metadata=insert_compiler_metadata)[ + 0 + ] diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index bba3b34515..7a3e97155b 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1,6 +1,9 @@ import copy import functools import math +from dataclasses import dataclass + +import cbor2 from vyper.codegen.ir_node import IRnode from vyper.compiler.settings import OptimizationLevel @@ -507,9 +510,9 @@ def _height_of(witharg): elif code.value == "deploy": memsize = code.args[0].value # used later to calculate _mem_deploy_start ir = code.args[1] - padding = code.args[2].value + immutables_len = code.args[2].value assert isinstance(memsize, int), "non-int memsize" - assert isinstance(padding, int), "non-int padding" + assert isinstance(immutables_len, int), "non-int immutables_len" runtime_begin = mksymbol("runtime_begin") @@ -521,14 +524,14 @@ def _height_of(witharg): o.extend(["_sym_subcode_size", runtime_begin, "_mem_deploy_start", "CODECOPY"]) # calculate the len of runtime code - o.extend(["_OFST", "_sym_subcode_size", padding]) # stack: len + o.extend(["_OFST", "_sym_subcode_size", immutables_len]) # stack: len o.extend(["_mem_deploy_start"]) # stack: len mem_ofst o.extend(["RETURN"]) # since the asm data structures are very primitive, to make sure # assembly_to_evm is able to calculate data offsets correctly, # we pass the memsize via magic opcodes to the subcode - subcode = [_RuntimeHeader(runtime_begin, memsize)] + subcode + subcode = 
[_RuntimeHeader(runtime_begin, memsize, immutables_len)] + subcode # append the runtime code after the ctor code # `append(...)` call here is intentional. @@ -1051,18 +1054,19 @@ def _length_of_data(assembly): return ret +@dataclass class _RuntimeHeader: - def __init__(self, label, ctor_mem_size): - self.label = label - self.ctor_mem_size = ctor_mem_size + label: str + ctor_mem_size: int + immutables_len: int def __repr__(self): - return f"" + return f"" +@dataclass class _DataHeader: - def __init__(self, label): - self.label = label + label: str def __repr__(self): return f"DATA {self.label}" @@ -1092,21 +1096,21 @@ def _relocate_segments(assembly): # TODO: change API to split assembly_to_evm and assembly_to_source/symbol_maps -def assembly_to_evm(assembly, pc_ofst=0, insert_vyper_signature=False): +def assembly_to_evm(assembly, pc_ofst=0, insert_compiler_metadata=False): bytecode, source_maps, _ = assembly_to_evm_with_symbol_map( - assembly, pc_ofst=pc_ofst, insert_vyper_signature=insert_vyper_signature + assembly, pc_ofst=pc_ofst, insert_compiler_metadata=insert_compiler_metadata ) return bytecode, source_maps -def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature=False): +def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadata=False): """ Assembles assembly into EVM assembly: list of asm instructions pc_ofst: when constructing the source map, the amount to offset all pcs by (no effect until we add deploy code source map) - insert_vyper_signature: whether to append vyper metadata to output + insert_compiler_metadata: whether to append vyper metadata to output (should be true for runtime code) """ line_number_map = { @@ -1122,12 +1126,6 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature= runtime_code, runtime_code_start, runtime_code_end = None, None, None - bytecode_suffix = b"" - if insert_vyper_signature: - # CBOR encoded: {"vyper": [major,minor,patch]} - bytecode_suffix += 
b"\xa1\x65vyper\x83" + bytes(list(version_tuple)) - bytecode_suffix += len(bytecode_suffix).to_bytes(2, "big") - # to optimize the size of deploy code - we want to use the smallest # PUSH instruction possible which can support all memory symbols # (and also works with linear pass symbol resolution) @@ -1155,6 +1153,9 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature= if runtime_code_end is not None: mem_ofst_size = calc_mem_ofst_size(runtime_code_end + max_mem_ofst) + data_section_lengths = [] + immutables_len = None + # go through the code, resolving symbolic locations # (i.e. JUMPDEST locations) to actual code locations for i, item in enumerate(assembly): @@ -1198,18 +1199,41 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature= # [_OFST, _mem_foo, bar] -> PUSHN (foo+bar) pc -= 1 elif isinstance(item, list) and isinstance(item[0], _RuntimeHeader): + # we are in initcode symbol_map[item[0].label] = pc # add source map for all items in the runtime map t = adjust_pc_maps(runtime_map, pc) for key in line_number_map: line_number_map[key].update(t[key]) + immutables_len = item[0].immutables_len pc += len(runtime_code) + # grab lengths of data sections from the runtime + for t in item: + if isinstance(t, list) and isinstance(t[0], _DataHeader): + data_section_lengths.append(_length_of_data(t)) + elif isinstance(item, list) and isinstance(item[0], _DataHeader): symbol_map[item[0].label] = pc pc += _length_of_data(item) else: pc += 1 + bytecode_suffix = b"" + if insert_compiler_metadata: + # this will hold true when we are in initcode + assert immutables_len is not None + metadata = ( + len(runtime_code), + data_section_lengths, + immutables_len, + {"vyper": version_tuple}, + ) + bytecode_suffix += cbor2.dumps(metadata) + # append the length of the footer, *including* the length + # of the length bytes themselves. 
+ suffix_len = len(bytecode_suffix) + 2 + bytecode_suffix += suffix_len.to_bytes(2, "big") + pc += len(bytecode_suffix) symbol_map["_sym_code_end"] = pc From b6beaa04d4d09cfa8529040753a979386203b8a4 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 4 Sep 2023 15:59:40 -0400 Subject: [PATCH 2/4] fix lint --- vyper/compiler/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 1f1d56af78..334c5ba613 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -218,7 +218,7 @@ def _build_asm(asm_list): def build_source_map_output(compiler_data: CompilerData) -> OrderedDict: _, line_number_map = compile_ir.assembly_to_evm( - compiler_data.assembly_runtime, insert_compiler_signature=False + compiler_data.assembly_runtime, insert_compiler_metadata=False ) # Sort line_number_map out = OrderedDict() From d32a33c546cc1979753433e57f1d624aa7748b9b Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 4 Sep 2023 21:26:40 -0400 Subject: [PATCH 3/4] add tests for vyper signature --- tests/compiler/test_bytecode_runtime.py | 133 ++++++++++++++++++++++-- 1 file changed, 127 insertions(+), 6 deletions(-) diff --git a/tests/compiler/test_bytecode_runtime.py b/tests/compiler/test_bytecode_runtime.py index 86eff70a50..3acf3ab529 100644 --- a/tests/compiler/test_bytecode_runtime.py +++ b/tests/compiler/test_bytecode_runtime.py @@ -1,14 +1,135 @@ -import vyper +import cbor2 +import pytest +import vyper +from vyper.compiler.settings import OptimizationLevel, Settings -def test_bytecode_runtime(): - code = """ +simple_contract_code = """ @external def a() -> bool: return True - """ +""" + +many_functions = """ +@external +def foo1(): + pass + +@external +def foo2(): + pass + +@external +def foo3(): + pass + +@external +def foo4(): + pass + +@external +def foo5(): + pass +""" + +has_immutables = """ +A_GOOD_PRIME: public(immutable(uint256)) + +@external +def __init__(): + A_GOOD_PRIME = 967 
+""" + + +def _parse_cbor_metadata(initcode): + metadata_ofst = int.from_bytes(initcode[-2:]) + metadata = cbor2.loads(initcode[-metadata_ofst:-2]) + return metadata - out = vyper.compile_code(code, ["bytecode_runtime", "bytecode"]) + +def test_bytecode_runtime(): + out = vyper.compile_code(simple_contract_code, ["bytecode_runtime", "bytecode"]) assert len(out["bytecode"]) > len(out["bytecode_runtime"]) - assert out["bytecode_runtime"][2:] in out["bytecode"][2:] + assert out["bytecode_runtime"].removeprefix("0x") in out["bytecode"].removeprefix("0x") + + +def test_bytecode_signature(): + out = vyper.compile_code(simple_contract_code, ["bytecode_runtime", "bytecode"]) + + runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) + initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) + + metadata = _parse_cbor_metadata(initcode) + runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert runtime_len == len(runtime_code) + assert data_section_lengths == [] + assert immutables_len == 0 + assert compiler == {"vyper": list(vyper.version.version_tuple)} + + +def test_bytecode_signature_dense_jumptable(): + settings = Settings(optimize=OptimizationLevel.CODESIZE) + + out = vyper.compile_code(many_functions, ["bytecode_runtime", "bytecode"], settings=settings) + + runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) + initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) + + metadata = _parse_cbor_metadata(initcode) + runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert runtime_len == len(runtime_code) + assert data_section_lengths == [5, 35] + assert immutables_len == 0 + assert compiler == {"vyper": list(vyper.version.version_tuple)} + + +def test_bytecode_signature_sparse_jumptable(): + settings = Settings(optimize=OptimizationLevel.GAS) + + out = vyper.compile_code(many_functions, ["bytecode_runtime", "bytecode"], settings=settings) + + runtime_code = 
bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) + initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) + + metadata = _parse_cbor_metadata(initcode) + runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert runtime_len == len(runtime_code) + assert data_section_lengths == [8] + assert immutables_len == 0 + assert compiler == {"vyper": list(vyper.version.version_tuple)} + + +def test_bytecode_signature_immutables(): + out = vyper.compile_code(has_immutables, ["bytecode_runtime", "bytecode"]) + + runtime_code = bytes.fromhex(out["bytecode_runtime"].removeprefix("0x")) + initcode = bytes.fromhex(out["bytecode"].removeprefix("0x")) + + metadata = _parse_cbor_metadata(initcode) + runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert runtime_len == len(runtime_code) + assert data_section_lengths == [] + assert immutables_len == 32 + assert compiler == {"vyper": list(vyper.version.version_tuple)} + + +# check that deployed bytecode actually matches the cbor metadata +@pytest.mark.parametrize("code", [simple_contract_code, has_immutables, many_functions]) +def test_bytecode_signature_deployed(code, get_contract, w3): + c = get_contract(code) + deployed_code = w3.eth.get_code(c.address) + + initcode = c._classic_contract.bytecode + + metadata = _parse_cbor_metadata(initcode) + runtime_len, data_section_lengths, immutables_len, compiler = metadata + + assert compiler == {"vyper": list(vyper.version.version_tuple)} + + # runtime_len includes data sections but not immutables + assert len(deployed_code) == runtime_len + immutables_len From 1f7986422e07b4c76cf41d5de1c01989f62cf930 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 5 Sep 2023 08:22:20 -0400 Subject: [PATCH 4/4] fix int.from_bytes for python 3.10 apparently, byteorder is required in 3.10 but not in 3.11 --- tests/compiler/test_bytecode_runtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/compiler/test_bytecode_runtime.py b/tests/compiler/test_bytecode_runtime.py index 3acf3ab529..9519b03772 100644 --- a/tests/compiler/test_bytecode_runtime.py +++ b/tests/compiler/test_bytecode_runtime.py @@ -42,7 +42,7 @@ def __init__(): def _parse_cbor_metadata(initcode): - metadata_ofst = int.from_bytes(initcode[-2:]) + metadata_ofst = int.from_bytes(initcode[-2:], "big") metadata = cbor2.loads(initcode[-metadata_ofst:-2]) return metadata