diff --git a/setup.py b/setup.py index 69a08b737c..5b1ae1b81a 100644 --- a/setup.py +++ b/setup.py @@ -94,6 +94,7 @@ def _global_version(version): "asttokens>=2.0.5,<3", "pycryptodome>=3.5.1,<4", "packaging>=23.1,<24", + "lark>=1.0.0,<2", "importlib-metadata", "wheel", ], @@ -105,6 +106,7 @@ def _global_version(version): "vyper=vyper.cli.vyper_compile:_parse_cli_args", "fang=vyper.cli.vyper_ir:_parse_cli_args", "vyper-json=vyper.cli.vyper_json:_parse_cli_args", + "venom=vyper.cli.venom_main:_parse_cli_args", ] }, classifiers=[ diff --git a/tests/functional/syntax/exceptions/test_vyper_exception_pos.py b/tests/functional/syntax/exceptions/test_vyper_exception_pos.py index 9e0767cb83..17bd4de1cd 100644 --- a/tests/functional/syntax/exceptions/test_vyper_exception_pos.py +++ b/tests/functional/syntax/exceptions/test_vyper_exception_pos.py @@ -1,6 +1,7 @@ from pytest import raises -from vyper.exceptions import VyperException +from vyper import compile_code +from vyper.exceptions import SyntaxException, VyperException def test_type_exception_pos(): @@ -29,3 +30,32 @@ def __init__(): """ assert_compile_failed(lambda: get_contract(code), VyperException) + + +def test_exception_contains_file(make_input_bundle): + code = """ +def bar()>: + """ + input_bundle = make_input_bundle({"code.vy": code}) + with raises(SyntaxException, match="contract"): + compile_code(code, input_bundle=input_bundle) + + +def test_exception_reports_correct_file(make_input_bundle, chdir_tmp_path): + code_a = "def bar()>:" + code_b = "import A" + input_bundle = make_input_bundle({"A.vy": code_a, "B.vy": code_b}) + + with raises(SyntaxException, match=r'contract "A\.vy:\d+"'): + compile_code(code_b, input_bundle=input_bundle) + + +def test_syntax_exception_reports_correct_offset(make_input_bundle): + code = """ +def foo(): + uint256 a = pass + """ + input_bundle = make_input_bundle({"code.vy": code}) + + with raises(SyntaxException, match=r"line \d+:12"): + compile_code(code, 
def instructions_eq(i1: IRInstruction, i2: IRInstruction) -> bool:
    """Return True when both instructions have equal output, opcode and operands."""
    return i1.output == i2.output and i1.opcode == i2.opcode and i1.operands == i2.operands


def assert_bb_eq(bb1: IRBasicBlock, bb2: IRBasicBlock):
    """Assert that two basic blocks carry the same label and the same instructions, in order."""
    assert bb1.label.value == bb2.label.value, f"{bb1.label.value} != {bb2.label.value}"
    # lengths are compared first because zip() silently stops at the shorter list
    assert len(bb1.instructions) == len(bb2.instructions)
    for i1, i2 in zip(bb1.instructions, bb2.instructions):
        assert instructions_eq(i1, i2), f"[{i1}] != [{i2}]"


def assert_fn_eq(fn1: IRFunction, fn2: IRFunction):
    """Assert that two functions agree on name, variable counter, basic blocks and entry block."""
    assert fn1.name.value == fn2.name.value, f"{fn1.name.value} != {fn2.name.value}"
    assert fn1.last_variable == fn2.last_variable
    assert len(fn1._basic_block_dict) == len(fn2._basic_block_dict)

    # equal sizes + every key of fn1 present in fn2 => identical key sets
    for name1, bb1 in fn1._basic_block_dict.items():
        assert name1 in fn2._basic_block_dict, f"missing basic block: {name1}"
        assert_bb_eq(bb1, fn2._basic_block_dict[name1])

    # check function entry is the same
    assert fn1.entry.label == fn2.entry.label


def assert_ctx_eq(ctx1: IRContext, ctx2: IRContext):
    """
    Assert that two contexts agree on label counter, functions (including
    which function comes first) and data segment.
    """
    assert ctx1.last_label == ctx2.last_label
    assert len(ctx1.functions) == len(ctx2.functions)
    for label1, fn1 in ctx1.functions.items():
        assert label1 in ctx2.functions, f"missing function: {label1}"
        assert_fn_eq(fn1, ctx2.functions[label1])

    # check entry function is the same. a context may legitimately hold no
    # functions at all, so fall back to None instead of letting next() raise
    # StopIteration on an empty mapping.
    assert next(iter(ctx1.functions), None) == next(iter(ctx2.functions), None)

    assert len(ctx1.data_segment) == len(ctx2.data_segment)
    for d1, d2 in zip(ctx1.data_segment, ctx2.data_segment):
        assert instructions_eq(d1, d2), f"data: [{d1}] != [{d2}]"
dbname @selector_buckets + db @selector_bucket_0 + db @fallback + db @selector_bucket_2 + db @selector_bucket_3 + db @fallback + db @selector_bucket_5 + db @selector_bucket_6 + """ + ) + + expected_ctx = IRContext() + expected_ctx.add_function(entry_fn := IRFunction(IRLabel("entry"))) + entry_fn.get_basic_block("entry").append_instruction("stop") + + expected_ctx.data_segment = [ + IRInstruction("dbname", [IRLabel("selector_buckets")]), + IRInstruction("db", [IRLabel("selector_bucket_0")]), + IRInstruction("db", [IRLabel("fallback")]), + IRInstruction("db", [IRLabel("selector_bucket_2")]), + IRInstruction("db", [IRLabel("selector_bucket_3")]), + IRInstruction("db", [IRLabel("fallback")]), + IRInstruction("db", [IRLabel("selector_bucket_5")]), + IRInstruction("db", [IRLabel("selector_bucket_6")]), + ] + + assert_ctx_eq(parsed_ctx, expected_ctx) + + +def test_multi_function(): + parsed_ctx = parse_venom( + """ + function entry { + entry: + invoke @check_cv + jmp @wow + wow: + mstore 0, 1 + return 0, 32 + } + + function check_cv { + check_cv: + %1 = callvalue + %2 = param + jnz @no_value, @has_value, %1 + no_value: + ret %2 + has_value: + revert 0, 0 + } + + [data] + """ + ) + + expected_ctx = IRContext() + expected_ctx.add_function(entry_fn := IRFunction(IRLabel("entry"))) + + entry_bb = entry_fn.get_basic_block("entry") + entry_bb.append_instruction("invoke", IRLabel("check_cv")) + entry_bb.append_instruction("jmp", IRLabel("wow")) + + entry_fn.append_basic_block(wow_bb := IRBasicBlock(IRLabel("wow"), entry_fn)) + wow_bb.append_instruction("mstore", IRLiteral(1), IRLiteral(0)) + wow_bb.append_instruction("return", IRLiteral(32), IRLiteral(0)) + + expected_ctx.add_function(check_fn := IRFunction(IRLabel("check_cv"))) + + check_entry_bb = check_fn.get_basic_block("check_cv") + check_entry_bb.append_instruction("callvalue", ret=IRVariable("1")) + check_entry_bb.append_instruction("param", ret=IRVariable("2")) + check_entry_bb.append_instruction( + "jnz", 
IRVariable("1"), IRLabel("has_value"), IRLabel("no_value") + ) + check_fn.append_basic_block(no_value_bb := IRBasicBlock(IRLabel("no_value"), check_fn)) + no_value_bb.append_instruction("ret", IRVariable("2")) + + check_fn.append_basic_block(value_bb := IRBasicBlock(IRLabel("has_value"), check_fn)) + value_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) + value_bb.append_instruction("stop") + + check_fn.last_variable = 2 + + assert_ctx_eq(parsed_ctx, expected_ctx) + + +def test_multi_function_and_data(): + parsed_ctx = parse_venom( + """ + function entry { + entry: + invoke @check_cv + jmp @wow + wow: + mstore 0, 1 + return 0, 32 + } + + function check_cv { + check_cv: + %1 = callvalue + %2 = param + jnz @no_value, @has_value, %1 + no_value: + ret %2 + has_value: + revert 0, 0 + } + + [data] + dbname @selector_buckets + db @selector_bucket_0 + db @fallback + db @selector_bucket_2 + db @selector_bucket_3 + db @selector_bucket_6 + """ + ) + + expected_ctx = IRContext() + expected_ctx.add_function(entry_fn := IRFunction(IRLabel("entry"))) + + entry_bb = entry_fn.get_basic_block("entry") + entry_bb.append_instruction("invoke", IRLabel("check_cv")) + entry_bb.append_instruction("jmp", IRLabel("wow")) + + entry_fn.append_basic_block(wow_bb := IRBasicBlock(IRLabel("wow"), entry_fn)) + wow_bb.append_instruction("mstore", IRLiteral(1), IRLiteral(0)) + wow_bb.append_instruction("return", IRLiteral(32), IRLiteral(0)) + + expected_ctx.add_function(check_fn := IRFunction(IRLabel("check_cv"))) + + check_entry_bb = check_fn.get_basic_block("check_cv") + check_entry_bb.append_instruction("callvalue", ret=IRVariable("1")) + check_entry_bb.append_instruction("param", ret=IRVariable("2")) + check_entry_bb.append_instruction( + "jnz", IRVariable("1"), IRLabel("has_value"), IRLabel("no_value") + ) + check_fn.append_basic_block(no_value_bb := IRBasicBlock(IRLabel("no_value"), check_fn)) + no_value_bb.append_instruction("ret", IRVariable("2")) + + 
check_fn.append_basic_block(value_bb := IRBasicBlock(IRLabel("has_value"), check_fn)) + value_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) + value_bb.append_instruction("stop") + + check_fn.last_variable = 2 + + expected_ctx.data_segment = [ + IRInstruction("dbname", [IRLabel("selector_buckets")]), + IRInstruction("db", [IRLabel("selector_bucket_0")]), + IRInstruction("db", [IRLabel("fallback")]), + IRInstruction("db", [IRLabel("selector_bucket_2")]), + IRInstruction("db", [IRLabel("selector_bucket_3")]), + IRInstruction("db", [IRLabel("selector_bucket_6")]), + ] + + assert_ctx_eq(parsed_ctx, expected_ctx) diff --git a/tests/unit/ast/test_natspec.py b/tests/unit/ast/test_natspec.py index 710b7a9312..37120d2978 100644 --- a/tests/unit/ast/test_natspec.py +++ b/tests/unit/ast/test_natspec.py @@ -436,3 +436,19 @@ def test_natspec_parsed_implicitly(): # anything beyond ast is blocked with pytest.raises(NatSpecSyntaxException): compile_code(code, output_formats=["annotated_ast_dict"]) + + +def test_natspec_exception_contains_file_path(): + code = """ +@external +def foo() -> (int128,uint256): + ''' + @return int128 + @return uint256 + @return this should fail + ''' + return 1, 2 + """ + + with pytest.raises(NatSpecSyntaxException, match=r'contract "VyperContract\.vy:\d+"'): + parse_natspec(code) diff --git a/tests/unit/ast/test_pre_parser.py b/tests/unit/ast/test_pre_parser.py index 73712aadb8..510d1e0ed2 100644 --- a/tests/unit/ast/test_pre_parser.py +++ b/tests/unit/ast/test_pre_parser.py @@ -1,3 +1,5 @@ +from pathlib import Path + import pytest from vyper import compile_code @@ -56,6 +58,24 @@ def test_invalid_version_pragma(file_version, mock_version): validate_version_pragma(f"{file_version}", file_version, (SRC_LINE)) +def test_invalid_version_contains_file(mock_version): + mock_version(COMPILER_VERSION) + with pytest.raises(VersionException, match=r'contract "mock\.vy:\d+"'): + compile_code("# pragma version ^0.3.10", resolved_path=Path("mock.vy")) 
+ + +def test_imported_invalid_version_contains_correct_file( + mock_version, make_input_bundle, chdir_tmp_path +): + code_a = "# pragma version ^0.3.10" + code_b = "import A" + input_bundle = make_input_bundle({"A.vy": code_a, "B.vy": code_b}) + mock_version(COMPILER_VERSION) + + with pytest.raises(VersionException, match=r'contract "A\.vy:\d+"'): + compile_code(code_b, input_bundle=input_bundle) + + prerelease_valid_versions = [ "<0.1.1-beta.9", "<0.1.1b9", diff --git a/tests/unit/cli/vyper_json/test_compile_json.py b/tests/unit/cli/vyper_json/test_compile_json.py index c2ca6dbe12..329a87efac 100644 --- a/tests/unit/cli/vyper_json/test_compile_json.py +++ b/tests/unit/cli/vyper_json/test_compile_json.py @@ -260,7 +260,7 @@ def test_wrong_language(): def test_exc_handler_raises_syntax(input_json): input_json["sources"]["badcode.vy"] = {"content": BAD_SYNTAX_CODE} - with pytest.raises(SyntaxException): + with pytest.raises(SyntaxException, match=r'contract "badcode\.vy:\d+"'): compile_json(input_json) diff --git a/tests/unit/compiler/test_source_map.py b/tests/unit/compiler/test_source_map.py index d99b546403..ae1999a26e 100644 --- a/tests/unit/compiler/test_source_map.py +++ b/tests/unit/compiler/test_source_map.py @@ -97,8 +97,44 @@ def update_foo(): self.foo += 1 """ error_map = compile_code(code, output_formats=["source_map"])["source_map"]["error_map"] - assert "safeadd" in list(error_map.values()) - assert "fallback function" in list(error_map.values()) + assert "safeadd" in error_map.values() + assert "fallback function" in error_map.values() + + +def test_error_map_with_user_error(): + code = """ +@external +def foo(): + raise "some error" + """ + error_map = compile_code(code, output_formats=["source_map"])["source_map"]["error_map"] + assert "user revert with reason" in error_map.values() + + +def test_error_map_with_user_error2(): + code = """ +@external +def foo(i: uint256): + a: DynArray[uint256, 10] = [1] + a[i % 10] = 2 + """ + error_map = 
def parse_natspec(annotated_vyper_module: vy_ast.Module) -> NatspecOutput:
    """
    Parse NatSpec documentation from a contract.

    Delegates to `_parse_natspec`. If parsing fails with a
    `NatSpecSyntaxException`, the module's `resolved_path` is recorded on
    the exception before it propagates to the caller.
    """
    try:
        return _parse_natspec(annotated_vyper_module)
    except NatSpecSyntaxException as e:
        e.resolved_path = annotated_vyper_module.resolved_path
        # bare `raise` re-raises the active exception with its traceback as-is
        raise
@@ -60,7 +76,12 @@ def parse_to_ast_with_settings( py_ast = python_ast.parse(pre_parser.reformatted_code) except SyntaxError as e: # TODO: Ensure 1-to-1 match of source_code:reformatted_code SyntaxErrors - raise SyntaxException(str(e), vyper_source, e.lineno, e.offset) from None + offset = e.offset + if offset is not None: + # SyntaxError offset is 1-based, not 0-based (see: + # https://docs.python.org/3/library/exceptions.html#SyntaxError.offset) + offset -= 1 + raise SyntaxException(str(e.msg), vyper_source, e.lineno, offset) from None # Add dummy function node to ensure local variables are treated as `AnnAssign` # instead of state variables (`VariableDecl`) diff --git a/vyper/cli/venom_main.py b/vyper/cli/venom_main.py new file mode 100755 index 0000000000..3114246e04 --- /dev/null +++ b/vyper/cli/venom_main.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +import argparse +import sys + +import vyper +import vyper.evm.opcodes as evm +from vyper.compiler.phases import generate_bytecode +from vyper.compiler.settings import OptimizationLevel, Settings, set_global_settings +from vyper.venom import generate_assembly_experimental, run_passes_on +from vyper.venom.parser import parse_venom + +""" +Standalone entry point into venom compiler. Parses venom input and emits +bytecode. 
def _parse_cli_args():
    """Console-script entry point: run the venom compiler on `sys.argv[1:]`."""
    return _parse_args(sys.argv[1:])


def _read_venom_source(args) -> str:
    """
    Return the venom source text selected by the parsed CLI arguments.

    Reads stdin when `--stdin` was given, otherwise the positional input
    file. Exits with status 1 when no input is available.
    """
    if args.stdin:
        if sys.stdin.isatty():
            # stdin is an interactive terminal, i.e. nothing was piped in.
            # diagnostics go to stderr: stdout is reserved for the bytecode.
            print("Error: --stdin flag used but no input provided", file=sys.stderr)
            sys.exit(1)
        return sys.stdin.read()

    if args.input_file is None:
        print(
            "Error: No input file provided, either use --stdin or provide a path",
            file=sys.stderr,
        )
        sys.exit(1)

    with open(args.input_file, "r") as f:
        return f.read()


def _parse_args(argv: list[str]):
    """
    Parse `argv`, compile the selected venom source and print the resulting
    bytecode to stdout as a `0x`-prefixed hex string.

    Usage errors (no input given) are reported on stderr and terminate the
    process with exit status 1.
    """
    parser = argparse.ArgumentParser(
        description="Venom EVM IR parser & compiler", formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument("input_file", help="Venom sourcefile", nargs="?")
    parser.add_argument("--version", action="version", version=vyper.__long_version__)
    parser.add_argument(
        "--evm-version",
        help=f"Select desired EVM version (default {evm.DEFAULT_EVM_VERSION})",
        choices=list(evm.EVM_VERSIONS),
        dest="evm_version",
    )
    parser.add_argument(
        "--stdin", action="store_true", help="whether to pull venom input from stdin"
    )

    args = parser.parse_args(argv)

    # settings are applied before the input is read, as in the original flow
    if args.evm_version is not None:
        set_global_settings(Settings(evm_version=args.evm_version))

    venom_source = _read_venom_source(args)

    # parse -> run passes -> assembly -> bytecode
    ctx = parse_venom(venom_source)
    run_passes_on(ctx, OptimizationLevel.default())
    asm = generate_assembly_experimental(ctx)
    bytecode = generate_bytecode(asm, compiler_metadata=None)
    print(f"0x{bytecode.hex()}")


if __name__ == "__main__":
    _parse_args(sys.argv[1:])
+ # set an error message and push down to its children that don't have error_msg set def set_error_msg(self, error_msg: str) -> None: + if self.error_msg is not None: + raise CompilerPanic(f"{self.value} already has error message {self.error_msg}") + self._set_error_msg(error_msg) + + def _set_error_msg(self, error_msg: str) -> None: + if self.error_msg is not None: + return self.error_msg = error_msg for arg in self.args: - arg.set_error_msg(error_msg) + arg._set_error_msg(error_msg) # get the unique symbols contained in this node, which provides # sanity check invariants for the optimizer. @@ -627,7 +632,7 @@ def from_list( else: return cls( obj[0], - [cls.from_list(o, ast_source=ast_source) for o in obj[1:]], + [cls.from_list(o, ast_source=ast_source, error_msg=error_msg) for o in obj[1:]], typ, location=location, annotation=annotation, diff --git a/vyper/exceptions.py b/vyper/exceptions.py index c69163b561..990dbf7953 100644 --- a/vyper/exceptions.py +++ b/vyper/exceptions.py @@ -54,6 +54,7 @@ def __init__(self, message="Error Message not found.", *items, hint=None, prev_d self.lineno = None self.col_offset = None self.annotations = None + self.resolved_path = None if len(items) == 1 and isinstance(items[0], tuple) and isinstance(items[0][0], int): # support older exceptions that don't annotate - remove this in the future! 
@@ -127,13 +128,18 @@ def format_annotation(self, value): module_node = node.module_node # TODO: handle cases where module is None or vy_ast.Module - if module_node.get("path") not in (None, ""): - node_msg = f'{node_msg}contract "{module_node.path}:{node.lineno}", ' + if module_node.get("resolved_path") not in (None, ""): + node_msg = self._format_contract_details( + node_msg, module_node.resolved_path, node.lineno + ) fn_node = node.get_ancestor(vy_ast.FunctionDef) if fn_node: node_msg = f'{node_msg}function "{fn_node.name}", ' + elif self.resolved_path is not None: + node_msg = self._format_contract_details(node_msg, self.resolved_path, node.lineno) + col_offset_str = "" if node.col_offset is None else str(node.col_offset) node_msg = f"{node_msg}line {node.lineno}:{col_offset_str} \n{source_annotation}\n" @@ -151,6 +157,11 @@ def _add_hint(self, msg): return msg return msg + f"\n (hint: {self.hint})" + def _format_contract_details(self, msg, path, lineno): + from vyper.utils import safe_relpath + + return f'{msg}contract "{safe_relpath(path)}:{lineno}", ' + def __str__(self): return self._add_hint(self._str_helper()) diff --git a/vyper/venom/README.md b/vyper/venom/README.md index ea6eabebaa..964f52b524 100644 --- a/vyper/venom/README.md +++ b/vyper/venom/README.md @@ -29,59 +29,43 @@ Venom employs two scopes: global and function level. 
### Example code ```llvm -IRFunction: global - -global: - %1 = calldataload 0 - %2 = shr 224, %1 - jmp label %selector_bucket_0 - -selector_bucket_0: - %3 = xor %2, 1579456981 - %4 = iszero %3 - jnz label %1, label %2, %4 - -1: IN=[selector_bucket_0] OUT=[9] - jmp label %fallback - -2: - %5 = callvalue - %6 = calldatasize - %7 = lt %6, 164 - %8 = or %5, %7 - %9 = iszero %8 - assert %9 - stop - -fallback: - revert 0, 0 +function global { + global: + %1 = calldataload 0 + %2 = shr 224, %1 + jmp @selector_bucket_0 + + selector_bucket_0: + %3 = xor %2, 1579456981 + %4 = iszero %3 + jnz @1, @2, %4 + + 1: + jmp @fallback + + 2: + %5 = callvalue + %6 = calldatasize + %7 = lt %6, 164 + %8 = or %5, %7 + %9 = iszero %8 + assert %9 + stop + + fallback: + revert 0, 0 +} + +[data] ``` ### Grammar -Below is a (not-so-complete) grammar to describe the text format of Venom IR: +For the definition of the grammar, see the [venom parser](./parser.py). -```llvm -program ::= function_declaration* - -function_declaration ::= "IRFunction:" identifier input_list? output_list? "=>" block - -input_list ::= "IN=" "[" (identifier ("," identifier)*)? "]" -output_list ::= "OUT=" "[" (identifier ("," identifier)*)? "]" - -block ::= label ":" input_list? output_list? "=>{" operation* "}" +### Compiling Venom -operation ::= "%" identifier "=" opcode operand ("," operand)* - | opcode operand ("," operand)* - -opcode ::= "calldataload" | "shr" | "shl" | "and" | "add" | "codecopy" | "mload" | "jmp" | "xor" | "iszero" | "jnz" | "label" | "lt" | "or" | "assert" | "callvalue" | "calldatasize" | "alloca" | "calldatacopy" | "invoke" | "gt" | ... - -operand ::= "%" identifier | label | integer | "label" "%" identifier -label ::= "%" identifier - -identifier ::= [a-zA-Z_][a-zA-Z0-9_]* -integer ::= [0-9]+ -``` +Vyper ships with a venom compiler which compiles venom code directly to bytecode. It can be invoked with the `venom` command, which is installed as a standalone command-line script when `vyper` is installed via `pip`. 
## Implementation diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 593a9556a9..7d9404b9ef 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -72,10 +72,14 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None: DFTPass(ac, fn).run_pass() +def run_passes_on(ctx: IRContext, optimize: OptimizationLevel): + for fn in ctx.functions.values(): + _run_passes(fn, optimize) + + def generate_ir(ir: IRnode, optimize: OptimizationLevel) -> IRContext: # Convert "old" IR to "new" IR ctx = ir_node_to_venom(ir) - for fn in ctx.functions.values(): - _run_passes(fn, optimize) + run_passes_on(ctx, optimize) return ctx diff --git a/vyper/venom/analysis/cfg.py b/vyper/venom/analysis/cfg.py index 700fd73f26..2f90410cd5 100644 --- a/vyper/venom/analysis/cfg.py +++ b/vyper/venom/analysis/cfg.py @@ -23,7 +23,7 @@ def analyze(self) -> None: bb.is_reachable = False for bb in fn.get_basic_blocks(): - assert bb.is_terminated + assert bb.is_terminated, f"not terminating:\n{bb}" term = bb.instructions[-1] if term.opcode in CFG_ALTERING_INSTRUCTIONS: diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 968ce42bdf..e159a6d464 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -513,7 +513,7 @@ def insert_instruction(self, instruction: IRInstruction, index: Optional[int] = assert isinstance(instruction, IRInstruction), "instruction must be an IRInstruction" if index is None: - assert not self.is_terminated, self + assert not self.is_terminated, (self, instruction) index = len(self.instructions) instruction.parent = self instruction.ast_source = self.parent.ast_source diff --git a/vyper/venom/function.py b/vyper/venom/function.py index 0c48c9740e..2372f8ba52 100644 --- a/vyper/venom/function.py +++ b/vyper/venom/function.py @@ -12,7 +12,6 @@ class IRFunction: name: IRLabel # symbol name ctx: "IRContext" # type: ignore # noqa: F821 args: list - last_label: int last_variable: int _basic_block_dict: 
dict[str, IRBasicBlock] @@ -182,7 +181,6 @@ def chain_basic_blocks(self) -> None: def copy(self): new = IRFunction(self.name) new._basic_block_dict = self._basic_block_dict.copy() - new.last_label = self.last_label new.last_variable = self.last_variable return new diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py new file mode 100644 index 0000000000..d2574c3b0c --- /dev/null +++ b/vyper/venom/parser.py @@ -0,0 +1,178 @@ +from lark import Lark, Transformer + +from vyper.venom.basicblock import ( + IRBasicBlock, + IRInstruction, + IRLabel, + IRLiteral, + IROperand, + IRVariable, +) +from vyper.venom.context import IRContext +from vyper.venom.function import IRFunction + +VENOM_PARSER = Lark( + """ + %import common.CNAME + %import common.DIGIT + %import common.LETTER + %import common.WS + %import common.INT + + # TODO: make data_section optional -- `function* data_section?` + start: function* data_section + + # TODO: consider making entry block implicit, e.g. + # `"{" instruction+ block* "}"` + function: "function" NAME "{" block* "}" + + data_section: "[data]" instruction* + + block: NAME ":" statement* + + statement: instruction | assignment + assignment: VAR_IDENT "=" expr + expr: instruction | CONST + instruction: OPCODE operands_list? + + operands_list: operand ("," operand)* + + operand: VAR_IDENT | CONST | LABEL + + CONST: INT + OPCODE: CNAME + VAR_IDENT: "%" INT (":" INT)? 
def _set_last_var(fn: IRFunction):
    """
    Raise `fn.last_variable` to the largest numeric variable name that is
    assigned by any instruction in `fn`.
    """
    for bb in fn.get_basic_blocks():
        for inst in bb.instructions:
            if inst.output is None:
                continue
            value = inst.output.value
            assert value.startswith("%")
            # the grammar accepts versioned names (`%N:M`). only `N` counts
            # towards the counter, so drop any `:M` suffix before converting.
            # NOTE(review): assumes a version, if stored in `.value`, is
            # appended as `:M` -- a no-op for plain `%N` names.
            base, _, _ = value[1:].partition(":")
            fn.last_variable = max(fn.last_variable, int(base))


def _set_last_label(ctx: IRContext):
    """
    Raise `ctx.last_label` to the largest numeric label prefix found among
    the basic blocks of all functions in `ctx`.

    Only the part of a label before its first underscore is considered,
    and only when that part consists of digits.
    """
    for fn in ctx.functions.values():
        for bb in fn.get_basic_blocks():
            label_head = bb.label.value.split("_", maxsplit=1)[0]
            if label_head.isdigit():
                ctx.last_label = max(int(label_head), ctx.last_label)


def _ensure_terminated(bb):
    """Append a `stop` to an unterminated block that contains a `revert`."""
    # Since "revert" is not considered terminal explicitly check for it to ensure basic
    # blocks are terminating
    if bb.is_terminated:
        return
    if any(inst.opcode == "revert" for inst in bb.instructions):
        bb.append_instruction("stop")
value.output = to + return value + if isinstance(value, IRLiteral): + return IRInstruction("store", [value], output=to) + raise TypeError(f"Unexpected value {value} of type {type(value)}") + + def expr(self, children): + return children[0] + + def instruction(self, children) -> IRInstruction: + if len(children) == 1: + name = children[0] + operands = [] + else: + assert len(children) == 2 + name, operands = children + + # reverse operands, venom internally represents top of stack + # as rightmost operand + return IRInstruction(name, reversed(operands)) + + def operands_list(self, children) -> list[IROperand]: + return children + + def operand(self, children) -> IROperand: + return children[0] + + def OPCODE(self, token): + return token.value + + def LABEL(self, label) -> IRLabel: + return IRLabel(label[1:]) + + def VAR_IDENT(self, var_ident) -> IRVariable: + parts = var_ident[1:].split(":", maxsplit=1) + assert 1 <= len(parts) <= 2 + varname = parts[0] + version = None + if len(parts) > 1: + version = parts[1] + return IRVariable(varname, version=version) + + def CONST(self, val) -> IRLiteral: + return IRLiteral(int(val)) + + def CNAME(self, val) -> str: + return val.value + + def NAME(self, val) -> str: + return val.value + + +def parse_venom(source: str) -> IRContext: + tree = VENOM_PARSER.parse(source) + ctx = VenomTransformer().transform(tree) + assert isinstance(ctx, IRContext) # help mypy + return ctx diff --git a/vyper/venom/passes/sccp/sccp.py b/vyper/venom/passes/sccp/sccp.py index 369be3e753..9004a357f0 100644 --- a/vyper/venom/passes/sccp/sccp.py +++ b/vyper/venom/passes/sccp/sccp.py @@ -143,7 +143,7 @@ def _handle_SSA_work_item(self, work_item: SSAWorkListItem): self._visit_expr(work_item.inst) def _lookup_from_lattice(self, op: IROperand) -> LatticeItem: - assert isinstance(op, IRVariable), "Can't get lattice for non-variable" + assert isinstance(op, IRVariable), f"Can't get lattice for non-variable ({op})" lat = self.lattice[op] assert lat is not 
None, f"Got undefined var {op}" return lat