diff --git a/setup.py b/setup.py index 69a08b737c..5b1ae1b81a 100644 --- a/setup.py +++ b/setup.py @@ -94,6 +94,7 @@ def _global_version(version): "asttokens>=2.0.5,<3", "pycryptodome>=3.5.1,<4", "packaging>=23.1,<24", + "lark>=1.0.0,<2", "importlib-metadata", "wheel", ], @@ -105,6 +106,7 @@ def _global_version(version): "vyper=vyper.cli.vyper_compile:_parse_cli_args", "fang=vyper.cli.vyper_ir:_parse_cli_args", "vyper-json=vyper.cli.vyper_json:_parse_cli_args", + "venom=vyper.cli.venom_main:_parse_cli_args", ] }, classifiers=[ diff --git a/tests/functional/venom/__init__.py b/tests/functional/venom/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/functional/venom/parser/__init__.py b/tests/functional/venom/parser/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/functional/venom/parser/test_parsing.py b/tests/functional/venom/parser/test_parsing.py new file mode 100644 index 0000000000..c121edb692 --- /dev/null +++ b/tests/functional/venom/parser/test_parsing.py @@ -0,0 +1,275 @@ +from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable +from vyper.venom.context import IRContext +from vyper.venom.function import IRFunction +from vyper.venom.parser import parse_venom + +# TODO: Refactor tests with these helpers + + +def instructions_eq(i1: IRInstruction, i2: IRInstruction) -> bool: + return i1.output == i2.output and i1.opcode == i2.opcode and i1.operands == i2.operands + + +def assert_bb_eq(bb1: IRBasicBlock, bb2: IRBasicBlock): + assert bb1.label.value == bb2.label.value + assert len(bb1.instructions) == len(bb2.instructions) + for i1, i2 in zip(bb1.instructions, bb2.instructions): + assert instructions_eq(i1, i2), f"[{i1}] != [{i2}]" + + +def assert_fn_eq(fn1: IRFunction, fn2: IRFunction): + assert fn1.name.value == fn2.name.value + assert fn1.last_variable == fn2.last_variable + assert len(fn1._basic_block_dict) == len(fn2._basic_block_dict) + + for name1, bb1 in fn1._basic_block_dict.items(): + assert name1 in fn2._basic_block_dict + assert_bb_eq(bb1, fn2._basic_block_dict[name1]) + + # check function entry is the same + assert fn1.entry.label == fn2.entry.label + + +def assert_ctx_eq(ctx1: IRContext, ctx2: IRContext): + assert ctx1.last_label == ctx2.last_label + assert len(ctx1.functions) == len(ctx2.functions) + for label1, fn1 in ctx1.functions.items(): + assert label1 in ctx2.functions + assert_fn_eq(fn1, ctx2.functions[label1]) + + # check entry function is the same + assert next(iter(ctx1.functions.keys())) == next(iter(ctx2.functions.keys())) + + assert len(ctx1.data_segment) == len(ctx2.data_segment) + for d1, d2 in zip(ctx1.data_segment, ctx2.data_segment): + assert instructions_eq(d1, d2), f"data: [{d1}] != [{d2}]" + + +def test_single_bb(): + source = """ + function main { + main: + stop + } + + [data] + """ + + parsed_ctx = parse_venom(source) + + expected_ctx = IRContext() + expected_ctx.add_function(main_fn := IRFunction(IRLabel("main"))) + main_bb = main_fn.get_basic_block("main") + main_bb.append_instruction("stop") + + assert_ctx_eq(parsed_ctx, expected_ctx) + + +def test_multi_bb_single_fn(): + source = """ + function start { + start: + %1 = callvalue + jnz @fine, @has_callvalue, %1 + fine: + %2 = calldataload 4 + %4 = add %2, 279387 + return %2, %4 + has_callvalue: + revert 0, 0 + } + + [data] + """ + + parsed_ctx = parse_venom(source) + + expected_ctx = IRContext() + expected_ctx.add_function(start_fn := IRFunction(IRLabel("start"))) + + start_bb = start_fn.get_basic_block("start") + start_bb.append_instruction("callvalue", ret=IRVariable("1")) + start_bb.append_instruction("jnz", IRVariable("1"), IRLabel("has_callvalue"), IRLabel("fine")) + + start_fn.append_basic_block(fine_bb := IRBasicBlock(IRLabel("fine"), start_fn)) + fine_bb.append_instruction("calldataload", IRLiteral(4), ret=IRVariable("2")) + fine_bb.append_instruction("add", IRLiteral(279387), IRVariable("2"), ret=IRVariable("4")) + fine_bb.append_instruction("return", IRVariable("4"), IRVariable("2")) + + has_callvalue_bb = IRBasicBlock(IRLabel("has_callvalue"), start_fn) + start_fn.append_basic_block(has_callvalue_bb) + has_callvalue_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) + has_callvalue_bb.append_instruction("stop") + + start_fn.last_variable = 4 + + assert_ctx_eq(parsed_ctx, expected_ctx) + + +def test_data_section(): + parsed_ctx = parse_venom( + """ + function entry { + entry: + stop + } + + [data] + dbname @selector_buckets + db @selector_bucket_0 + db @fallback + db @selector_bucket_2 + db @selector_bucket_3 + db @fallback + db @selector_bucket_5 + db @selector_bucket_6 + """ + ) + + expected_ctx = IRContext() + expected_ctx.add_function(entry_fn := IRFunction(IRLabel("entry"))) + entry_fn.get_basic_block("entry").append_instruction("stop") + + expected_ctx.data_segment = [ + IRInstruction("dbname", [IRLabel("selector_buckets")]), + IRInstruction("db", [IRLabel("selector_bucket_0")]), + IRInstruction("db", [IRLabel("fallback")]), + IRInstruction("db", [IRLabel("selector_bucket_2")]), + IRInstruction("db", [IRLabel("selector_bucket_3")]), + IRInstruction("db", [IRLabel("fallback")]), + IRInstruction("db", [IRLabel("selector_bucket_5")]), + IRInstruction("db", [IRLabel("selector_bucket_6")]), + ] + + assert_ctx_eq(parsed_ctx, expected_ctx) + + +def test_multi_function(): + parsed_ctx = parse_venom( + """ + function entry { + entry: + invoke @check_cv + jmp @wow + wow: + mstore 0, 1 + return 0, 32 + } + + function check_cv { + check_cv: + %1 = callvalue + %2 = param + jnz @no_value, @has_value, %1 + no_value: + ret %2 + has_value: + revert 0, 0 + } + + [data] + """ + ) + + expected_ctx = IRContext() + expected_ctx.add_function(entry_fn := IRFunction(IRLabel("entry"))) + + entry_bb = entry_fn.get_basic_block("entry") + entry_bb.append_instruction("invoke", IRLabel("check_cv")) + entry_bb.append_instruction("jmp", IRLabel("wow")) + + entry_fn.append_basic_block(wow_bb := IRBasicBlock(IRLabel("wow"), entry_fn)) + wow_bb.append_instruction("mstore", IRLiteral(1), IRLiteral(0)) + wow_bb.append_instruction("return", IRLiteral(32), IRLiteral(0)) + + expected_ctx.add_function(check_fn := IRFunction(IRLabel("check_cv"))) + + check_entry_bb = check_fn.get_basic_block("check_cv") + check_entry_bb.append_instruction("callvalue", ret=IRVariable("1")) + check_entry_bb.append_instruction("param", ret=IRVariable("2")) + check_entry_bb.append_instruction( + "jnz", IRVariable("1"), IRLabel("has_value"), IRLabel("no_value") + ) + check_fn.append_basic_block(no_value_bb := IRBasicBlock(IRLabel("no_value"), check_fn)) + no_value_bb.append_instruction("ret", IRVariable("2")) + + check_fn.append_basic_block(value_bb := IRBasicBlock(IRLabel("has_value"), check_fn)) + value_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) + value_bb.append_instruction("stop") + + check_fn.last_variable = 2 + + assert_ctx_eq(parsed_ctx, expected_ctx) + + +def test_multi_function_and_data(): + parsed_ctx = parse_venom( + """ + function entry { + entry: + invoke @check_cv + jmp @wow + wow: + mstore 0, 1 + return 0, 32 + } + + function check_cv { + check_cv: + %1 = callvalue + %2 = param + jnz @no_value, @has_value, %1 + no_value: + ret %2 + has_value: + revert 0, 0 + } + + [data] + dbname @selector_buckets + db @selector_bucket_0 + db @fallback + db @selector_bucket_2 + db @selector_bucket_3 + db @selector_bucket_6 + """ + ) + + expected_ctx = IRContext() + expected_ctx.add_function(entry_fn := IRFunction(IRLabel("entry"))) + + entry_bb = entry_fn.get_basic_block("entry") + entry_bb.append_instruction("invoke", IRLabel("check_cv")) + entry_bb.append_instruction("jmp", IRLabel("wow")) + + entry_fn.append_basic_block(wow_bb := IRBasicBlock(IRLabel("wow"), entry_fn)) + wow_bb.append_instruction("mstore", IRLiteral(1), IRLiteral(0)) + wow_bb.append_instruction("return", IRLiteral(32), IRLiteral(0)) + + expected_ctx.add_function(check_fn := IRFunction(IRLabel("check_cv"))) + + check_entry_bb = check_fn.get_basic_block("check_cv") + check_entry_bb.append_instruction("callvalue", ret=IRVariable("1")) + check_entry_bb.append_instruction("param", ret=IRVariable("2")) + check_entry_bb.append_instruction( + "jnz", IRVariable("1"), IRLabel("has_value"), IRLabel("no_value") + ) + check_fn.append_basic_block(no_value_bb := IRBasicBlock(IRLabel("no_value"), check_fn)) + no_value_bb.append_instruction("ret", IRVariable("2")) + + check_fn.append_basic_block(value_bb := IRBasicBlock(IRLabel("has_value"), check_fn)) + value_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) + value_bb.append_instruction("stop") + + check_fn.last_variable = 2 + + expected_ctx.data_segment = [ + IRInstruction("dbname", [IRLabel("selector_buckets")]), + IRInstruction("db", [IRLabel("selector_bucket_0")]), + IRInstruction("db", [IRLabel("fallback")]), + IRInstruction("db", [IRLabel("selector_bucket_2")]), + IRInstruction("db", [IRLabel("selector_bucket_3")]), + IRInstruction("db", [IRLabel("selector_bucket_6")]), + ] + + assert_ctx_eq(parsed_ctx, expected_ctx) diff --git a/vyper/cli/venom_main.py b/vyper/cli/venom_main.py new file mode 100755 index 0000000000..3114246e04 --- /dev/null +++ b/vyper/cli/venom_main.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +import argparse +import sys + +import vyper +import vyper.evm.opcodes as evm +from vyper.compiler.phases import generate_bytecode +from vyper.compiler.settings import OptimizationLevel, Settings, set_global_settings +from vyper.venom import generate_assembly_experimental, run_passes_on +from vyper.venom.parser import parse_venom + +""" +Standalone entry point into venom compiler. Parses venom input and emits +bytecode. +""" + + +def _parse_cli_args(): + return _parse_args(sys.argv[1:]) + + +def _parse_args(argv: list[str]): + parser = argparse.ArgumentParser( + description="Venom EVM IR parser & compiler", formatter_class=argparse.RawTextHelpFormatter + ) + parser.add_argument("input_file", help="Venom sourcefile", nargs="?") + parser.add_argument("--version", action="version", version=vyper.__long_version__) + parser.add_argument( + "--evm-version", + help=f"Select desired EVM version (default {evm.DEFAULT_EVM_VERSION})", + choices=list(evm.EVM_VERSIONS), + dest="evm_version", + ) + parser.add_argument( + "--stdin", action="store_true", help="whether to pull venom input from stdin" + ) + + args = parser.parse_args(argv) + + if args.evm_version is not None: + set_global_settings(Settings(evm_version=args.evm_version)) + + if args.stdin: + if not sys.stdin.isatty(): + venom_source = sys.stdin.read() + else: + # No input provided + print("Error: --stdin flag used but no input provided") + sys.exit(1) + else: + if args.input_file is None: + print("Error: No input file provided, either use --stdin or provide a path") + sys.exit(1) + with open(args.input_file, "r") as f: + venom_source = f.read() + + ctx = parse_venom(venom_source) + run_passes_on(ctx, OptimizationLevel.default()) + asm = generate_assembly_experimental(ctx) + bytecode = generate_bytecode(asm, compiler_metadata=None) + print(f"0x{bytecode.hex()}") + + +if __name__ == "__main__": + _parse_args(sys.argv[1:]) diff --git a/vyper/venom/README.md b/vyper/venom/README.md index ea6eabebaa..964f52b524 100644 --- a/vyper/venom/README.md +++ b/vyper/venom/README.md @@ -29,59 +29,43 @@ Venom employs two scopes: global and function level. ### Example code ```llvm -IRFunction: global - -global: - %1 = calldataload 0 - %2 = shr 224, %1 - jmp label %selector_bucket_0 - -selector_bucket_0: - %3 = xor %2, 1579456981 - %4 = iszero %3 - jnz label %1, label %2, %4 - -1: IN=[selector_bucket_0] OUT=[9] - jmp label %fallback - -2: - %5 = callvalue - %6 = calldatasize - %7 = lt %6, 164 - %8 = or %5, %7 - %9 = iszero %8 - assert %9 - stop - -fallback: - revert 0, 0 +function global { + global: + %1 = calldataload 0 + %2 = shr 224, %1 + jmp @selector_bucket_0 + + selector_bucket_0: + %3 = xor %2, 1579456981 + %4 = iszero %3 + jnz @1, @2, %4 + + 1: + jmp @fallback + + 2: + %5 = callvalue + %6 = calldatasize + %7 = lt %6, 164 + %8 = or %5, %7 + %9 = iszero %8 + assert %9 + stop + + fallback: + revert 0, 0 +} + +[data] ``` ### Grammar -Below is a (not-so-complete) grammar to describe the text format of Venom IR: +To see a definition of grammar see the [venom parser](./parser.py) -```llvm -program ::= function_declaration* - -function_declaration ::= "IRFunction:" identifier input_list? output_list? "=>" block - -input_list ::= "IN=" "[" (identifier ("," identifier)*)? "]" -output_list ::= "OUT=" "[" (identifier ("," identifier)*)? "]" - -block ::= label ":" input_list? output_list? "=>{" operation* "}" +### Compiling Venom -operation ::= "%" identifier "=" opcode operand ("," operand)* - | opcode operand ("," operand)* - -opcode ::= "calldataload" | "shr" | "shl" | "and" | "add" | "codecopy" | "mload" | "jmp" | "xor" | "iszero" | "jnz" | "label" | "lt" | "or" | "assert" | "callvalue" | "calldatasize" | "alloca" | "calldatacopy" | "invoke" | "gt" | ... - -operand ::= "%" identifier | label | integer | "label" "%" identifier -label ::= "%" identifier - -identifier ::= [a-zA-Z_][a-zA-Z0-9_]* -integer ::= [0-9]+ -``` +Vyper ships with a venom compiler which compiles venom code to bytecode directly. It can be run by running `venom`, which is installed as a standalone binary when `vyper` is installed via `pip`. ## Implementation diff --git a/vyper/venom/__init__.py b/vyper/venom/__init__.py index 593a9556a9..7d9404b9ef 100644 --- a/vyper/venom/__init__.py +++ b/vyper/venom/__init__.py @@ -72,10 +72,14 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None: DFTPass(ac, fn).run_pass() +def run_passes_on(ctx: IRContext, optimize: OptimizationLevel): + for fn in ctx.functions.values(): + _run_passes(fn, optimize) + + def generate_ir(ir: IRnode, optimize: OptimizationLevel) -> IRContext: # Convert "old" IR to "new" IR ctx = ir_node_to_venom(ir) - for fn in ctx.functions.values(): - _run_passes(fn, optimize) + run_passes_on(ctx, optimize) return ctx diff --git a/vyper/venom/analysis/cfg.py b/vyper/venom/analysis/cfg.py index 700fd73f26..2f90410cd5 100644 --- a/vyper/venom/analysis/cfg.py +++ b/vyper/venom/analysis/cfg.py @@ -23,7 +23,7 @@ def analyze(self) -> None: bb.is_reachable = False for bb in fn.get_basic_blocks(): - assert bb.is_terminated + assert bb.is_terminated, f"not terminating:\n{bb}" term = bb.instructions[-1] if term.opcode in CFG_ALTERING_INSTRUCTIONS: diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 968ce42bdf..e159a6d464 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -513,7 +513,7 @@ def insert_instruction(self, instruction: IRInstruction, index: Optional[int] = assert isinstance(instruction, IRInstruction), "instruction must be an IRInstruction" if index is None: - assert not self.is_terminated, self + assert not self.is_terminated, (self, instruction) index = len(self.instructions) instruction.parent = self instruction.ast_source = self.parent.ast_source diff --git a/vyper/venom/function.py b/vyper/venom/function.py index 0c48c9740e..2372f8ba52 100644 --- a/vyper/venom/function.py +++ b/vyper/venom/function.py @@ -12,7 +12,6 @@ class IRFunction: name: IRLabel # symbol name ctx: "IRContext" # type: ignore # noqa: F821 args: list - last_label: int last_variable: int _basic_block_dict: dict[str, IRBasicBlock] @@ -182,7 +181,6 @@ def chain_basic_blocks(self) -> None: def copy(self): new = IRFunction(self.name) new._basic_block_dict = self._basic_block_dict.copy() - new.last_label = self.last_label new.last_variable = self.last_variable return new diff --git a/vyper/venom/parser.py b/vyper/venom/parser.py new file mode 100644 index 0000000000..d2574c3b0c --- /dev/null +++ b/vyper/venom/parser.py @@ -0,0 +1,178 @@ +from lark import Lark, Transformer + +from vyper.venom.basicblock import ( + IRBasicBlock, + IRInstruction, + IRLabel, + IRLiteral, + IROperand, + IRVariable, +) +from vyper.venom.context import IRContext +from vyper.venom.function import IRFunction + +VENOM_PARSER = Lark( + """ + %import common.CNAME + %import common.DIGIT + %import common.LETTER + %import common.WS + %import common.INT + + # TODO: make data_section optional -- `function* data_section?` + start: function* data_section + + # TODO: consider making entry block implicit, e.g. + # `"{" instruction+ block* "}"` + function: "function" NAME "{" block* "}" + + data_section: "[data]" instruction* + + block: NAME ":" statement* + + statement: instruction | assignment + assignment: VAR_IDENT "=" expr + expr: instruction | CONST + instruction: OPCODE operands_list? + + operands_list: operand ("," operand)* + + operand: VAR_IDENT | CONST | LABEL + + CONST: INT + OPCODE: CNAME + VAR_IDENT: "%" INT (":" INT)? + LABEL: "@" NAME + NAME: (DIGIT|LETTER|"_")+ + + %ignore WS + """ +) + + +def _set_last_var(fn: IRFunction): + for bb in fn.get_basic_blocks(): + for inst in bb.instructions: + if inst.output is None: + continue + value = inst.output.value + assert value.startswith("%") + fn.last_variable = max(fn.last_variable, int(value[1:])) + + +def _set_last_label(ctx: IRContext): + for fn in ctx.functions.values(): + for bb in fn.get_basic_blocks(): + label = bb.label.value + label_head, *_ = label.split("_", maxsplit=1) + if label_head.isdigit(): + ctx.last_label = max(int(label_head), ctx.last_label) + + +def _ensure_terminated(bb): + # Since "revert" is not considered terminal explicitly check for it to ensure basic + # blocks are terminating + if not bb.is_terminated and any(inst.opcode == "revert" for inst in bb.instructions): + bb.append_instruction("stop") + + +class VenomTransformer(Transformer): + def start(self, children) -> IRContext: + ctx = IRContext() + funcs = children[:-1] + data_section = children[-1] + for fn_name, blocks in funcs: + fn = ctx.create_function(fn_name) + fn._basic_block_dict.clear() + + for block_name, instructions in blocks: + bb = IRBasicBlock(IRLabel(block_name), fn) + fn.append_basic_block(bb) + + for instruction in instructions: + assert isinstance(instruction, IRInstruction) # help mypy + bb.insert_instruction(instruction) + + _ensure_terminated(bb) + + _set_last_var(fn) + _set_last_label(ctx) + + ctx.data_segment = data_section + + return ctx + + def function(self, children) -> tuple[str, list[tuple[str, list[IRInstruction]]]]: + name, *blocks = children + return name, blocks + + def statement(self, children): + return children[0] + + def data_section(self, children): + return children + + def block(self, children) -> tuple[str, list[IRInstruction]]: + label, *instructions = children + return label, instructions + + def assignment(self, children) -> IRInstruction: + to, value = children + if isinstance(value, IRInstruction): + value.output = to + return value + if isinstance(value, IRLiteral): + return IRInstruction("store", [value], output=to) + raise TypeError(f"Unexpected value {value} of type {type(value)}") + + def expr(self, children): + return children[0] + + def instruction(self, children) -> IRInstruction: + if len(children) == 1: + name = children[0] + operands = [] + else: + assert len(children) == 2 + name, operands = children + + # reverse operands, venom internally represents top of stack + # as rightmost operand + return IRInstruction(name, reversed(operands)) + + def operands_list(self, children) -> list[IROperand]: + return children + + def operand(self, children) -> IROperand: + return children[0] + + def OPCODE(self, token): + return token.value + + def LABEL(self, label) -> IRLabel: + return IRLabel(label[1:]) + + def VAR_IDENT(self, var_ident) -> IRVariable: + parts = var_ident[1:].split(":", maxsplit=1) + assert 1 <= len(parts) <= 2 + varname = parts[0] + version = None + if len(parts) > 1: + version = parts[1] + return IRVariable(varname, version=version) + + def CONST(self, val) -> IRLiteral: + return IRLiteral(int(val)) + + def CNAME(self, val) -> str: + return val.value + + def NAME(self, val) -> str: + return val.value + + +def parse_venom(source: str) -> IRContext: + tree = VENOM_PARSER.parse(source) + ctx = VenomTransformer().transform(tree) + assert isinstance(ctx, IRContext) # help mypy + return ctx diff --git a/vyper/venom/passes/sccp/sccp.py b/vyper/venom/passes/sccp/sccp.py index 369be3e753..9004a357f0 100644 --- a/vyper/venom/passes/sccp/sccp.py +++ b/vyper/venom/passes/sccp/sccp.py @@ -143,7 +143,7 @@ def _handle_SSA_work_item(self, work_item: SSAWorkListItem): self._visit_expr(work_item.inst) def _lookup_from_lattice(self, op: IROperand) -> LatticeItem: - assert isinstance(op, IRVariable), "Can't get lattice for non-variable" + assert isinstance(op, IRVariable), f"Can't get lattice for non-variable ({op})" lat = self.lattice[op] assert lat is not None, f"Got undefined var {op}" return lat