diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml index 07fbac4f..732e6592 100644 --- a/.github/workflows/test-external.yml +++ b/.github/workflows/test-external.yml @@ -9,7 +9,8 @@ on: jobs: test: - runs-on: macos-latest + runs-on: ubuntu-latest + container: ghcr.io/emperororokusaki/solvers:0.1.0 strategy: fail-fast: false @@ -22,11 +23,11 @@ jobs: - repo: "a16z/cicada" dir: "cicada" cmd: "halmos --contract LibUint1024Test --function testProve --loop 256" - branch: "" + branch: "pin-solc" - repo: "a16z/cicada" dir: "cicada" cmd: "halmos --contract LibPrimeTest --function testProve --loop 256" - branch: "" + branch: "pin-solc" - repo: "farcasterxyz/contracts" dir: "farcaster-contracts" cmd: "halmos" @@ -52,20 +53,6 @@ jobs: ref: ${{ matrix.branch }} submodules: recursive - - name: Install Foundry - uses: foundry-rs/foundry-toolchain@v1 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - - name: Install dependencies - run: python -m pip install --upgrade pip - - - name: Install Halmos - run: python -m pip install -e ./halmos - - name: Test external repo - run: ${{ matrix.cmd }} -v -st --solver-timeout-assertion 0 --solver-threads 2 + run: ${{ matrix.cmd }} --statistics --debug --solver-timeout-assertion 0 --solver-threads 4 --solver-command=yices-smt2 working-directory: ${{ matrix.dir }} diff --git a/src/halmos/__main__.py b/src/halmos/__main__.py index 695b2f74..49e1e7e0 100644 --- a/src/halmos/__main__.py +++ b/src/halmos/__main__.py @@ -299,6 +299,7 @@ def run_bytecode(hexcode: str, args: Namespace) -> List[Exec]: caller=mk_caller(args), value=mk_callvalue(), data=ByteVec(), + call_scheme=EVM.CALL, ) sevm = SEVM(options) @@ -357,6 +358,7 @@ def deploy_test( caller=mk_caller(args), value=0, data=ByteVec(), + call_scheme=EVM.CREATE, ) ex = sevm.mk_exec( @@ -449,6 +451,7 @@ def setup( caller=setup_ex.message().caller, value=0, data=calldata, + call_scheme=EVM.CALL, ), ) @@ -593,6 +596,7 @@ def run( caller=setup_ex.caller(), value=0, data=cd, + call_scheme=EVM.CALL, ) # @@ -1018,7 +1022,8 @@ def solve( dump_filename = f"/tmp/{uuid.uuid4().hex}.smt2" with open(dump_filename, "w") as f: - print(f"Writing SMT query to {dump_filename}") + if args.verbose >= 1: + print(f"Writing SMT query to {dump_filename}") f.write("(set-logic QF_AUFBV)\n") f.write(query) f.write("(get-model)\n") diff --git a/src/halmos/bytevec.py b/src/halmos/bytevec.py index b6108084..a1e48ff5 100644 --- a/src/halmos/bytevec.py +++ b/src/halmos/bytevec.py @@ -295,7 +295,7 @@ def __eq__(self, other) -> bool: return self.length == len(other) and eq(self.unwrap(), other.unwrap()) def __repr__(self) -> str: - return f"SymbolicChunk({self.data!r})" + return f"SymbolicChunk({self.data!r}, start={self.start}, length={self.length})" @dataclass diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py index dbac1988..012327dd 100644 --- a/src/halmos/sevm.py +++ b/src/halmos/sevm.py @@ -79,37 +79,27 @@ class Instruction: - pc: int opcode: int - operand: Optional[ByteVec] + pc: int = -1 + next_pc: int = -1 + operand: Optional[ByteVec] = None - def __init__(self, opcode, **kwargs) -> None: + def __init__(self, opcode, pc=-1, next_pc=-1, operand=None) -> None: self.opcode = opcode - self.pc = kwargs.get("pc", -1) - self.operand = kwargs.get("operand", None) + self.pc = pc + self.next_pc = next_pc + self.operand = operand def __str__(self) -> str: - operand_str = "" - if self.operand is not None: - operand = self.operand - if isinstance(operand, bytes): - operand = con(int.from_bytes(operand, "big"), len(operand) * 8) - - expected_operand_length = instruction_length(self.opcode) - 1 - actual_operand_length = operand.size() // 8 - if expected_operand_length != actual_operand_length: - operand_str = f" ERROR {operand} ({expected_operand_length - actual_operand_length} bytes missed)" - else: - operand_str = " " + str(operand) - + operand_str = f" {hexify(self.operand)}" if self.operand is not None else "" return f"{mnemonic(self.opcode)}{operand_str}" def __repr__(self) -> str: return f"Instruction({mnemonic(self.opcode)}, pc={self.pc}, operand={repr(self.operand)})" def __len__(self) -> int: - return instruction_length(self.opcode) + return self.next_pc - self.pc def id_str(x: Any) -> str: @@ -124,11 +114,6 @@ def mnemonic(opcode) -> str: return str(opcode) -def instruction_length(opcode: Any) -> int: - opcode = int_of(opcode) - return (opcode - EVM.PUSH0 + 1) if EVM.PUSH1 <= opcode <= EVM.PUSH32 else 1 - - def is_byte(x: Any) -> bool: if is_bv(x): return eq(x.sort(), BitVecSort8) @@ -207,12 +192,15 @@ class Message: caller: Address value: Word data: ByteVec + + # we outer calls, we expect a virtual call scheme to be provided, either CREATE or CALL + call_scheme: int + is_static: bool = False - call_scheme: int = EVM.CALL gas: Optional[Word] = None def is_create(self) -> bool: - return self.call_scheme == EVM.CREATE or self.call_scheme == EVM.CREATE2 + return self.call_scheme in (EVM.CREATE, EVM.CREATE2) @dataclass @@ -299,6 +287,12 @@ def __deepcopy__(self, memo): # -> State: st.memory = self.memory.copy() return st + def dump(self, print_mem=False) -> str: + if print_mem: + return f"Stack: {str(list(reversed(self.stack)))}\n{self.str_memory()}" + else: + return f"Stack: {str(list(reversed(self.stack)))}" + def __str__(self) -> str: return f"Stack: {str(list(reversed(self.stack)))}\n{self.str_memory()}" @@ -380,7 +374,11 @@ def __init__(self, code: Optional[ByteVec] = None) -> None: self._code = code + # maps pc to decoded instruction (including operand and next_pc) + self._insn = dict() + def __init_jumpdests(self): + assert not hasattr(self, "_jumpdests") self._jumpdests = set((pc for (pc, op) in iter(self) if op == EVM.JUMPDEST)) def __iter__(self): @@ -403,18 +401,30 @@ def from_hexcode(hexcode: str): except ValueError as e: raise ValueError(f"{e} (hexcode={hexcode})") - def decode_instruction(self, pc: int) -> Instruction: + def _decode_instruction(self, pc: int) -> Instruction: opcode = int_of(self._code[pc], f"symbolic opcode at pc={pc}") if EVM.PUSH1 <= opcode <= EVM.PUSH32: - operand = self.slice(pc + 1, pc + opcode - EVM.PUSH0 + 1).unwrap() - return Instruction(opcode, pc=pc, operand=operand) + operand_offset = pc + 1 + operand_size = opcode - EVM.PUSH0 + next_pc = operand_offset + operand_size + + # TODO: consider slicing lazily + operand = self.slice(operand_offset, next_pc).unwrap() + return Instruction(opcode, pc=pc, operand=operand, next_pc=next_pc) - return Instruction(opcode, pc=pc) + return Instruction(opcode, pc=pc, next_pc=pc + 1) + + def decode_instruction(self, pc: int) -> Instruction: + insn = self._insn.get(pc, None) + if insn is None: + insn = self._decode_instruction(pc) + self._insn[pc] = insn + + return insn def next_pc(self, pc): - opcode = self[pc] - return pc + instruction_length(opcode) + return self.decode_instruction(pc).next_pc def slice(self, start, stop) -> ByteVec: return self._code.slice(start, stop) @@ -451,11 +461,9 @@ def __next__(self) -> Tuple[int, int]: try: pc = self.pc - opcode = self.contract[pc] - - # this avoids decoding instruction operands (don't slice if we don't need to) - self.pc += instruction_length(opcode) - return (pc, opcode) + insn = self.contract.decode_instruction(pc) + self.pc = insn.next_pc + return (pc, insn.opcode) except NotConcreteError: raise StopIteration @@ -580,7 +588,7 @@ class Exec: # an execution path alias: Dict[Address, Address] # address aliases # internal bookkeeping - cnts: Dict[str, Dict[int, int]] # opcode -> frequency; counters + cnts: Dict[str, int] # counters sha3s: Dict[Word, int] # sha3 hashes generated storages: Dict[Any, Any] # storage updates balances: Dict[Any, Any] # balance updates @@ -671,21 +679,16 @@ def set_code(self, who: Address, code: UnionType[ByteVec, Contract]) -> None: assert_address(who) self.code[who] = code if isinstance(code, Contract) else Contract(code) - def str_cnts(self) -> str: - return "".join( - [ - f"{x[0]}: {x[1]}\n" - for x in sorted(self.cnts["opcode"].items(), key=lambda x: x[0]) - ] - ) - def __str__(self) -> str: + return self.dump() + + def dump(self, print_mem=False) -> str: output = self.context.output.data return hexify( "".join( [ f"PC: {self.this} {self.pc} {mnemonic(self.current_opcode())}\n", - str(self.st), + self.st.dump(print_mem=print_mem), f"Balance: {self.balance}\n", f"Storage:\n", "".join( @@ -828,18 +831,16 @@ def assume_sha3_distinct(self, sha3_expr) -> None: self.sha3s[sha3_expr] = len(self.sha3s) def new_gas_id(self) -> int: - self.cnts["fresh"]["gas"] += 1 - return self.cnts["fresh"]["gas"] + self.cnts["gas"] += 1 + return self.cnts["gas"] def new_address(self) -> Address: - self.cnts["fresh"]["address"] += 1 - return con_addr( - magic_address + new_address_offset + self.cnts["fresh"]["address"] - ) + self.cnts["address"] += 1 + return con_addr(magic_address + new_address_offset + self.cnts["address"]) def new_symbol_id(self) -> int: - self.cnts["fresh"]["symbol"] += 1 - return self.cnts["fresh"]["symbol"] + self.cnts["symbol"] += 1 + return self.cnts["symbol"] def returndata(self) -> Optional[ByteVec]: """ @@ -860,21 +861,18 @@ def returndatasize(self) -> int: returndata = self.returndata() return len(returndata) if returndata is not None else 0 - def is_jumpdest(self, x: Word) -> bool: - if not is_concrete(x): - return False - - pc: int = int_of(x) - if pc < 0: - raise ValueError(pc) - - return pc in self.pgm.valid_jump_destinations() - def jumpi_id(self) -> str: - return f"{self.pc}:" + ",".join( - map(lambda x: str(x) if self.is_jumpdest(x) else "", self.st.stack) + # TODO: avoid scanning the entire stack for jumpdests every time + valid_jumpdests = self.pgm.valid_jump_destinations() + + jumpdests_str = ( + str(unboxed) + for x in self.st.stack + if (unboxed := unbox_int(x)) in valid_jumpdests ) + return f"{self.pc}:{','.join(jumpdests_str)}" + # deploy libraries and resolve library placeholders in hexcode def resolve_libs(self, creation_hexcode, deployed_hexcode, lib_references) -> str: if lib_references: @@ -2097,17 +2095,11 @@ def finalize(ex: Exec): if ex.context.depth > MAX_CALL_DEPTH: raise MessageDepthLimitError(ex.context) - # print(f"{hexify(ex.this)}@{ex.pc}", end=" ") insn = ex.current_instruction() - # print(f"insn={insn}") - opcode = insn.opcode - ex.cnts["opcode"][opcode] += 1 - if ( - "max_depth" in self.options - and sum(ex.cnts["opcode"].values()) > self.options["max_depth"] - ): + max_depth = self.options.get("max_depth", 0) + if max_depth and step_id > max_depth: continue # TODO: clean up @@ -2121,7 +2113,7 @@ def finalize(ex: Exec): self.steps[step_id] = {"parent": prev_step_id, "exec": str(ex)} if self.options.get("print_steps"): - print(ex) + print(ex.dump(print_mem=self.options.get("print_mem", False))) if opcode in [EVM.STOP, EVM.INVALID, EVM.REVERT, EVM.RETURN]: if opcode == EVM.STOP: @@ -2407,6 +2399,21 @@ def finalize(ex: Exec): codeslice: ByteVec = ex.pgm.slice(offset, offset + size) ex.st.memory.set_slice(loc, loc + size, codeslice) + elif opcode == EVM.MCOPY: + dest_offset = int_of(ex.st.pop(), "symbolic MCOPY destOffset") + src_offset = int_of(ex.st.pop(), "symbolic MCOPY srcOffset") + size = int_of(ex.st.pop(), "symbolic MCOPY size") + + if size > 0: + src_end_loc = src_offset + size + dst_end_loc = dest_offset + size + + if max(src_end_loc, dst_end_loc) > MAX_MEMORY_SIZE: + raise HalmosException("MCOPY > MAX_MEMORY_SIZE") + + data = ex.st.memory.slice(src_offset, src_end_loc) + ex.st.memory.set_slice(dest_offset, dst_end_loc, data) + elif opcode == EVM.BYTE: idx = ex.st.pop() w = ex.st.pop() @@ -2521,7 +2528,7 @@ def mk_exec( alias={}, # log=[], - cnts=defaultdict(lambda: defaultdict(int)), + cnts=defaultdict(int), sha3s={}, storages={}, balances={}, diff --git a/src/halmos/utils.py b/src/halmos/utils.py index 907cdd6d..f07b6397 100644 --- a/src/halmos/utils.py +++ b/src/halmos/utils.py @@ -257,18 +257,21 @@ def unbox_int(x: Any) -> Any: """ Attempts to convert int-like objects to int """ + if isinstance(x, int): + return x + if hasattr(x, "unwrap"): return unbox_int(x.unwrap()) if isinstance(x, bytes): return int.from_bytes(x, "big") - if is_bv(x): - x = simplify(x) - if is_bv_value(x): return x.as_long() + if is_bv(x): + x = simplify(x) + return x @@ -314,7 +317,7 @@ def hexify(x): if isinstance(x, str): return re.sub(r"\b(\d+)\b", lambda match: hex(int(match.group(1))), x) elif isinstance(x, int): - return hex(x) + return f"0x{x:02x}" elif isinstance(x, bytes): return "0x" + x.hex() elif hasattr(x, "unwrap"): @@ -557,6 +560,7 @@ class EVM: MSIZE = 0x59 GAS = 0x5A JUMPDEST = 0x5B + MCOPY = 0x5E PUSH0 = 0x5F PUSH1 = 0x60 PUSH2 = 0x61 diff --git a/tests/test_bytevec.py b/tests/test_bytevec.py index 15867536..38f7d325 100644 --- a/tests/test_bytevec.py +++ b/tests/test_bytevec.py @@ -700,3 +700,36 @@ def test_writing_bytevec_to_bytevec_makes_copy(mem): # then the memory should not be affected assert mem.unwrap() == b"hel!!!!rld" + + +def test_memory_write_slice_overlapping_forward(mem): + mem[:5] = b"hello" + + # when we write a slice that overlaps with itself + mem[2:7] = mem[:5] + + # then the existing memory should be correctly overwritten + assert mem._well_formed() + assert len(mem) == 7 + assert mem[:].unwrap() == b"hehello" + + +def test_memory_write_slice_overlapping_backward(mem): + x = BitVec("x", 256) + mem.set_word(32, x) + + # when we write a slice that overlaps with itself + mem[16:48] = mem[32:64] + + # then the existing memory should be correctly overwritten + assert mem._well_formed() + assert len(mem) == 64 + + # 16 bytes of zeroes, top half of x + assert eq(mem[:32].unwrap(), Concat(BitVecVal(0, 128), Extract(255, 128, x))) + + # just x itself + assert eq(mem[16:48].unwrap(), x) + + # bottom half of x, bottom half of x + assert eq(mem[32:64].unwrap(), Concat(Extract(127, 0, x), Extract(127, 0, x))) diff --git a/tests/test_cli.py b/tests/test_cli.py index d55f527c..21f5ab70 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -3,7 +3,7 @@ from z3 import * -from halmos.utils import EVM +from halmos.utils import EVM, hexify from halmos.sevm import con, Contract, Instruction @@ -96,7 +96,7 @@ def test_decode_mixed_bytecode(): ) disassembly = " ".join([str(contract.decode_instruction(pc)) for pc in pcs]) - assert disassembly == "PUSH20 x PUSH0 MSTORE PUSH1 20 PUSH1 12 RETURN" + assert disassembly == "PUSH20 x() PUSH0 MSTORE PUSH1 0x14 PUSH1 0x0c RETURN" # jump destination scanning assert contract.valid_jump_destinations() == set() @@ -115,7 +115,9 @@ def test_run_bytecode(args): def test_instruction(): assert str(Instruction(con(0))) == "STOP" assert str(Instruction(con(1))) == "ADD" - assert str(Instruction(con(EVM.PUSH32), operand=con(1))) == "PUSH32 1" + + push32_1_str = "PUSH32 " + hexify(con(1)) + assert str(Instruction(con(EVM.PUSH32), operand=con(1))) == push32_1_str assert str(Instruction(con(EVM.BASEFEE))) == "BASEFEE" # symbolic opcode is not supported @@ -126,13 +128,13 @@ def test_instruction(): assert str(Instruction(EVM.ADD)) == "ADD" assert ( str(Instruction(EVM.PUSH32, operand=bytes.fromhex("00" * 31 + "01"))) - == "PUSH32 1" + == push32_1_str ) def test_decode_hex(): code = Contract.from_hexcode("600100") - assert str(code.decode_instruction(0)) == "PUSH1 1" + assert str(code.decode_instruction(0)) == f"PUSH1 {hexify(1)}" assert [opcode for (pc, opcode) in code] == [0x60, 0x00] code = Contract.from_hexcode("01") @@ -146,7 +148,7 @@ def test_decode_hex(): def test_decode(): code = Contract(Concat(BitVecVal(EVM.PUSH32, 8), BitVec("x", 256))) assert len(code) == 33 - assert str(code.decode_instruction(0)) == "PUSH32 x" + assert str(code.decode_instruction(0)) == "PUSH32 x()" assert str(code.decode_instruction(33)) == "STOP" code = Contract(BitVec("x", 256)) @@ -157,7 +159,7 @@ def test_decode(): ops = list(code) assert len(ops) == 1 assert ( - str(code.decode_instruction(0)) == "PUSH3 Concat(x, 0)" + str(code.decode_instruction(0)) == "PUSH3 Concat(x(), 0x00)" ) # 'PUSH3 ERROR x (1 bytes missed)' diff --git a/tests/test_sevm.py b/tests/test_sevm.py index 5efb59ef..3c45b7e2 100644 --- a/tests/test_sevm.py +++ b/tests/test_sevm.py @@ -2,6 +2,7 @@ from z3 import * +from halmos.bytevec import ByteVec from halmos.exceptions import OutOfGasError from halmos.utils import EVM @@ -44,7 +45,15 @@ def storage(): def mk_ex(hexcode, sevm, solver, storage, caller, this): bytecode = Contract(hexcode) - message = Message(target=this, caller=caller, value=callvalue, data=[]) + + message = Message( + target=this, + caller=caller, + value=callvalue, + data=ByteVec(), + call_scheme=EVM.CALL, + ) + return sevm.mk_exec( code={this: bytecode}, storage={this: storage}, diff --git a/tests/test_traces.py b/tests/test_traces.py index 00d781f1..9861cc22 100644 --- a/tests/test_traces.py +++ b/tests/test_traces.py @@ -184,6 +184,7 @@ def mk_create_ex( caller=caller, value=value, data=ByteVec(), + call_scheme=EVM.CREATE, is_static=False, ) @@ -218,6 +219,7 @@ def mk_ex( caller=caller, value=value, data=data, + call_scheme=EVM.CALL, is_static=False, )