Skip to content

Commit

Permalink
Add instruction offsets
Browse files Browse the repository at this point in the history
  • Loading branch information
RaoulSchaffranek committed Sep 18, 2024
1 parent c51a87f commit efb62b7
Showing 1 changed file with 51 additions and 15 deletions.
66 changes: 51 additions & 15 deletions pyevmasm/evmasm.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(
description,
operand=None,
pc=0,
offset=0,
):
"""
This represents an EVM instruction.
Expand All @@ -75,6 +76,7 @@ def __init__(
:param description: textual description of the instruction
:param operand: optional immediate operand
:param pc: optional program counter of this instruction in the program
:param offset: optional offset of this instruction in the bytecode
Example use::
Expand All @@ -83,6 +85,7 @@ def __init__(
>>> print('\tdescription:', instruction.description)
>>> print('\tgroup:', instruction.group)
>>> print('\tpc:', instruction.pc)
>>> print('\toffset:', instruction.offset)
>>> print('\tsize:', instruction.size)
>>> print('\thas_operand:', instruction.has_operand)
>>> print('\toperand_size:', instruction.operand_size)
Expand Down Expand Up @@ -110,6 +113,7 @@ def __init__(
self._description = description
self._operand = operand # Immediate operand if any
self._pc = pc
self._offset = offset

def __eq__(self, other):
"""Instructions are equal if all features match"""
Expand All @@ -122,11 +126,12 @@ def __eq__(self, other):
and self._pushes == other._pushes
and self._fee == other._fee
and self._pc == other._pc
and self._offset == other._offset
and self._description == other._description
)

def __repr__(self):
output = "Instruction(0x{:x}, {}, {:d}, {:d}, {:d}, {:d}, {}, {}, {})".format(
output = "Instruction(0x{:x}, {}, {:d}, {:d}, {:d}, {:d}, {}, {}, {}, {})".format(
self._opcode,
self._name,
self._operand_size,
Expand All @@ -136,6 +141,7 @@ def __repr__(self):
self._description,
self._operand,
self._pc,
self._offset
)
return output

Expand Down Expand Up @@ -261,6 +267,15 @@ def pc(self, value):
"""Location in the program (optional)"""
self._pc = value

@property
def offset(self):
return self._offset

@offset.setter
def offset(self, value):
"""Offset in the bytecode (optional)"""
self._offset = value

@property
def group(self):
"""Instruction classification as per the yellow paper"""
Expand Down Expand Up @@ -407,13 +422,15 @@ def is_arithmetic(self):
}


def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK):
def assemble_one(asmcode, pc=0, offset=0, fork=DEFAULT_FORK):
"""Assemble one EVM instruction from its textual representation.
:param asmcode: assembly code for one instruction
:type asmcode: str
:param pc: program counter of the instruction (optional)
:type pc: int
:param offset: offset of the instruction in the bytecode (optional)
:type offset: int
:param fork: fork name (optional)
:type fork: str
:return: An Instruction object
Expand All @@ -431,6 +448,8 @@ def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK):
instr = instruction_table[asmcode[0].upper()]
if pc:
instr.pc = pc
if offset:
instr.offset = offset
if instr.operand_size > 0:
assert len(asmcode) == 2
instr.operand = int(asmcode[1], 0)
Expand All @@ -439,13 +458,15 @@ def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK):
raise AssembleError("Something wrong at pc {:d}".format(pc))


def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK):
def assemble_all(asmcode, pc=0, offset=0, fork=DEFAULT_FORK):
""" Assemble a sequence of textual representation of EVM instructions
:param asmcode: assembly code for any number of instructions
:type asmcode: str
:param pc: program counter of the first instruction (optional)
:type pc: int
:param offset: offset of the first instruction in the bytecode (optional)
:type offset: int
:param fork: fork name (optional)
:type fork: str
:return: A generator of Instruction objects
Expand All @@ -471,18 +492,21 @@ def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK):
for line in asmcode:
if not line.strip():
continue
instr = assemble_one(line, pc=pc, fork=fork)
instr = assemble_one(line, pc=pc, offset=offset, fork=fork)
yield instr
pc += instr.size
offset += 1


def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK):
def disassemble_one(bytecode, pc=0, offset=0, fork=DEFAULT_FORK):
"""Disassemble a single instruction from a bytecode
:param bytecode: the bytecode stream
:type bytecode: str | bytes | bytearray | iterator
:param pc: program counter of the instruction (optional)
:type pc: int
:param offset: offset of the instruction in the bytecode (optional)
:type offset: int
:param fork: fork name (optional)
:type fork: str
:return: an Instruction object
Expand Down Expand Up @@ -513,6 +537,7 @@ def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK):
opcode, "INVALID", 0, 0, 0, 0, "Unspecified invalid instruction."
)
instruction.pc = pc
instruction.offset = offset

try:
if instruction.has_operand:
Expand All @@ -523,13 +548,15 @@ def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK):
return instruction


def disassemble_all(bytecode, pc=0, fork=DEFAULT_FORK):
def disassemble_all(bytecode, pc=0, offset=0, fork=DEFAULT_FORK):
"""Disassemble all instructions in bytecode
:param bytecode: an evm bytecode (binary)
:type bytecode: str | bytes | bytearray | iterator
:param pc: program counter of the first instruction (optional)
:type pc: int
:param offset: offset of the first instruction in the bytecode (optional)
:type offset: int
:param fork: fork name (optional)
:type fork: str
:return: A generator of Instruction objects
Expand Down Expand Up @@ -561,20 +588,23 @@ def disassemble_all(bytecode, pc=0, fork=DEFAULT_FORK):

bytecode = iter(bytecode)
while True:
instr = disassemble_one(bytecode, pc=pc, fork=fork)
instr = disassemble_one(bytecode, pc=pc, offset=offset, fork=fork)
if not instr:
return
pc += instr.size
offset += 1
yield instr


def disassemble(bytecode, pc=0, fork=DEFAULT_FORK):
def disassemble(bytecode, pc=0, offset=0, fork=DEFAULT_FORK):
"""Disassemble an EVM bytecode
:param bytecode: binary representation of an evm bytecode
:type bytecode: str | bytes | bytearray
:param pc: program counter of the first instruction (optional)
:type pc: int
:param offset: offset of the first instruction in the bytecode (optional)
:type offset: int
:param fork: fork name (optional)
:type fork: str
:return: the text representation of the assembler code
Expand All @@ -590,16 +620,18 @@ def disassemble(bytecode, pc=0, fork=DEFAULT_FORK):
PUSH2 0x100
"""
return "\n".join(map(str, disassemble_all(bytecode, pc=pc, fork=fork)))
return "\n".join(map(str, disassemble_all(bytecode, pc=pc, offset=offset, fork=fork)))


def assemble(asmcode, pc=0, fork=DEFAULT_FORK):
def assemble(asmcode, pc=0, offset=0, fork=DEFAULT_FORK):
""" Assemble an EVM program
:param asmcode: an evm assembler program
:type asmcode: str
:param pc: program counter of the first instruction (optional)
:type pc: int
:param offset: offset of the first instruction in the bytecode (optional)
:type offset: int
:param fork: fork name (optional)
:type fork: str
:return: the binary (bytes) representation of the bytecode
Expand All @@ -616,16 +648,18 @@ def assemble(asmcode, pc=0, fork=DEFAULT_FORK):
...
b"\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00"
"""
return b"".join(x.bytes for x in assemble_all(asmcode, pc=pc, fork=fork))
return b"".join(x.bytes for x in assemble_all(asmcode, pc=pc, offset=offset, fork=fork))


def disassemble_hex(bytecode, pc=0, fork=DEFAULT_FORK):
def disassemble_hex(bytecode, pc=0, offset=0, fork=DEFAULT_FORK):
"""Disassemble an EVM bytecode
:param bytecode: canonical representation of an evm bytecode (hexadecimal)
:type bytecode: str
:param pc: program counter of the first instruction (optional)
:type pc: int
:param offset: offset of the first instruction in the bytecode (optional)
:type offset: int
:param fork: fork name (optional)
:type fork: str
:return: the text representation of the assembler code
Expand All @@ -645,16 +679,18 @@ def disassemble_hex(bytecode, pc=0, fork=DEFAULT_FORK):
if bytecode.startswith("0x"):
bytecode = bytecode[2:]
bytecode = unhexlify(bytecode)
return disassemble(bytecode, pc=pc, fork=fork)
return disassemble(bytecode, pc=pc, offset=offset, fork=fork)


def assemble_hex(asmcode, pc=0, fork=DEFAULT_FORK):
def assemble_hex(asmcode, pc=0, offset=0, fork=DEFAULT_FORK):
""" Assemble an EVM program
:param asmcode: an evm assembler program
:type asmcode: str | iterator[Instruction]
:param pc: program counter of the first instruction (optional)
:type pc: int
:param offset: offset of the first instruction in the bytecode (optional)
:type offset: int
:param fork: fork name (optional)
:type fork: str
:return: the hex representation of the bytecode
Expand All @@ -673,7 +709,7 @@ def assemble_hex(asmcode, pc=0, fork=DEFAULT_FORK):
"""
if isinstance(asmcode, list):
return "0x" + hexlify(b"".join([x.bytes for x in asmcode])).decode("ascii")
return "0x" + hexlify(assemble(asmcode, pc=pc, fork=fork)).decode("ascii")
return "0x" + hexlify(assemble(asmcode, pc=pc, offset=offset, fork=fork)).decode("ascii")


class InstructionTable:
Expand Down

0 comments on commit efb62b7

Please sign in to comment.