diff --git a/.gitignore b/.gitignore index 6cbecf6..ff845b8 100644 --- a/.gitignore +++ b/.gitignore @@ -82,6 +82,7 @@ celerybeat-schedule # virtualenv .venv venv/ +venv36/ ENV/ # Spyder project settings diff --git a/README.md b/README.md index 8119c8f..6c0ec31 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,53 @@ # pyevmasm -pyevmasm is an assembler and disassembler library for the Ethereum Virtual Machine (EVM). pyevmasm supports python 2.7 and newer. +pyevmasm is an assembler and disassembler library for the Ethereum Virtual Machine (EVM). -This library is currently new and under development. +## Examples +``` +>>> from pyevmasm import instruction_table, disassemble_hex, disassemble_all, assemble_hex +>>> instruction_table[20] +Instruction(0x14, 'EQ', 0, 2, 1, 3, 'Equality comparision.', None, 0) +>>> instruction_table['EQ'] +Instruction(0x14, 'EQ', 0, 2, 1, 3, 'Equality comparision.', None, 0) +>>> instrs = list(disassemble_all(binascii.unhexlify('608060405260043610603f57600035'))) +>>> instrs.insert(1, instruction_table['JUMPI']) +>>> a = assemble_hex(instrs) +>>> a +'0x60805760405260043610603f57600035' +>>> print(disassemble_hex(a)) +PUSH1 0x80 +JUMPI +PUSH1 0x40 +MSTORE +... +>>> assemble_hex('PUSH1 0x40\nMSTORE\n') +'0x604052' +``` -New issues, feature requests, and contributions are welcome. Join us in #ethereum channel on the [Empire Hacking Slack](https://empireslacking.herokuapp.com) to discuss Ethereum security tool development. +## evmasm +`evmasm` is a commandline utility that uses pyevmasm to assemble or disassemble EVM. -# evmasm -evmasm is a commandline utility that uses pyevmasm to assemble or disassemble EVM. Below is an example of disassembling the preamble of compiled contract. 
+``` +usage: evmasm [-h] (-a | -d | -t) [-bi] [-bo] [-i [INPUT]] [-o [OUTPUT]] + +pyevmasm the EVM assembler and disassembler +optional arguments: + -h, --help show this help message and exit + -a, --assemble Assemble EVM instructions to opcodes + -d, --disassemble Disassemble EVM to opcodes + -t, --print-opcode-table + List supported EVM opcodes + -bi, --binary-input Binary input mode (-d only) + -bo, --binary-output Binary output mode (-a only) + -i [INPUT], --input [INPUT] + Input file, default=stdin + -o [OUTPUT], --output [OUTPUT] + Output file, default=stdout +``` + + +Example: disassembling the preamble of a compiled contract. ``` $ echo -n "608060405260043610603f57600035" | evmasm -d 00000000: PUSH1 0x80 @@ -25,6 +64,8 @@ $ echo -n "608060405260043610603f57600035" | evmasm -d # Installation +Python >=2.7 or Python >=3.3 is required. + Install the latest stable version using pip: ``` pip install pyevmasm @@ -37,3 +78,7 @@ cd pyevmasm python setup.py install ``` +## Documentation +[https://pyevmasm.readthedocs.io](https://pyevmasm.readthedocs.io) + +New issues, feature requests, and contributions are welcome. Join us in the #ethereum channel on the [Empire Hacking Slack](https://empireslacking.herokuapp.com) to discuss Ethereum security tool development. diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..1a86c1b --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SPHINXPROJ = pyevmasm +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..b940991 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,10 @@ +API Reference +============= + +evmasm +------ +.. automodule:: pyevmasm.evmasm + :members: +.. py:data:: instruction_table + + Instance of InstructionTable for EVM. (see InstructionTable) \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..92d9b11 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'pyevmasm' +copyright = '2018, Trail of Bits' +author = 'Trail of Bits' + +# The short X.Y version +version = '' +# The full version, including alpha/beta/rc tags +release = '' + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + 'sphinx.ext.todo', 'sphinx.ext.viewcode', 'sphinx.ext.autodoc' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path . +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. 
Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'pyevmasmdoc' + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'pyevmasm.tex', 'pyevmasm Documentation', + 'Trail of Bits', 'manual'), +] + + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'pyevmasm', 'pyevmasm Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'pyevmasm', 'pyevmasm Documentation', + author, 'pyevmasm', 'One line description of project.', + 'Miscellaneous'), +] diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..8dfcbe4 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,21 @@ +.. pyevmasm documentation master file, created by + sphinx-quickstart on Wed Jul 11 19:50:09 2018. 
+ You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to pyevmasm's documentation! +==================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + api + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/evmasm b/evmasm deleted file mode 100755 index 44db373..0000000 --- a/evmasm +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python -import argparse -import sys -import binascii - -from pyevmasm import EVMAsm - -def main(): - parser = argparse.ArgumentParser(description="pyevmasm the EVM assembler and disassembler") - parser.add_argument('-a','--assemble', action='store_true', help='Assemble EVM instructions to opcodes') - parser.add_argument('-d', '--disassemble', action='store_true', help='Disassemble EVM to opcodes') - parser.add_argument('-i', '--input', nargs='?', type=argparse.FileType('r'), default=sys.stdin, help='Input file, default=stdin') - parser.add_argument('-o', '--output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, help='Output file, default=stdout') - parser.add_argument('-t', '--print-opcode-table', action='store_true', help='List supported EVM opcodes') - - args = parser.parse_args(sys.argv[1:]) - - if args.print_opcode_table: - table = EVMAsm._get_reverse_table() - for mnemonic in table.keys(): - # This relies on the internal format - (opcode, name, immediate_operand_size, pops, pushes, gas, description) = table[mnemonic] - print("%02x: %-16s %s" % (opcode, mnemonic, description)) - - sys.exit(0) - - if args.assemble and args.disassemble: - print("You cannot both assemble and disassemble at the same time.") - sys.exit(1) - - if not args.assemble and not args.disassemble: - args.disassemble = True - - if args.assemble: - asm = args.input.read().strip().rstrip() - args.output.write(EVMAsm.assemble_hex(asm) + "\n") - - if args.disassemble: - buf = 
args.input.read().strip().rstrip() - if buf[:3] == 'EVM': # binja prefix - buf = buf[3:] - elif buf[:2] == '0x': # hex prefixed - buf = binascii.unhexlify(buf[2:]) - else: # detect all hex buffer - buf_set = set() - for c in buf: - buf_set.add(c.lower()) - - hex_set = set(list('0123456789abcdef')) - if buf_set <= hex_set: # subset - buf = binascii.unhexlify(buf) - - insns = list(EVMAsm.disassemble_all(buf)) - for i in insns: - args.output.write("%08x: %s\n" %(i.pc, str(i))) - -if __name__ == "__main__": - main() - diff --git a/pyevmasm/__init__.py b/pyevmasm/__init__.py index 42cdeed..139ef8a 100644 --- a/pyevmasm/__init__.py +++ b/pyevmasm/__init__.py @@ -1 +1,3 @@ -from .evmasm import EVMAsm \ No newline at end of file +from .evmasm import instruction_table, Instruction # noqa: F401 +from .evmasm import assemble, assemble_all, assemble_hex, assemble_one +from .evmasm import disassemble, disassemble_all, disassemble_hex, disassemble_one diff --git a/pyevmasm/__main__.py b/pyevmasm/__main__.py new file mode 100755 index 0000000..ad6b99a --- /dev/null +++ b/pyevmasm/__main__.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +import argparse +import sys +import binascii + +from .evmasm import assemble_hex, disassemble_all, instruction_table, assemble_all + + +def main(): + parser = argparse.ArgumentParser(description="pyevmasm the EVM assembler and disassembler") + group_action = parser.add_mutually_exclusive_group(required=True) + group_action.add_argument('-a', '--assemble', action='store_true', help='Assemble EVM instructions to opcodes') + group_action.add_argument('-d', '--disassemble', action='store_true', help='Disassemble EVM to opcodes') + group_action.add_argument('-t', '--print-opcode-table', action='store_true', help='List supported EVM opcodes') + parser.add_argument('-bi', '--binary-input', action='store_true', help='Binary input mode (-d only)') + parser.add_argument('-bo', '--binary-output', action='store_true', help='Binary output mode (-a only)') + 
parser.add_argument('-i', '--input', nargs='?', default=sys.stdin, type=argparse.FileType('r'), + help='Input file, default=stdin') + parser.add_argument('-o', '--output', nargs='?', default=sys.stdout, type=argparse.FileType('w'), + help='Output file, default=stdout') + + args = parser.parse_args(sys.argv[1:]) + + if args.print_opcode_table: + for instr in instruction_table: + print('0x{:02x}: {:16s} {:s}'.format(instr.opcode, instr.name, instr.description)) + sys.exit(0) + + if args.assemble: + try: + asm = args.input.read().strip().rstrip() + except KeyboardInterrupt: + sys.exit(0) + if args.binary_output: + for i in assemble_all(asm): + if sys.version_info >= (3, 2): + args.output.buffer.write(i.bytes) + else: + args.output.write(i.bytes) + else: + args.output.write(assemble_hex(asm) + "\n") + + if args.disassemble: + if args.binary_input and sys.version_info >= (3, 2): + buf = args.input.buffer.read() + else: + try: + buf = args.input.read().strip().rstrip() + except KeyboardInterrupt: + sys.exit(0) + except UnicodeDecodeError: + print('Input is binary? 
try using -b.') + sys.exit(1) + + if buf[:3] == 'EVM': # binja prefix + buf = buf[3:] + elif buf[:2] == '0x': # hex prefixed + buf = binascii.unhexlify(buf[2:]) + else: # detect all hex buffer + buf_set = set() + for c in buf: + buf_set.add(c.lower()) + + hex_set = set(list('0123456789abcdef')) + if buf_set <= hex_set: # subset + buf = binascii.unhexlify(buf) + + insns = list(disassemble_all(buf)) + for i in insns: + args.output.write("%08x: %s\n" % (i.pc, str(i))) + + +if __name__ == "__main__": + main() diff --git a/pyevmasm/evmasm.py b/pyevmasm/evmasm.py index 22dc1f9..f1c955a 100644 --- a/pyevmasm/evmasm.py +++ b/pyevmasm/evmasm.py @@ -1,304 +1,123 @@ -from builtins import map, next, chr, range, object from binascii import hexlify, unhexlify -from .util import memoized +from builtins import map, next, range, object +from future.builtins import next, bytes + +""" + Example use:: + >>> from pyevmasm import * + >>> disassemble_one('\\x60\\x10') + Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0) + >>> assemble_one('PUSH1 0x10') + Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0) + >>> tuple(disassemble_all('\\x30\\x31')) + (Instruction(0x30, 'ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account.', None, 0), + Instruction(0x31, 'BALANCE', 0, 1, 1, 20, 'Get balance of the given account.', None, 1)) + >>> tuple(assemble_all('ADDRESS\\nBALANCE')) + (Instruction(0x30, 'ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account.', None, 0), + Instruction(0x31, 'BALANCE', 0, 1, 1, 20, 'Get balance of the given account.', None, 1)) + >>> assemble_hex('''PUSH1 0x60\n \ + BLOCKHASH\n \ + MSTORE\n \ + PUSH1 0x2\n \ + PUSH2 0x100\n \ + ''') + '0x606040526002610100' + >>> disassemble_hex('0x606040526002610100') + 'PUSH1 0x60\\nBLOCKHASH\\nMSTORE\\nPUSH1 0x2\\nPUSH2 0x100' + +""" + + +class UnknownMnemonicError(Exception): + pass + + +class UnknownOpcodeError(Exception): + pass + + +class 
InstructionTable(dict): + """ + EVM Instruction factory + Implements an immutable, iterable instruction LUT that can be indexed by both mnemonic or opcode. + + Example:: + + >>> from pyevmasm import instruction_table + >>> instruction_table[0] + Instruction(0x0, 'STOP', 0, 0, 0, 0, 'Halts execution.', None, 0) + >>> instruction_table['STOP'] + Instruction(0x0, 'STOP', 0, 0, 0, 0, 'Halts execution.', None, 0) + >>> i = instruction_table.__iter__() + >>> i.__next__() + Instruction(0x0, 'STOP', 0, 0, 0, 0, 'Halts execution.', None, 0) + >>> i.__next__() + Instruction(0x1, 'ADD', 0, 2, 1, 3, 'Addition operation.', None, 0) + >>> i.__next__() + Instruction(0x2, 'MUL', 0, 2, 1, 5, 'Multiplication operation.', None, 0) + >>> i.__next__() + Instruction(0x3, 'SUB', 0, 2, 1, 3, 'Subtraction operation.', None, 0) + + """ + + def __init__(self, *args, **kwargs): + super(InstructionTable, self).__init__(*args, **kwargs) + # Don't build the reverse LUT unless we actually use it + self._rtable = None + + def _reverse_lut(self): + reverse_table = {} + for (opcode, (name, immediate_operand_size, pops, pushes, gas, description)) in self.items(): + mnemonic = name + if name == 'PUSH': + mnemonic = '%s%d' % (name, (opcode & 0x1f) + 1) + elif name in ('SWAP', 'LOG', 'DUP'): + mnemonic = '%s%d' % (name, (opcode & 0xf) + 1) + reverse_table[mnemonic] = opcode, name, immediate_operand_size, pops, pushes, gas, description + return reverse_table + @property + def reverse_table(self): + if not self._rtable: + self._rtable = self._reverse_lut() + return self._rtable -class EVMAsm(object): - ''' - EVM Instruction factory + def __iter__(self): + self._k = iter(self.keys()) + return self - Example use:: + def next(self): + # For Python 2 support + return self.__next__() + + def __next__(self): + opcode = next(self._k) + return Instruction(opcode, *super(InstructionTable, self).__getitem__(opcode)) - >>> from evmasm import EVMAsm - >>> EVMAsm.disassemble_one('\\x60\\x10') - Instruction(0x60, 
'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0) - >>> EVMAsm.assemble_one('PUSH1 0x10') - Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0) - >>> tuple(EVMAsm.disassemble_all('\\x30\\x31')) - (Instruction(0x30, 'ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account.', None, 0), - Instruction(0x31, 'BALANCE', 0, 1, 1, 20, 'Get balance of the given account.', None, 1)) - >>> tuple(EVMAsm.assemble_all('ADDRESS\\nBALANCE')) - (Instruction(0x30, 'ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account.', None, 0), - Instruction(0x31, 'BALANCE', 0, 1, 1, 20, 'Get balance of the given account.', None, 1)) - >>> EVMAsm.assemble_hex( - ... """PUSH1 0x60 - ... BLOCKHASH - ... MSTORE - ... PUSH1 0x2 - ... PUSH2 0x100 - ... """ - ... ) - '0x606040526002610100' - >>> EVMAsm.disassemble_hex('0x606040526002610100') - 'PUSH1 0x60\\nBLOCKHASH\\nMSTORE\\nPUSH1 0x2\\nPUSH2 0x100' - ''' - class Instruction(object): - def __init__(self, opcode, name, operand_size, pops, pushes, fee, description, operand=None, pc=0): - ''' - This represents an EVM instruction. - EVMAsm will create this for you. 
- - :param opcode: the opcode value - :param name: instruction name - :param operand_size: immediate operand size in bytes - :param pops: number of items popped from the stack - :param pushes: number of items pushed into the stack - :param fee: gas fee for the instruction - :param description: textual description of the instruction - :param operand: optional immediate operand - :param pc: optional program counter of this instruction in the program - - Example use:: - - instruction = EVMAsm.assemble_one('PUSH1 0x10') - print 'Instruction: %s'% instruction - print '\tdescription:', instruction.description - print '\tgroup:', instruction.group - print '\tpc:', instruction.pc - print '\tsize:', instruction.size - print '\thas_operand:', instruction.has_operand - print '\toperand_size:', instruction.operand_size - print '\toperand:', instruction.operand - print '\tsemantics:', instruction.semantics - print '\tpops:', instruction.pops - print '\tpushes:', instruction.pushes - print '\tbytes:', '0x'+instruction.bytes.encode('hex') - print '\twrites to stack:', instruction.writes_to_stack - print '\treads from stack:', instruction.reads_from_stack - print '\twrites to memory:', instruction.writes_to_memory - print '\treads from memory:', instruction.reads_from_memory - print '\twrites to storage:', instruction.writes_to_storage - print '\treads from storage:', instruction.reads_from_storage - print '\tis terminator', instruction.is_terminator - - - ''' - self._opcode = opcode - self._name = name - self._operand_size = operand_size - self._pops = pops - self._pushes = pushes - self._fee = fee - self._description = description - self._operand = operand # Immediate operand if any - if operand_size != 0 and operand is not None: - mask = (1 << operand_size * 8) - 1 - if ~mask & operand: - raise ValueError("operand should be %d bits long" % (operand_size * 8)) - self._pc = pc - - def __eq__(self, other): - ''' Instructions are equal if all features match ''' - return 
self._opcode == other._opcode and\ - self._name == other._name and\ - self._operand == other._operand and\ - self._operand_size == other._operand_size and\ - self._pops == other._pops and\ - self._pushes == other._pushes and\ - self._fee == other._fee and\ - self._pc == other._pc and\ - self._description == other._description - - def __repr__(self): - output = 'Instruction(0x%x, %r, %d, %d, %d, %d, %r, %r, %r)' % (self._opcode, self._name, self._operand_size, - self._pops, self._pushes, self._fee, self._description, self._operand, self._pc) - return output - - def __str__(self): - output = self.name + (' 0x%x' % self.operand if self.has_operand else '') - return output - - @property - def opcode(self): - ''' The opcode as an integer ''' - return self._opcode - - @property - def name(self): - ''' The instruction name/mnemonic ''' - if self._name == 'PUSH': - return 'PUSH%d' % self.operand_size - elif self._name == 'DUP': - return 'DUP%d' % self.pops - elif self._name == 'SWAP': - return 'SWAP%d' % (self.pops - 1) - elif self._name == 'LOG': - return 'LOG%d' % (self.pops - 2) - return self._name - - def parse_operand(self, buf): - ''' Parses an operand from buf - - :param buf: a buffer - :type buf: iterator/generator/string - ''' - buf = iter(buf) + def __getitem__(self, item): + if isinstance(item, slice): + raise NotImplementedError + if isinstance(item, int): try: - operand = 0 - for _ in range(self.operand_size): - operand <<= 8 - operand |= next(buf) - self._operand = operand - except StopIteration: - raise Exception("Not enough data for decoding") - - @property - def operand_size(self): - ''' The immediate operand size ''' - return self._operand_size - - @property - def has_operand(self): - ''' True if the instruction uses an immediate operand''' - return self.operand_size > 0 - - @property - def operand(self): - ''' The immediate operand ''' - return self._operand - - @property - def pops(self): - '''Number words popped from the stack''' - return self._pops - 
- @property - def pushes(self): - '''Number words pushed to the stack''' - return self._pushes - - @property - def size(self): - ''' Size of the encoded instruction ''' - return self._operand_size + 1 - - @property - def fee(self): - ''' The basic gas fee of the instruction ''' - return self._fee - - @property - def semantics(self): - ''' Canonical semantics ''' - return self._name - - @property - def description(self): - ''' Coloquial description of the instruction ''' - return self._description - - @property - def bytes(self): - ''' Encoded instruction ''' - bytes = [] - bytes.append(chr(self._opcode)) - for offset in reversed(range(self.operand_size)): - c = (self.operand >> offset * 8) & 0xff - bytes.append(chr(c)) - return ''.join(bytes) - - @property - def pc(self): - '''Location in the program (optional)''' - return self._pc - - @property - def group(self): - '''Instruction classification as per the yellow paper''' - classes = { - 0: 'Stop and Arithmetic Operations', - 1: 'Comparison & Bitwise Logic Operations', - 2: 'SHA3', - 3: 'Environmental Information', - 4: 'Block Information', - 5: 'Stack, Memory, Storage and Flow Operations', - 6: 'Push Operations', - 7: 'Push Operations', - 8: 'Duplication Operations', - 9: 'Exchange Operations', - 0xa: 'Logging Operations', - 0xf: 'System operations' - } - return classes.get(self.opcode >> 4, 'Invalid instruction') - - @property - def uses_stack(self): - ''' True if the instruction reads/writes from/to the stack ''' - return self.reads_from_stack or self.writes_to_stack - - @property - def reads_from_stack(self): - ''' True if the instruction reads from stack ''' - return self.pops > 0 - - @property - def writes_to_stack(self): - ''' True if the instruction writes to the stack ''' - return self.pushes > 0 - - @property - def writes_to_memory(self): - ''' True if the instruction writes to memory ''' - return self.semantics in ('MSTORE', 'MSTORE8', 'CALLDATACOPY', 'CODECOPY', 'EXTCODECOPY') - - @property - def 
reads_from_memory(self): - ''' True if the instruction reads from memory ''' - return self.semantics in ('MLOAD', 'CREATE', 'CALL', 'CALLCODE', 'RETURN', 'DELEGATECALL', 'REVERT') - - @property - def writes_to_storage(self): - ''' True if the instruction writes to the storage ''' - return self.semantics in ('SSTORE') - - @property - def reads_from_storage(self): - ''' True if the instruction reads from the storage ''' - return self.semantics in ('SLOAD') - - @property - def is_terminator(self): - ''' True if the instruction is a basic block terminator ''' - return self.semantics in ('RETURN', 'STOP', 'INVALID', 'JUMP', 'JUMPI', 'SELFDESTRUCT', 'REVERT') - - @property - def is_endtx(self): - ''' True if the instruction is a transaction terminator ''' - return self.semantics in ('RETURN', 'STOP', 'INVALID', 'SELFDESTRUCT', 'REVERT') - - @property - def is_starttx(self): - ''' True if the instruction is a transaction initiator ''' - return self.semantics in ('CREATE', 'CALL', 'CALLCODE', 'DELEGATECALL') - - @property - def is_branch(self): - ''' True if the instruction is a jump''' - return self.semantics in ('JUMP', 'JUMPI') - - @property - def is_environmental(self): - ''' True if the instruction access enviromental data ''' - return self.group == 'Environmental Information' - - @property - def is_system(self): - ''' True if the instruction is a system operation ''' - return self.group == 'System operations' - - @property - def uses_block_info(self): - ''' True if the instruction access block information''' - return self.group == 'Block Information' - - @property - def is_arithmetic(self): - ''' True if the instruction is an arithmetic operation ''' - return self.semantics in ('ADD', 'MUL', 'SUB', 'DIV', 'SDIV', 'MOD', 'SMOD', 'ADDMOD', 'MULMOD', 'EXP', 'SIGNEXTEND') - - # from http://gavwood.com/paper.pdf - _table = { # opcode: (name, immediate_operand_size, pops, pushes, gas, description) + return Instruction(item, *super(InstructionTable, self).__getitem__(item)) 
+ except KeyError: + try: + return Instruction(*self.reverse_table['INVALID']) + except KeyError: + raise UnknownOpcodeError(item) + elif isinstance(item, str): + try: + return Instruction(*self.reverse_table[item]) + except KeyError: + raise UnknownMnemonicError(item) + + def __setitem__(self, key, value): + return + +# from http://gavwood.com/paper.pdf +instruction_table = InstructionTable({ + # opcode: (name, immediate_operand_size, pops, pushes, gas, description) 0x00: ('STOP', 0, 0, 0, 0, 'Halts execution.'), 0x01: ('ADD', 0, 2, 1, 3, 'Addition operation.'), 0x02: ('MUL', 0, 2, 1, 5, 'Multiplication operation.'), @@ -434,240 +253,525 @@ def is_arithmetic(self): 0xfd: ('REVERT', 0, 2, 0, 0, 'Stop execution and revert state changes, without consuming all provided gas and providing a reason.'), 0xfe: ('INVALID', 0, 0, 0, 0, 'Designated invalid instruction.'), 0xff: ('SELFDESTRUCT', 0, 1, 0, 5000, 'Halt execution and register account for later deletion.') - } + }) - @staticmethod - @memoized - def _get_reverse_table(): - ''' Build an internal table used in the assembler ''' - reverse_table = {} - for (opcode, (name, immediate_operand_size, pops, pushes, gas, description)) in list(EVMAsm._table.items()): - mnemonic = name - if name == 'PUSH': - mnemonic = '%s%d' % (name, (opcode & 0x1f) + 1) - elif name in ('SWAP', 'LOG', 'DUP'): - mnemonic = '%s%d' % (name, (opcode & 0xf) + 1) - reverse_table[mnemonic] = opcode, name, immediate_operand_size, pops, pushes, gas, description - return reverse_table +class Instruction(object): + def __init__(self, opcode, name, operand_size, pops, pushes, fee, description, operand=None, pc=0): + """ + This represents an EVM instruction. + EVMAsm will create this for you. - @staticmethod - def assemble_one(assembler, pc=0): - ''' Assemble one EVM instruction from its textual representation. 
+ :param opcode: the opcode value + :param name: instruction name + :param operand_size: immediate operand size in bytes + :param pops: number of items popped from the stack + :param pushes: number of items pushed into the stack + :param fee: gas fee for the instruction + :param description: textual description of the instruction + :param operand: optional immediate operand + :param pc: optional program counter of this instruction in the program - :param assembler: assembler code for one instruction - :param pc: program counter of the instruction(optional) - :return: An Instruction object + Example use:: - Example use:: + >>> instruction = assemble_one('PUSH1 0x10') + >>> print('Instruction: %s'% instruction) + >>> print('\tdescription:', instruction.description) + >>> print('\tgroup:', instruction.group) + >>> print('\tpc:', instruction.pc) + >>> print('\tsize:', instruction.size) + >>> print('\thas_operand:', instruction.has_operand) + >>> print('\toperand_size:', instruction.operand_size) + >>> print('\toperand:', instruction.operand) + >>> print('\tsemantics:', instruction.semantics) + >>> print('\tpops:', instruction.pops) + >>> print('\tpushes:', instruction.pushes) + >>> print('\tbytes:', '0x'+instruction.bytes.encode('hex')) + >>> print('\twrites to stack:', instruction.writes_to_stack) + >>> print('\treads from stack:', instruction.reads_from_stack) + >>> print('\twrites to memory:', instruction.writes_to_memory) + >>> print('\treads from memory:', instruction.reads_from_memory) + >>> print('\twrites to storage:', instruction.writes_to_storage) + >>> print('\treads from storage:', instruction.reads_from_storage) + >>> print('\tis terminator', instruction.is_terminator) + + + """ + self._opcode = opcode + self._name = name + self._operand_size = operand_size + self._pops = pops + self._pushes = pushes + self._fee = fee + self._description = description + self._operand = operand # Immediate operand if any + self._pc = pc + + def __eq__(self, other): + """ 
Instructions are equal if all features match """ + return self._opcode == other._opcode and \ + self._name == other._name and \ + self._operand == other._operand and \ + self._operand_size == other._operand_size and \ + self._pops == other._pops and \ + self._pushes == other._pushes and \ + self._fee == other._fee and \ + self._pc == other._pc and \ + self._description == other._description + + def __repr__(self): + output = 'Instruction(0x%x, %r, %d, %d, %d, %d, %r, %r, %r)' % ( + self._opcode, self._name, self._operand_size, + self._pops, self._pushes, self._fee, self._description, self._operand, self._pc) + return output + + def __str__(self): + return self.name + (' 0x%x' % self.operand if self.has_operand else '') + + @property + def opcode(self): + """ The opcode as an integer """ + return self._opcode + + @property + def mnemonic(self): + """ Alias for name """ + return self.name + + @property + def name(self): + """ The instruction name/mnemonic """ + if self._name == 'PUSH': + return 'PUSH%d' % self.operand_size + elif self._name == 'DUP': + return 'DUP%d' % self.pops + elif self._name == 'SWAP': + return 'SWAP%d' % (self.pops - 1) + elif self._name == 'LOG': + return 'LOG%d' % (self.pops - 2) + return self._name + + def parse_operand(self, buf): + """ Parses an operand from buf + + :param buf: a buffer + :type buf: iterator/generator/string + """ + buf = iter(buf) + try: + operand = 0 + for _ in range(self.operand_size): + operand <<= 8 + operand |= next(buf) + self._operand = operand + except StopIteration: + raise Exception("Not enough data for decoding") + + @property + def operand_size(self): + """ The immediate operand size """ + return self._operand_size + + @property + def has_operand(self): + """ True if the instruction uses an immediate operand""" + return self.operand_size > 0 + + @property + def operand(self): + return self._operand + + @operand.setter + def operand(self, value): + if self.operand_size != 0 and value is not None: + mask = (1 << 
self.operand_size * 8) - 1 + if ~mask & value: + raise ValueError("operand should be %d bits long" % (self.operand_size * 8)) + self._operand = value + + @property + def pops(self): + """ Number words popped from the stack """ + return self._pops + + @property + def pushes(self): + """ Number words pushed to the stack """ + return self._pushes + + @property + def size(self): + """ Size of the encoded instruction """ + return self._operand_size + 1 + + @property + def fee(self): + """ The basic gas fee of the instruction """ + return self._fee + + @property + def semantics(self): + """ Canonical semantics """ + return self._name + + @property + def description(self): + """ Colloquial description of the instruction """ + return self._description + + @property + def bytes(self): + """ Encoded instruction """ + b = [bytes([self._opcode])] + for offset in reversed(range(self.operand_size)): + b.append(bytes([(self.operand >> offset * 8) & 0xff])) + return b''.join(b) + + @property + def pc(self): + return self._pc + + @pc.setter + def pc(self, value): + """Location in the program (optional)""" + self._pc = value + + @property + def group(self): + """ Instruction classification as per the yellow paper """ + classes = { + 0: 'Stop and Arithmetic Operations', + 1: 'Comparison & Bitwise Logic Operations', + 2: 'SHA3', + 3: 'Environmental Information', + 4: 'Block Information', + 5: 'Stack, Memory, Storage and Flow Operations', + 6: 'Push Operations', + 7: 'Push Operations', + 8: 'Duplication Operations', + 9: 'Exchange Operations', + 0xa: 'Logging Operations', + 0xf: 'System operations' + } + return classes.get(self.opcode >> 4, 'Invalid instruction') + + @property + def uses_stack(self): + """ True if the instruction reads/writes from/to the stack """ + return self.reads_from_stack or self.writes_to_stack + + @property + def reads_from_stack(self): + """ True if the instruction reads from stack """ + return self.pops > 0 + + @property + def writes_to_stack(self): + """ 
True if the instruction writes to the stack """ + return self.pushes > 0 + + @property + def writes_to_memory(self): + """ True if the instruction writes to memory """ + return self.semantics in ('MSTORE', 'MSTORE8', 'CALLDATACOPY', 'CODECOPY', 'EXTCODECOPY') + + @property + def reads_from_memory(self): + """ True if the instruction reads from memory """ + return self.semantics in ('MLOAD', 'CREATE', 'CALL', 'CALLCODE', 'RETURN', 'DELEGATECALL', 'REVERT') + + @property + def writes_to_storage(self): + """ True if the instruction writes to the storage """ + return self.semantics in 'SSTORE' + + @property + def reads_from_storage(self): + """ True if the instruction reads from the storage """ + return self.semantics in 'SLOAD' + + @property + def is_terminator(self): + """ True if the instruction is a basic block terminator """ + return self.semantics in ('RETURN', 'STOP', 'INVALID', 'JUMP', 'JUMPI', 'SELFDESTRUCT', 'REVERT') + + @property + def is_endtx(self): + """ True if the instruction is a transaction terminator """ + return self.semantics in ('RETURN', 'STOP', 'INVALID', 'SELFDESTRUCT', 'REVERT') + + @property + def is_starttx(self): + """ True if the instruction is a transaction initiator """ + return self.semantics in ('CREATE', 'CALL', 'CALLCODE', 'DELEGATECALL') + + @property + def is_branch(self): + """ True if the instruction is a jump """ + return self.semantics in ('JUMP', 'JUMPI') + + @property + def is_environmental(self): + """ True if the instruction access enviromental data """ + return self.group == 'Environmental Information' + + @property + def is_system(self): + """ True if the instruction is a system operation """ + return self.group == 'System operations' + + @property + def uses_block_info(self): + """ True if the instruction access block information""" + return self.group == 'Block Information' + + @property + def is_arithmetic(self): + """ True if the instruction is an arithmetic operation """ + return self.semantics in ( + 'ADD', 'MUL', 
'SUB', 'DIV', 'SDIV', 'MOD', 'SMOD', 'ADDMOD', 'MULMOD', 'EXP', 'SIGNEXTEND') + + +def assemble_one(asmcode, pc=0): + """ Assemble one EVM instruction from its textual representation. + + :param asmcode: assembly code for one instruction + :type asmcode: str + :param pc: program counter of the instruction(optional) + :type pc: int + :return: An Instruction object + :rtype: Instruction - >>> print evm.EVMAsm.assemble_one('LT') + Example use:: + >>> print assemble_one('LT') - ''' - try: - _reverse_table = EVMAsm._get_reverse_table() - assembler = assembler.strip().split(' ') - opcode, name, operand_size, pops, pushes, gas, description = _reverse_table[assembler[0].upper()] - if operand_size > 0: - assert len(assembler) == 2 - operand = int(assembler[1], 0) - else: - assert len(assembler) == 1 - operand = None - - return EVMAsm.Instruction(opcode, name, operand_size, pops, pushes, gas, description, operand=operand, pc=pc) - except BaseException: - raise Exception("Something wrong at pc %d" % pc) - - @staticmethod - def assemble_all(assembler, pc=0): - ''' Assemble a sequence of textual representation of EVM instructions - - :param assembler: assembler code for any number of instructions - :param pc: program counter of the first instruction(optional) - :return: An generator of Instruction objects - - Example use:: - - >>> evm.EVMAsm.encode_one("""PUSH1 0x60 - PUSH1 0x40 - MSTORE - PUSH1 0x2 - PUSH2 0x108 - PUSH1 0x0 - POP - SSTORE - PUSH1 0x40 - MLOAD - """) - - ''' - if isinstance(assembler, str): - assembler = assembler.split('\n') - assembler = iter(assembler) - for line in assembler: - if not line.strip(): - continue - instr = EVMAsm.assemble_one(line, pc=pc) - yield instr - pc += instr.size - - @staticmethod - def disassemble_one(bytecode, pc=0): - ''' Decode a single instruction from a bytecode - - :param bytecode: the bytecode stream - :type bytecode: bytearray or str - :param pc: program counter of the instruction(optional) - :type bytecode: 
iterator/sequence/str - :return: an Instruction object - - Example use:: - - >>> print EVMAsm.disassemble_one('\x60\x10') - - ''' - if isinstance(bytecode, (str, bytes)): - bytecode = bytearray(bytecode.encode()) - bytecode = iter(bytecode) + + """ + try: + asmcode = asmcode.strip().split(' ') + instr = instruction_table[asmcode[0].upper()] + if pc: + instr.pc = pc + if instr.operand_size > 0: + assert len(asmcode) == 2 + instr.operand = int(asmcode[1], 0) + return instr + except BaseException: + raise Exception("Something wrong at pc %d" % pc) + + +def assemble_all(asmcode, pc=0): + """ Assemble a sequence of textual representation of EVM instructions + + :param asmcode: assembly code for any number of instructions + :type asmcode: str + :param pc: program counter of the first instruction(optional) + :type pc: int + :return: An generator of Instruction objects + :rtype: generator[Instructions] + + Example use:: + + >>> assemble_one('''PUSH1 0x60\n \ + PUSH1 0x40\n \ + MSTORE\n \ + PUSH1 0x2\n \ + PUSH2 0x108\n \ + PUSH1 0x0\n \ + POP\n \ + SSTORE\n \ + PUSH1 0x40\n \ + MLOAD\n \ + ''') + + """ + asmcode = asmcode.split('\n') + asmcode = iter(asmcode) + for line in asmcode: + if not line.strip(): + continue + instr = assemble_one(line, pc=pc) + yield instr + pc += instr.size + + +def disassemble_one(bytecode, pc=0): + """ Disassemble a single instruction from a bytecode + + :param bytecode: the bytecode stream + :type bytecode: str | bytes | bytearray | iterator + :param pc: program counter of the instruction(optional) + :type pc: int + :return: an Instruction object + :rtype: Instruction + + Example use:: + + >>> print disassemble_one('\x60\x10') + + """ + if isinstance(bytecode, bytes): + bytecode = bytearray(bytecode) + if isinstance(bytecode, str): + bytecode = bytearray(bytecode.encode('latin-1')) + + bytecode = iter(bytecode) + try: opcode = next(bytecode) - assert isinstance(opcode, int) - - invalid = ('INVALID', 0, 0, 0, 0, 'Unknown opcode') - name, 
operand_size, pops, pushes, gas, description = EVMAsm._table.get(opcode, invalid) - instruction = EVMAsm.Instruction(opcode, name, operand_size, pops, pushes, gas, description, pc=pc) - if instruction.has_operand: - instruction.parse_operand(bytecode) - - return instruction - - @staticmethod - def disassemble_all(bytecode, pc=0): - ''' Decode all instructions in bytecode - - :param bytecode: an evm bytecode (binary) - :param pc: program counter of the first instruction(optional) - :type bytecode: iterator/sequence/str - :return: An generator of Instruction objects - - Example use:: - - >>> for inst in EVMAsm.decode_all(bytecode): - ... print inst - - ... - PUSH1 0x60 - PUSH1 0x40 - MSTORE - PUSH1 0x2 - PUSH2 0x108 - PUSH1 0x0 - POP - SSTORE - PUSH1 0x40 - MLOAD - - - ''' - - if isinstance(bytecode, str): - bytecode = bytearray(bytecode.encode()) - bytecode = iter(bytecode) - while True: - instr = EVMAsm.disassemble_one(bytecode, pc=pc) - pc += instr.size - yield instr - - @staticmethod - def disassemble(bytecode, pc=0): - ''' Disassemble an EVM bytecode - - :param bytecode: binary representation of an evm bytecode (hexadecimal) - :param pc: program counter of the first instruction(optional) - :type bytecode: str - :return: the text representation of the aseembler code - - Example use:: - - >>> EVMAsm.disassemble("\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00") - ... - PUSH1 0x60 - BLOCKHASH - MSTORE - PUSH1 0x2 - PUSH2 0x100 - - ''' - return '\n'.join(map(str, EVMAsm.disassemble_all(bytecode, pc=pc))) - - @staticmethod - def assemble(asmcode, pc=0): - ''' Assemble an EVM program - - :param asmcode: an evm assembler program - :param pc: program counter of the first instruction(optional) - :type asmcode: str - :return: the hex representation of the bytecode - - Example use:: - - >>> EVMAsm.assemble( """PUSH1 0x60 - BLOCKHASH - MSTORE - PUSH1 0x2 - PUSH2 0x100 - """ - ) - ... 
- "\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00" - ''' - return ''.join([x.bytes for x in EVMAsm.assemble_all(asmcode, pc=pc)]) - - @staticmethod - def disassemble_hex(bytecode, pc=0): - ''' Disassemble an EVM bytecode - - :param bytecode: canonical representation of an evm bytecode (hexadecimal) - :param pc: program counter of the first instruction(optional) - :type bytecode: str - :return: the text representation of the aseembler code - - Example use:: - - >>> EVMAsm.disassemble_hex("0x6060604052600261010") - ... - PUSH1 0x60 - BLOCKHASH - MSTORE - PUSH1 0x2 - PUSH2 0x100 - - ''' - if bytecode.startswith('0x'): - bytecode = bytecode[2:] - bytecode = unhexlify(bytecode.encode()) - return EVMAsm.disassemble(bytecode, pc=pc) - - @staticmethod - def assemble_hex(asmcode, pc=0): - ''' Assemble an EVM program - - :param asmcode: an evm assembler program - :param pc: program counter of the first instruction(optional) - :type asmcode: str - :return: the hex representation of the bytecode - - Example use:: - - >>> EVMAsm.assemble_hex( """PUSH1 0x60 - BLOCKHASH - MSTORE - PUSH1 0x2 - PUSH2 0x100 - """ - ) - ... - "0x6060604052600261010" - ''' - return '0x' + hexlify(EVMAsm.assemble(asmcode, pc=pc).encode()).decode() \ No newline at end of file + except StopIteration: + return + + assert isinstance(opcode, int) + + instruction = instruction_table[opcode] + instruction.pc = pc + + if instruction.has_operand: + instruction.parse_operand(bytecode) + + return instruction + + +def disassemble_all(bytecode, pc=0): + """ Disassemble all instructions in bytecode + + :param bytecode: an evm bytecode (binary) + :type bytecode: str | bytes | bytearray | iterator + :param pc: program counter of the first instruction(optional) + :type pc: int + :return: An generator of Instruction objects + :rtype: list[Instruction] + + Example use:: + + >>> for inst in disassemble_all(bytecode): + ... print(instr) + + ... 
+ PUSH1 0x60 + PUSH1 0x40 + MSTORE + PUSH1 0x2 + PUSH2 0x108 + PUSH1 0x0 + POP + SSTORE + PUSH1 0x40 + MLOAD + + + """ + + if isinstance(bytecode, bytes): + bytecode = bytearray(bytecode) + if isinstance(bytecode, str): + bytecode = bytearray(bytecode.encode('latin-1')) + + bytecode = iter(bytecode) + while True: + instr = disassemble_one(bytecode, pc=pc) + if not instr: + return + pc += instr.size + yield instr + + +def disassemble(bytecode, pc=0): + """ Disassemble an EVM bytecode + + :param bytecode: binary representation of an evm bytecode + :type bytecode: str | bytes | bytearray + :param pc: program counter of the first instruction(optional) + :type pc: int + :return: the text representation of the assembler code + + Example use:: + + >>> disassemble("\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00") + ... + PUSH1 0x60 + BLOCKHASH + MSTORE + PUSH1 0x2 + PUSH2 0x100 + + """ + return '\n'.join(map(str, disassemble_all(bytecode, pc=pc))) + + +def assemble(asmcode, pc=0): + """ Assemble an EVM program + + :param asmcode: an evm assembler program + :type asmcode: str + :param pc: program counter of the first instruction(optional) + :type pc: int + :return: the hex representation of the bytecode + :rtype: str + + Example use:: + + >>> assemble('''PUSH1 0x60\n \ + BLOCKHASH\n \ + MSTORE\n \ + PUSH1 0x2\n \ + PUSH2 0x100\n \ + ''') + ... + b"\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00" + """ + return b''.join([x.bytes for x in assemble_all(asmcode, pc=pc)]) + + +def disassemble_hex(bytecode, pc=0): + """ Disassemble an EVM bytecode + + :param bytecode: canonical representation of an evm bytecode (hexadecimal) + :type bytecode: str + :param pc: program counter of the first instruction(optional) + :type pc: int + :return: the text representation of the assembler code + :rtype: str + + Example use:: + + >>> disassemble_hex("0x6060604052600261010") + ... 
+ PUSH1 0x60 + BLOCKHASH + MSTORE + PUSH1 0x2 + PUSH2 0x100 + + """ + if bytecode.startswith('0x'): + bytecode = bytecode[2:] + bytecode = unhexlify(bytecode) + return disassemble(bytecode, pc=pc) + + +def assemble_hex(asmcode, pc=0): + """ Assemble an EVM program + + :param asmcode: an evm assembler program + :type asmcode: str | iterator[Instruction] + :param pc: program counter of the first instruction(optional) + :type pc: int + :return: the hex representation of the bytecode + :rtype: str + + Example use:: + + >>> assemble_hex('''PUSH1 0x60\n \ + BLOCKHASH\n \ + MSTORE\n \ + PUSH1 0x2\n \ + PUSH2 0x100\n \ + ''') + ... + "0x6060604052600261010" + """ + if isinstance(asmcode, list): + return '0x' + hexlify(b''.join([x.bytes for x in asmcode])).decode('ascii') + return '0x' + hexlify(assemble(asmcode, pc=pc)).decode('ascii') diff --git a/pyevmasm/util.py b/pyevmasm/util.py deleted file mode 100644 index 933212a..0000000 --- a/pyevmasm/util.py +++ /dev/null @@ -1,36 +0,0 @@ -import collections -import functools - -from future.builtins import object - - -class memoized(object): - '''Decorator. Caches a function's return value each time it is called. - If called later with the same arguments, the cached value is returned - (not reevaluated). - ''' - - def __init__(self, func): - self.func = func - self.cache = {} - - def __call__(self, *args, **kwargs): - key = args + tuple(sorted(kwargs.items())) - if not isinstance(key, collections.Hashable): - # uncacheable. a list, for instance. - # better to not cache than blow up. 
- return self.func(*args, **kwargs) - if key in self.cache: - return self.cache[key] - else: - value = self.func(*args, **kwargs) - self.cache[key] = value - return value - - def __repr__(self): - '''Return the function's docstring.''' - return self.func.__doc__ - - def __get__(self, obj, objtype): - '''Support instance methods.''' - return functools.partial(self.__call__, obj) \ No newline at end of file diff --git a/setup.py b/setup.py index 072e214..f0b6b2e 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,6 @@ name='pyevmasm', version='0.1.0', description='Ethereum Virtual Machine (EVM) assembler and disassembler', - scripts=['evmasm'], author='Trail of Bits', author_email='evmasm@trailofbits.com', url='https://github.com/trailofbits/pyevmasm', @@ -13,10 +12,17 @@ python_requires='>2.7', install_requires=[ 'future' - ], + ], extras_require={ 'dev': [ - 'nose' - ] - } + 'nose', + 'coverage', + 'flake8' + ] + }, + entry_points={ + 'console_scripts': [ + 'evmasm = pyevmasm.__main__:main' + ] + } ) diff --git a/tests/test_EVMAssembler.py b/tests/test_EVMAssembler.py index 21ec796..b35f0c0 100644 --- a/tests/test_EVMAssembler.py +++ b/tests/test_EVMAssembler.py @@ -1,50 +1,48 @@ import unittest -from pyevmasm import EVMAsm +import pyevmasm as EVMAsm +# noinspection PyPep8Naming class EVMTest_Assembler(unittest.TestCase): _multiprocess_can_split_ = True - maxDiff=None + maxDiff = None def test_ADD_1(self): - instruction = EVMAsm.disassemble_one('\x60\x10') + instruction = EVMAsm.disassemble_one(b'\x60\x10') self.assertEqual(EVMAsm.Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0), instruction) - instruction = EVMAsm.assemble_one('PUSH1 0x10') EVMAsm.Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0) - - instructions1 = EVMAsm.disassemble_all('\x30\x31') + + instructions1 = EVMAsm.disassemble_all(b'\x30\x31') instructions2 = EVMAsm.assemble_all('ADDRESS\nBALANCE') - self.assertTrue( all(a == b for a,b in 
zip(instructions1, instructions2))) + self.assertTrue(all(a == b for a, b in zip(instructions1, instructions2))) - #High level simple assembler/disassembler + # High level simple assembler/disassembler bytecode = EVMAsm.assemble_hex( - """PUSH1 0x60 - BLOCKHASH - MSTORE - PUSH1 0x2 - PUSH2 0x100 - """ - ) - self.assertEqual(bytecode, '0x606040526002610100') - - asmcode = EVMAsm.disassemble_hex('0x606040526002610100') - self.assertEqual(asmcode, '''PUSH1 0x60\nBLOCKHASH\nMSTORE\nPUSH1 0x2\nPUSH2 0x100''') - - def test_STOP(self): - insn = EVMAsm.disassemble_one('\x00') - self.assertTrue(str(insn) == 'STOP') - - def test_JUMPI(self): - insn = EVMAsm.disassemble_one('\x57') - self.assertTrue(str(insn) == 'JUMPI') - self.assertTrue(insn.is_branch) - - + """PUSH1 0x80 + BLOCKHASH + MSTORE + PUSH1 0x2 + PUSH2 0x100 + """ + ) + self.assertEqual(bytecode, '0x608040526002610100') + + asmcode = EVMAsm.disassemble_hex('0x608040526002610100') + self.assertEqual(asmcode, '''PUSH1 0x80\nBLOCKHASH\nMSTORE\nPUSH1 0x2\nPUSH2 0x100''') + + def test_STOP(self): + insn = EVMAsm.disassemble_one(b'\x00') + self.assertTrue(insn.mnemonic == 'STOP') + + def test_JUMPI(self): + insn = EVMAsm.disassemble_one(b'\x57') + self.assertTrue(insn.mnemonic == 'JUMPI') + self.assertTrue(insn.is_branch) if __name__ == '__main__': diff --git a/tox.ini b/tox.ini index 1280824..68532a2 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27,py36 +envlist = py{27,36} [testenv] deps = .[dev] @@ -18,6 +18,6 @@ max-line-length = 160 exclude = docs/,examples/,scripts/,tests/ [flake8] -ignore = E265,E501,F403,F405,E266,E712,F841,E741,E722,E731 +#ignore = E265,E501,F403,F405,E266,E712,F841,E741,E722,E731 max-line-length = 160 exclude = .tox,.*.egg,.git,docs/,examples/,scripts/,tests/