diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cf85442c4..7ec1392a1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ env: jobs: build-release: name: Build Release Candidate - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 env: BUILD_RELEASE: 1 steps: @@ -30,7 +30,7 @@ jobs: - name: Setup Local Dependencies run: ./scripts/setup-dependencies.sh - name: Build - run: scripts/build.sh + run: ./scripts/build.sh lint: name: Lint runs-on: ubuntu-20.04 @@ -43,12 +43,32 @@ jobs: - name: Setup Local Dependencies run: ./scripts/setup-dependencies.sh - name: Build - run: scripts/build.sh + run: ./scripts/build.sh - name: Lint - run: scripts/lint.sh + run: ./scripts/lint.sh + pylint: + name: Pylint + runs-on: ubuntu-22.04 + continue-on-error: true + timeout-minutes: 10 + strategy: + matrix: + python-version: ["3.10"] + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Setup Build Env + run: sudo ./scripts/install-build-tools.sh + - name: Lint with Pylint + run: ./scripts/pylint.sh unit-and-integration-test: name: Unit and Integration Tests - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 timeout-minutes: 30 steps: - uses: actions/checkout@v2 @@ -59,9 +79,9 @@ jobs: - name: Setup Local Dependencies run: ./scripts/setup-dependencies.sh - name: Build - run: scripts/build.sh + run: ./scripts/build.sh - name: Run Unit Tests - run: scripts/test.sh + run: ./scripts/test.sh - name: Shorten SHA id: vars run: echo "::set-output name=sha_short::$(git rev-parse --short HEAD)" @@ -76,7 +96,7 @@ jobs: retention-days: 7 doxygen: name: doxygen - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v2 with: @@ -94,3 +114,4 @@ jobs: name: OpenCBDC Transaction Processor docs for ${{ steps.vars.outputs.sha_short }} path: ./doxygen_generated/html/* retention-days: 7 + diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 000000000..1daa6a69d --- /dev/null +++ b/.pylintrc @@ -0,0 +1,327 @@ +# Documentation: +# https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html + +[MAIN] + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. Set to 0 for parallel processesing (default is 1) +jobs=0 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.10 + +# Discover python modules and packages in the file system subtree. +recursive=yes + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. 
+suggestion-mode=yes + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Naming style matching correct class constant names. +class-const-naming-style=snake_case + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Naming style matching correct constant names. +const-naming-style=snake_case + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Naming style matching correct variable names. +variable-naming-style=snake_case + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + asyncSetUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=10 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=24 + +# Maximum number of locals for function / method body. +max-locals=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of return / yield for function / method body. +max-returns=10 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=builtins.BaseException,builtins.Exception + + +[FORMAT] + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=79 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=yes + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. 
+logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MESSAGES CONTROL] + + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + use-implicit-booleaness-not-comparison-to-string, + use-implicit-booleaness-not-comparison-to-zero + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=7 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + +# Let 'consider-using-join' be raised when the separator to join on would be +# non-empty (resulting in expected fixes of the type: ``"- " + " - +# ".join(items)``) +suggest-join-with-non-empty-separator=yes + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Tells whether to display a full report or only the messages. Keep to toggle it +reports=no + +# Activate the evaluation score. +score=yes + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. 
+contextmanager-decorators=contextlib.contextmanager + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + + +[VARIABLES] + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=yes + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + diff --git a/README.md b/README.md index f2a8fc1fd..70423e7f1 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,11 @@ The design decisions we made to achieve these goals will help inform policy make If there are significant changes to the repository that may require manual downstream intervention (or other important updates), we will make a [NEWS post](NEWS.md). # Architecture + We have explored several architectures under two broad categories as follows: + ## UHS-Based Transaction Processor + We explored two system architectures for transaction settlement based on an [unspent transaction output (UTXO)](https://en.wikipedia.org/wiki/Unspent_transaction_output) data model and transaction format. Both architectures implement the same schema representing an [unspent hash set (UHS)](https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2018-May/015967.html) abstraction. 
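For readers new to the UHS model, here is a minimal sketch of the abstraction the paragraph above describes (illustrative only; the class name, the in-memory `set`, and the method names are assumptions for the example, not the project's actual schema or components): settling a transaction atomically checks and removes the hashes of its inputs and inserts the hashes of its outputs.

```python
# Minimal, illustrative sketch of an unspent hash set (UHS); not project code.
class UnspentHashSet:
    def __init__(self) -> None:
        self._unspent: set[bytes] = set()

    def settle(self, input_hashes: list[bytes], output_hashes: list[bytes]) -> bool:
        """Atomically swap input hashes for output hashes; reject double-spends."""
        if any(h not in self._unspent for h in input_hashes):
            return False  # an input is unknown or already spent
        self._unspent.difference_update(input_hashes)
        self._unspent.update(output_hashes)
        return True
```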
One architecture provides [linearizability](https://en.wikipedia.org/wiki/linearizability) of transactions, whereas the other only provides [serializability](https://en.wikipedia.org/wiki/Serializability). @@ -35,6 +38,7 @@ By relaxing the ordering constraint, the peak transaction throughput supported b Both architectures handle multiple geo-distributed datacenter outages with a [recovery time objective (RTO)](https://en.wikipedia.org/wiki/Disaster_recovery#Recovery_Time_Objective) of under ten seconds and a [recovery point objective (RPO)](https://en.wikipedia.org/wiki/Disaster_recovery#Recovery_Point_Objective) of zero. There are two UHS-based architectures as follows: + 1. "Atomizer" architecture - Materializes a total ordering of all transactions settled by the system in a linear sequence of batches. - Requires vertical scaling as peak transaction throughput is limited by the performance of a single system component. @@ -49,9 +53,11 @@ There are two UHS-based architectures as follows: Read the [2PC & Atomizer architecture guide](docs/uhs-architectures.md) for a detailed description of the system components and implementation of each architecture. ## Parallel Architecture for Scalably Executing smart Contracts ("PArSEC") + We built a system with a generic virtual machine layer that is capable of performing parallel executions of smart contracts. The architecture is composed of two layers: + 1. A distributed key-value data store with [ACID](https://en.wikipedia.org/wiki/ACID) database properties - This back-end data store is not constrained to any type of data and is agnostic to the execution later. 1. A generic virtual machine layer that executes programs (i.e. smart contracts) and uses the distributed key-value data store to record state @@ -62,6 +68,7 @@ The architecture is composed of two layers: - Unmodified smart contracts from the Ethereum ecosystem can be deployed directly onto our EVM implementation. Read the [PArSEC Architecture Guide](docs/parsec_architecture.md) for more details. + # Contributing and Discussion You can join the [OpenCBDC mailing list](https://dci.mit.edu/opencbdc-interest) to receive updates from technical working groups and learn more about our work. @@ -80,9 +87,9 @@ If you would like to install OpenCBDC and run it on your local machine, follow t 1. [Install Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) 1. Clone the repository (including the submodules using: `--recurse-submodules`) - ``` - git clone --recurse-submodules https://github.com/mit-dci/opencbdc-tx - ``` + ```console + $ git clone --recurse-submodules https://github.com/mit-dci/opencbdc-tx + ``` ## Setup the build environment @@ -96,6 +103,11 @@ Alternatively, if you just want to run the system, skip to the [Run the Code](#r ```console # ./scripts/install-build-tools.sh ``` + Note: Running Homebrew as root on mac via shell script is not supported, so run without sudo and when prompted, enter the root password. + ```console + $ ./scripts/install-build-tools.sh + ``` + 1. Setup project dependencies This script builds and installs a local copy of several build-dependencies that are not widely packaged. 
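Since `./scripts/install-build-tools.sh` also provisions the Python tooling used later in this guide (the virtual environment and pylint), a quick sanity check can confirm that environment is usable. The snippet below is a hypothetical helper, not a script shipped in this repository; it relies only on the standard library:

```python
# Hypothetical check (not part of the repository): confirm the active
# interpreter is a virtual environment and that pylint can be imported.
import importlib.util
import sys

def in_virtualenv() -> bool:
    # Inside a venv, sys.prefix points at the environment while
    # sys.base_prefix still points at the base interpreter.
    return sys.prefix != sys.base_prefix

if __name__ == "__main__":
    print(f"virtual environment active: {in_virtualenv()}")
    print(f"pylint importable: {importlib.util.find_spec('pylint') is not None}")
```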
@@ -110,7 +122,8 @@ Alternatively, if you just want to run the system, skip to the [Run the Code](#r ``` ### macOS -Note: If you have not already installed the Xcode CLI tools, you will need to do so: + +Note that if you have not already installed the Xcode CLI tools, you will need to: ```console # xcode-select --install @@ -124,7 +137,9 @@ This reference is housed in [an external repository](https://github.com/mit-dci/ ## Running the Code ### UHS-based Architectures (2PC & Atomizer) + See the [2PC & Atomizer User Guide](docs/2pc_atomizer_user_guide.md) + ### PArSEC Architecture See the [PArSEC User Guide](docs/parsec_user_guide.md) @@ -149,7 +164,8 @@ Users can verify the setup by running both unit/integration and end-to-end tests ## E2E Testing with Kubernetes -### Requirements: +### Requirements + - Go (go test library used to run tests) - Minikube - Helm @@ -157,6 +173,53 @@ Users can verify the setup by running both unit/integration and end-to-end tests ### Running the tests: -1. `./scripts/build-docker.sh` -1. `./scripts/test-e2e-minikube.sh` -1. Review results and logs at `testruns//` +```console +$ ./scripts/build-docker.sh +``` + +```console +$ ./scripts/test-e2e-minikube.sh +``` + +Review results and logs at `testruns//` + +## Linting + +### General + +This script checks for newlines at the end of all tracked git files except images. +Then it runs clang-format and clang-tidy on `.cpp` files in the following directories: + `src`, `tests`, `cmake-tests`, `tools`. + +```console +$ ./scripts/lint.sh +``` + +### Python + +Lint all Python files according to the ruleset defined in `.pylintrc`. +An optional code quality value (>= 5.0 and <= 10.0) can be passed as the failure threshold. + +```console +$ ./scripts/pylint.sh 8.0 +``` + +## Virtual Environment for Python + +`./scripts/install-build-tools.sh` creates a virtual environment. +Once it has run, follow these steps to run Python code. + +1. Activate the virtual environment, which has the required Python version and packages installed. + ```console + $ source ./scripts/activate-venv.sh + ``` + +2. Run Python code + ```console + (.py_venv) $ python ./scripts/.py + ``` + +3.
Exit the virtual environment + ```console + (.py_venv) $ deactivate + ``` diff --git a/scripts/plot-samples.py b/scripts/plot-samples.py new file mode 100644 index 000000000..d070d1ded --- /dev/null +++ b/scripts/plot-samples.py @@ -0,0 +1,121 @@ +import sys +import os.path +import glob +import argparse +import matplotlib.pyplot as plt +import numpy as np + + +def parse_args(): + ''' + Allow user to specify the directory containing the performance data + example usage: python plot-samples.py + generates plots for all tx_samples in the specified directory + ''' + parser = argparse.ArgumentParser(description= + 'Plot performance data from tx_samples') + # help message for the directory argument + parser.add_argument('-d', '--dir', dest='tests_dir', + action='store', default='.', type=str, + help='Directory containing performance data') + return parser.parse_args() + + +def plot_latency(fname, fig=None, ax1=None, ax2=None): + ''' + Plot the throughput and latency data from a file in the + tx_samples directory - called by scripts/native-system-benchmark.sh + # list, Axes -> void + ''' + x, y, th_moving_avg, rates, tx_vals = [], [], [], [], [] + fresh = False # is this the plot of all data, or just a single plot + + if not fig: # create new axes if necessary + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5)) + fig.suptitle(fname+" performance data") + fresh = True + + local_file = fname.split('/')[-1].split('_') + filename = f"loadgen_{local_file[-1].split('.')[0]}" + + # get data from file + data = read_in_data(fname) + # first sample in file is reference time 0 + time_start = int(data[0].split()[0]) + + # Format data for plotting + t_prev = 0 + for idx, line in enumerate(data): + d = line.split() + if len(d) < 2: + break + a = line.split() + x.append((int(a[0]) - time_start)/10**9) + y.append(int(a[1])/10**9) + if x[idx] - x[t_prev] > 1: + tx_vals.append(x[idx]) + rates.append(idx - t_prev) + th_moving_avg.append(np.mean(rates)) + t_prev = idx + + # get line of best fit + f1, f2 = np.polyfit(x, y, 1) + f1 = round(f1, 3) + f2 = round(f2, 3) + + # plot latency data + ax2.set_title("Tx delay (s) vs time since start (s)") + ax2.plot(x, y, label=f'{filename}: data') + sign = '+ ' if f2 > 0 else '' + label = f"{filename}: Line of best fit: {f1}(sec) {sign}{f2}" + ax2.plot(np.array(x), f1*np.array(x)+f2, label=label) + ax2.legend(loc="upper right") + ax2.set(xlabel="Time (s)", ylabel="Latency (s)") + + # plot throughput data + ax1.set_title("Throughput (TX/s) vs. 
time (s)") + ax1.plot(tx_vals, rates, label="Throughput") + ax1.plot(tx_vals, th_moving_avg, label="(Moving) Average Throughput") + ax1.legend(loc="upper right") + ax1.set(xlabel="Time (s)", ylabel="Throughput (TX/s)") + if fresh: + fig.savefig(f"{filename}_performance.png") + + +def read_in_data(fname) -> list: + ''' + get data from file and return as a list of lines + ''' + if not os.path.isfile(fname): + print(f'File {fname} does not exist') + sys.exit(1) + + lines = [] + try: + with open(fname, 'r') as f: + lines = f.readlines() + except IOError as e: + print(f'Error reading from file {fname}\n{e}\n') + sys.exit(1) + + return lines + + +if __name__ == '__main__': + + args = parse_args() + tests_dir = args.tests_dir + + # Get all tx sample files in the test directory + f_list = glob.glob(f'{tests_dir}/tx_samples_*.txt') + if not f_list: + print(f'No tx_samples files found in {tests_dir = }') + sys.exit(1) + + global_fig, global_axs = plt.subplots(1, 2, figsize=(12, 5)) + + for file in f_list: + plot_latency(file) + plot_latency(file, global_fig, global_axs[0], global_axs[1]) + + global_fig.savefig(f'{tests_dir}/aggregate_performance.png') diff --git a/scripts/plot.py b/scripts/plot.py deleted file mode 100644 index 855dcdef7..000000000 --- a/scripts/plot.py +++ /dev/null @@ -1,96 +0,0 @@ -import matplotlib.pyplot as plt -import numpy as np -import os.path -import glob -import sys - -# Usage: python plot.py -# generate plots from tx_samples -# list, Axes -> void -def plot_latency(fname, fig=None, ax1=None, ax2=None): - x = [] - y = [] - th_moving_avg = [] - rates = [] - tx_vals = [] - fresh = False # is this the plot of all data, or just a single plot - - if (fig == None): # create new axes if necessary - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5)) - fig.suptitle(fname+" performance data") - fresh = True - local_file = fname.split('/') - local_file = local_file[-1].split('_') - id = local_file[-1].split('.')[0] - nm = 'loadgen_' + id - - # get data from file - data = read_in_data(fname) - # first sample in file is reference time 0 - time_start = int(data[0].split()[0]) - - queue = [] - queue_max = 15 - - t_prev = 0 - - # Format data for plotting - for i in range(len(data)): - d = data[i].split() - if (len(d) < 2): - break - a = data[i].split() - x.append((int(a[0]) - time_start)/10**9) - y.append(int(a[1])/10**9) - if (x[i] - x[t_prev] > 1): - tx_vals.append(x[i]) - rates.append(i-t_prev) - th_moving_avg.append(np.mean(rates)) - t_prev = i - - # get line of best fit - f1, f2 = np.polyfit(x, y, 1) - f1 = round(f1, 3) - f2 = round(f2, 3) - - # plot latency data - ax2.set_title("Tx delay (s) vs time since start (s)") - string = nm + ': data' - ax2.plot(x, y, label=string) - sign = '+ ' if f2 > 0 else '' - string = "Line of best fit: " + str(f1) + "(sec) " + sign + str(f2) - string = nm + ': ' + string - ax2.plot(np.array(x), f1*np.array(x)+f2, label=string) - ax2.legend(loc="upper right") - ax2.set(xlabel="Time (s)", ylabel="Latency (s)") - - # plot throughput data - ax1.set_title("Throughput (TX/s) vs. 
time (s)") - ax1.plot(tx_vals, rates, label="Throughput") - ax1.plot(tx_vals, th_moving_avg, label="(Moving) Average Throughput") - ax1.legend(loc="upper right") - ax1.set(xlabel="Time (s)", ylabel="Throughput (TX/s)") - if (fresh): - fig.savefig(nm + "_performance.png") - -# get data from file -def read_in_data(fname): - if (not os.path.isfile(fname)): - raise Exception("Cannot find file " + fname) - fin = open(fname, "r") - data = fin.readlines() - fin.close() - return data - - -if __name__ == '__main__': - path = "." - # Get path to test data - if (len(sys.argv) > 1): - path = str(sys.argv[1]) - f_list = glob.glob(path + '/tx_samples_*.txt') - global_fig, global_axs = plt.subplots(1, 2, figsize=(12, 5)) - for fin in f_list: - plot_latency(fin) - plot_latency(fin, global_fig, global_axs[0], global_axs[1]) - global_fig.savefig(path + "/aggregate_performance.png") diff --git a/scripts/pylint.sh b/scripts/pylint.sh new file mode 100755 index 000000000..9c22df164 --- /dev/null +++ b/scripts/pylint.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +ROOT="$(cd "$(dirname "$0")"/.. && pwd)" +PREFIX="${ROOT}"/prefix +MIN_CODE_QUALITY=8.0 + +get_code_score() { + if [ -n "$1" ]; then + # set minimum quality to user input (int/float) if provided and (5.0 <= input <= 10.0) + if [[ $1 =~ ^([0-9]+)*([\.][0-9])?$ ]]; then + if (( $(echo "$1 >= 5.0" | bc -l) )) && (( $(echo "$1 <= 10.0" | bc -l) )); then + MIN_CODE_QUALITY=$1 + else + # In the future, we want code quality to be at minimum 8.0/10.0 + echo "Code quality score must be between 5.0 and 10.0, inclusive." + echo "Recommended code quality score is >= 8.0." + exit 1 + fi + else + echo "Code quality score must be an integer or floating point number." + exit 1 + fi + fi + echo "Linting Python code with minimum quality of $MIN_CODE_QUALITY/10.0..." +} + +check_pylint() { + if ! command -v pylint &>/dev/null; then + echo "pylint is not installed." + echo "Run 'sudo ./scripts/install-build-tools.sh' to install pylint." + exit 1 + fi +} + +get_code_score $1 +if source "${ROOT}/scripts/activate-venv.sh"; then + echo "Virtual environment activated." +else + echo "Failed to activate virtual environment." + exit 1 +fi + +check_pylint +if ! pylint scripts src tests tools --rcfile=.pylintrc \ + --fail-under=$MIN_CODE_QUALITY $(git ls-files '*.py'); then + echo "Linting failed, please fix the issues and rerun." + exit 1 +else + echo "Linting passed." +fi diff --git a/tools/bench/parsec/evm/contracts/gen_header.py b/tools/bench/parsec/evm/contracts/gen_header.py index e77c9a9fd..d8b8a8712 100644 --- a/tools/bench/parsec/evm/contracts/gen_header.py +++ b/tools/bench/parsec/evm/contracts/gen_header.py @@ -2,15 +2,16 @@ # Federal Reserve Bank of Boston # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. 
-import json import os +import json import re -# Conversion method from camelCase to snake_case -snake_convert_pattern = re.compile(r'(?_ for generating the input data necessary to # call the given method on the contract -contracts = {'artifacts/contracts/ERC20.sol/Token.json':'erc20'} - -# Load the JSON outputs of the hardhat compilation for each contract we want -# to include in the header file -loaded_contracts = {} -for k, v in contracts.items(): - with open(k) as f: - loaded_contracts[v] = json.load(f) - -# Make sure our output folder exists -if not os.path.exists('cpp_header'): - os.makedirs('cpp_header') - -with open('cpp_header/contracts.hpp', 'w+') as f: - # Write the standard copyright header in the header file - f.write('// Copyright (c) 2022 MIT Digital Currency Initiative,\n') - f.write('// Federal Reserve Bank of Boston\n') - f.write('// Distributed under the MIT software license, see the accompanying\n') - f.write('// file COPYING or http://www.opensource.org/licenses/mit-license.php.\n\n') - f.write('#ifndef OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n') - f.write('#define OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n\n') - f.write('#include "util/common/buffer.hpp"\n\n') - f.write('#include "parsec/agent/runners/evm/hash.hpp"\n\n') - - # The first 4 bytes of the input data sent to a contract are the method - # selector in ETH. It is the first 4 bytes of keccak256() - f.write('namespace cbdc::parsec::evm_contracts {\n') - - - # The first 4 bytes of the input data sent to a contract are the method - # selector in ETH. It is the first 4 bytes of keccak256() - f.write(' static constexpr size_t selector_size = 4;\n') - - # Parameters in a method call are always 32 bytes - f.write(' static constexpr size_t param_size = 32;\n\n') - - # Because parameters are 32 bytes, addresses need to be copied at a 12 bytes - # offset - f.write(' static constexpr size_t address_param_offset = 12; // in ABIs addresses are also 32 bytes\n') - - # Generate methods for all contracts - for k, v in loaded_contracts.items(): - # The data needed to deploy the contract, which is essentially the - # byte code parameter in the compiled asset JSON - f.write(' auto data_{}_deploy() -> cbdc::buffer {{\n'.format(k)) - f.write(' auto buf = cbdc::buffer::from_hex("{}");\n'.format(v['bytecode'][2:])) - f.write(' return buf.value();\n') - f.write(' }\n\n') - - # Loop over the functions in the ABI - for abi in v['abi']: - # Only make methods for functions, ignore events (for now) - if abi['type'] == 'function': - # Write the method name data__ - f.write('auto data_{}_{}('.format(k, to_snake(abi['name']))) - - # Write all parameters as function arguments - inp_idx = 0 - for inp in abi['inputs']: - tp = 'bytes32' - if inp['type'] == 'uint256': - tp = 'uint256be' - if inp['type'] == 'address': - tp = 'address' - if inp_idx > 0: - f.write(', ') - f.write('evmc::{} {}'.format(tp, to_snake(inp['name']))) - inp_idx = inp_idx + 1 - - # Write the return method and creation of the empty buffer - f.write(') -> cbdc::buffer {\n') - f.write(' auto buf = cbdc::buffer();\n') - - # Write the method selector calculation - f.write(' const auto selector_{name} = std::string("{name_raw}('.format_map(dict({'name':to_snake(abi['name']),'name_raw':abi['name']}))) - inp_idx = 0 - for inp in abi['inputs']: - if inp_idx > 0: - f.write(',') - f.write(inp['type']) - inp_idx = inp_idx + 1 - f.write(')");\n') - - # Write calculation of the selector hash and appending it to the buffer - f.write(' auto selector_hash = 
cbdc::keccak_data(selector_{name}.data(), selector_{name}.size());\n'.format_map(dict({'name':to_snake(abi['name'])}))) - f.write(' buf.append(selector_hash.data(), selector_size);\n') - - # Write code that appends the parameters to the buffer (if any) - if len(abi['inputs']) > 0: - for i, inp in enumerate(abi['inputs']): +contracts_dict = {'artifacts/contracts/ERC20.sol/Token.json':'erc20'} + +# helper functions +def create_loaded_contracts(contracts: dict) -> dict: + ''' + Load the JSON outputs of the hardhat compilation for + each contract we want to include in the header file + ''' + loaded_contracts = {} + contracts_read = 0 + for k, v in contracts.items(): + try: + with open(k, 'r', encoding='utf-8') as file: + loaded_contracts[v] = json.load(file) + contracts_read += 1 + except FileNotFoundError: + print(f'File {k} not found, skipping') + continue + except IOError: + print(f'Error reading {k}, skipping') + continue + + if contracts_read == 0: + print('No contracts loaded, exiting') + exit(1) + + return loaded_contracts + +def camel_to_snake(name) -> str: + ''' + Function to convert camelCase to snake_case + ''' + snake_convert_pattern = re.compile(r'(? None: + ''' + Function to write the header file + ''' + # Make sure our output folder exists + output_folder = 'cpp_header' + output_file = f'{output_folder}/contracts.hpp' + os.makedirs(output_folder, exist_ok=True) + + with open(output_file, 'w+', encoding='utf-8') as f: + # Write the standard copyright header in the header file + for line in copyright_license: + f.write(f'{line}\n') + f.write('\n') + + f.write('#ifndef OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n') + f.write('#define OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n\n') + f.write('#include "util/common/buffer.hpp"\n\n') + f.write('#include "parsec/agent/runners/evm/hash.hpp"\n\n') + + # Write the namespace for the contracts + f.write('namespace cbdc::parsec::evm_contracts {\n') + + # The first 4 bytes of the input data sent to a contract is the + # method selector in ETH. 
It is the first 4 bytes of + # keccak256() + f.write(' static constexpr size_t selector_size = 4;\n') + + # Parameters in a method call are always 32 bytes + f.write(' static constexpr size_t param_size = 32;\n\n') + + # Since params are 32 bytes, addrs must be copied at a 12 bytes offset + f.write(' static constexpr size_t address_param_offset = 12; // in ABIs addresses are also 32 bytes\n') + + # Generate methods for all contracts + for k, v in loaded_contracts.items(): + # The data needed to deploy the contract, which is essentially the + # byte code parameter in the compiled asset JSON + f.write(f' auto data_{k}_deploy() -> cbdc::buffer {{\n') + f.write(f' auto buf = cbdc::buffer::from_hex("{v["bytecode"][2:]}");\n') + f.write(' return buf.value();\n') + f.write(' }\n\n') + + # Loop over the functions in the ABI + for abi in v['abi']: + # Only make methods for functions, ignore events (for now) + if abi['type'] == 'function': + # Write the method name data__ + f.write(f'auto data_{k}_{camel_to_snake(abi["name"])}(') + # Write all parameters as function arguments + for idx, inp in enumerate(abi['inputs']): + tp = 'bytes32' + if inp['type'] == 'uint256': + tp = 'uint256be' if inp['type'] == 'address': - f.write(' buf.extend(address_param_offset);\n') - f.write(' buf.append({name}.bytes, sizeof({name}.bytes));\n'.format_map(dict({'name':to_snake(inp['name'])}))) - - # Return the buffer we built - f.write(' return buf;\n') - f.write(' }\n\n') - - f.write('}\n\n') - f.write('#endif // OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n\n') - + tp = 'address' + if idx > 0: + f.write(', ') + f.write(f'evmc::{tp} {camel_to_snake(inp["name"])}') + + # Write the return method and creation of the empty buffer + f.write(') -> cbdc::buffer {\n') + f.write(' auto buf = cbdc::buffer();\n') + + # Write the method selector calculation + f.write(' const auto selector_{name} = std::string("{name_raw}('.format_map(dict({'name':camel_to_snake(abi['name']),'name_raw':abi['name']}))) + for idx, inp in enumerate(abi['inputs']): + if idx > 0: + f.write(',') + f.write(inp['type']) + f.write(')");\n') + + # Write calculation of the selector hash and appending it to the buffer + f.write(' auto selector_hash = cbdc::keccak_data(selector_{name}.data(), selector_{name}.size());\n'.format_map(dict({'name':camel_to_snake(abi['name'])}))) + f.write(' buf.append(selector_hash.data(), selector_size);\n') + + # Write code that appends the params to the buffer (if any) + if len(abi['inputs']) > 0: + for inp in abi['inputs']: + if inp['type'] == 'address': + f.write(' buf.extend(address_param_offset);\n') + f.write(' buf.append({name}.bytes, sizeof({name}.bytes));\n'.format_map(dict({'name':camel_to_snake(inp['name'])}))) + f.write(' return buf;\n }\n\n') + + f.write('}\n\n') + f.write('#endif // OPENCBDC_TX_TOOLS_BENCH_PARSEC_EVM_CONTRACTS_H_\n\n') + + +if __name__ == '__main__': + + # Load the contracts + loaded_contracts_dict = create_loaded_contracts(contracts_dict) + + # Write the header file + write_header_file(loaded_contracts_dict)
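As a closing illustration of the selector logic that `gen_header.py` emits (the generated C++ helpers prepend the first `selector_size` = 4 bytes of the keccak256 hash of the canonical ABI signature to the call data), here is a small Python check. It assumes the `pycryptodome` package for the keccak primitive, which is not a dependency of this repository and is used only for this example:

```python
# Illustrative check of the Ethereum method-selector derivation used above.
from Crypto.Hash import keccak  # assumes pycryptodome is installed

def method_selector(signature: str) -> bytes:
    """Return the first 4 bytes of keccak256 of a canonical ABI signature."""
    digest = keccak.new(digest_bits=256)
    digest.update(signature.encode("ascii"))
    return digest.digest()[:4]

# The canonical ERC20 transfer signature maps to the well-known selector
# 0xa9059cbb, which the generated data_erc20_transfer() helper would place
# at the start of the call-data buffer before the encoded parameters.
assert method_selector("transfer(address,uint256)").hex() == "a9059cbb"
```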