Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ft-benchmark] some fixes for benchmark infra #11604

Merged
merged 1 commit into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 27 additions & 11 deletions scripts/ft-benchmark-data-sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,11 @@
from time import sleep
import numpy as np
import subprocess
import psycopg2
from psycopg2 import sql
from os import getenv, chdir
from os import chdir
import os
import json
import tempfile

# Duration of experiment in hours
DURATION = 2
import argparse

# TPS polling interval in seconds
POLL_INTERVAL = 30
Expand Down Expand Up @@ -89,24 +85,45 @@ def get_commit() -> tuple[str, datetime]:


def commit_to_db(data: dict) -> None:
    """Serialize ``data`` to a temporary JSON file and insert it into the
    ft-benchmark database via the repo's ``cli`` cargo tool.

    Args:
        data: benchmark result payload to persist (must be JSON-serializable).
    """
    print(data)  # echo the payload so the benchmark log records what was sent
    # NOTE(review): "continous" is kept as-is — it must match the on-disk
    # directory name in the nearcore repo; confirm before "fixing" the typo.
    chdir(os.path.expanduser("~/nearcore/benchmarks/continous/db/tool"))
    # Text mode with an explicit encoding so json.dump (which writes str)
    # can write directly to the file object.
    with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as fp:
        json.dump(data, fp)
        # Flush Python's buffer and force the OS to commit the bytes so the
        # child process (a separate process reading by file name) sees the
        # complete JSON document.
        fp.flush()
        os.fsync(fp.fileno())
        # shell=True on a fixed format string; fp.name is a tempfile path,
        # not untrusted input.
        subprocess.run(f"cargo run -p cli -- insert-ft-transfer {fp.name}",
                       shell=True)
        # No explicit fp.close() here: the `with` block closes (and deletes)
        # the temp file after the subprocess has finished reading it.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you should be able to remove flush and fsync by moving this close call after json.dump.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately no. Setting some options when creating the temp-file object could probably help, but I spent a lot of time on this function and didn't find such options.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message was a bit unclear, so I spent some time playing with temp-file options and the placement of fp.close() before figuring out that it just doesn't flush for some reason.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you provide steps to reproduce this? According to Python documentation, close is guaranteed to call flush: https://docs.python.org/3/library/io.html#io.IOBase.close

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.



# TODO: send signal to this process if ft-benchmark.sh decided to switch neard to another commit.
# add handling of this signal to this script
if __name__ == "__main__":

parser = argparse.ArgumentParser(
description=
"Collect data from local prometheus and send to ft-benchmark db.")
parser.add_argument('--duration',
type=int,
required=True,
help='Duration of experiment in seconds')
parser.add_argument('--users',
type=int,
required=True,
help='Number of users')
args = parser.parse_args()
DURATION = args.duration / 3600

state_size = (int(
subprocess.check_output(["du", "-s", "~/.near/localnet/node0/data"],
stderr=subprocess.PIPE,
shell=True).decode("utf-8").split()[0]) * 1024)
processed_transactions = []
time_begin = datetime.now()
while True:
print(
f"Data sender loop. Time elapsed: {(datetime.now() - time_begin).seconds} seconds"
)
if (datetime.now() - time_begin).seconds / 3600 > DURATION:
break
processed_transactions.append(calculate_processed_transactions())
Expand All @@ -131,10 +148,9 @@ def commit_to_db(data: dict) -> None:
"disjoint_workloads":
False, # TODO: probably should be filled by terraform
"num_shards": calculate_shards(),
"num_unique_users":
1000, # TODO: probably should be filled by terraform or ft-benchmark.sh
"num_unique_users": args.users,
"size_state_bytes": state_size,
"tps": int(average_tps),
"total_transactions": processed_transactions[-1],
"total_transactions": int(processed_transactions[-1]),
}
commit_to_db(response)
12 changes: 6 additions & 6 deletions scripts/run-ft-benchmark.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import argparse
import os
import subprocess
from locust.util.timespan import parse_timespan

LOCK_FILE = "/tmp/run-ft-benchmark.lock"
REPO_DIR = "~/nearcore"
REPO_DIR = os.path.expanduser("~/nearcore")


def create_lock_file(user: str) -> None:
Expand All @@ -25,7 +26,7 @@ def remove_lock_file() -> None:
def run_benchmark(repo_dir: str, time: str, users: int, shards: int, nodes: int,
                  rump_up: int) -> None:
    """Launch the FT benchmark shell script with the given parameters.

    Args:
        repo_dir: path to the nearcore checkout to run the script from.
        time: benchmark duration, passed through to the script.
        users: number of simulated users.
        shards: number of shards.
        nodes: number of nodes.
        rump_up: ramp-up rate ("rump" spelling kept — it is part of the
            existing call interface).

    Raises:
        subprocess.CalledProcessError: if the script exits non-zero
            (``check=True``).
    """
    # Hyphenated name matches scripts/start-benchmark.sh in the repo; the
    # underscore variant was the bug this PR fixes.
    benchmark_command = (
        f"./scripts/start-benchmark.sh {time} {users} {shards} {nodes} {rump_up}"
    )
    subprocess.run(benchmark_command, cwd=repo_dir, shell=True, check=True)

Expand All @@ -49,13 +50,12 @@ def main() -> None:
parser.add_argument('--nodes', type=int, default=1, help="Number of nodes")
parser.add_argument('--rump-up', type=int, default=10, help="Rump-up rate")
parser.add_argument('--user', type=str, default='unknown', help="User name")

args = parser.parse_args()

time_seconds = parse_timespan(args.time)
try:
create_lock_file(args.user)
run_benchmark(REPO_DIR, args.time, args.users, args.shards, args.nodes,
args.rump_up)
run_benchmark(REPO_DIR, time_seconds, args.users, args.shards,
args.nodes, args.rump_up)
except RuntimeError as e:
print(e)
finally:
Expand Down
8 changes: 4 additions & 4 deletions scripts/start-benchmark.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ export KEY=~/.near/localnet/node0/validator_key.json

# Run benchmark
cd pytest/tests/loadtest/locust/
nohup locust -H 127.0.0.1:3030 -f locustfiles/ft.py --funding-key=$KEY -t $TIME -u $USERS -r $RUMP_UP --processes 8 --headless &
nohup locust -H 127.0.0.1:3030 -f locustfiles/ft.py --funding-key=$KEY -t "${TIME}s" -u $USERS -r $RUMP_UP --processes 8 --headless &

# Give locust 5 minutes to start and rump up
sleep 300
# Give locust 0.5 minutes to start and rump up
sleep 30

# Run data collector
cd ~/nearcore
python3 scripts/ft-benchmark-data-sender.py
python3 scripts/ft-benchmark-data-sender.py --duration $TIME --users $USERS

echo "Benchmark completed."
Loading