Merge branch 'master' into msaroufim-patch-7
msaroufim authored Apr 20, 2023
2 parents 3aec11f + 067fd91 commit ff9809a
Showing 4 changed files with 191 additions and 6 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/benchmark_nightly.yml
@@ -65,6 +65,8 @@ jobs:
if_no_artifact_found: ignore
path: /tmp/ts_artifacts
name: ${{ matrix.hardware }}_benchmark_validation
- name: Validate Benchmark result
run: python benchmarks/validate_report.py --input-artifacts-dir /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
- name: Update benchmark artifacts for auto validation
run: python benchmarks/utils/update_artifacts.py --output /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
- name: Upload the updated benchmark artifacts for auto validation
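For reference, a minimal sketch of what the new "Validate Benchmark result" step amounts to when run locally; the hardware name below is illustrative, since the workflow fills it in from matrix.hardware:

import subprocess

hardware = "cpu"  # illustrative; CI supplies this via ${{ matrix.hardware }}
artifacts_dir = f"/tmp/ts_artifacts/{hardware}_benchmark_validation"

# Same invocation as the new workflow step above
subprocess.run(
    ["python", "benchmarks/validate_report.py", "--input-artifacts-dir", artifacts_dir],
    check=True,
)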
22 changes: 16 additions & 6 deletions benchmarks/auto_benchmark.py
@@ -17,9 +17,10 @@


class BenchmarkConfig:
def __init__(self, yaml_dict, skip_ts_install):
def __init__(self, yaml_dict, skip_ts_install, skip_upload):
self.yaml_dict = yaml_dict
self.skip_ts_install = skip_ts_install
self.skip_upload = skip_upload
self.bm_config = {}
yesterday = datetime.date.today() - datetime.timedelta(days=1)
self.bm_config["version"] = "torchserve-nightly=={}.{}.{}".format(
@@ -89,9 +90,9 @@ def load_config(self):
self.models(v)
elif k == "hardware":
self.hardware(v)
elif k == "metrics_cmd":
elif k == "metrics_cmd" and not self.skip_upload:
self.metrics_cmd(v)
elif k == "report_cmd":
elif k == "report_cmd" and not self.skip_upload:
report_cmd = v

self.bm_config["model_config_path"] = (
@@ -110,12 +111,12 @@ def load_config(self):
print("{}={}".format(k, v))


def load_benchmark_config(bm_config_path, skip_ts_install):
def load_benchmark_config(bm_config_path, skip_ts_install, skip_upload):
yaml = ruamel.yaml.YAML()
with open(bm_config_path, "r") as f:
yaml_dict = yaml.load(f)

benchmark_config = BenchmarkConfig(yaml_dict, skip_ts_install)
benchmark_config = BenchmarkConfig(yaml_dict, skip_ts_install, skip_upload)
benchmark_config.load_config()

return benchmark_config.bm_config
@@ -285,14 +286,23 @@ def main():
action="store",
help="true: skip torchserve installation. default: true",
)
parser.add_argument(
"--skip_upload",
help="true: skip uploading commands . default: false",
)

arguments = parser.parse_args()
skip_ts_config = (
False
if arguments.skip is not None and arguments.skip.lower() == "false"
else True
)
bm_config = load_benchmark_config(arguments.input, skip_ts_config)
skip_upload = (
True
if arguments.skip_upload is not None and arguments.skip_upload.lower() == "true"
else False
)
bm_config = load_benchmark_config(arguments.input, skip_ts_config, skip_upload)
benchmark_env_setup(bm_config, skip_ts_config)
run_benchmark(bm_config)
clean_up_benchmark_env(bm_config)
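The two string flags end up with opposite defaults: --skip defaults to true (TorchServe installation is skipped unless it is explicitly "false"), while --skip_upload defaults to false (the metrics_cmd/report_cmd uploads run unless it is explicitly "true"). A small sketch of that rule, using an illustrative helper name:

from typing import Optional

def parse_flag(raw: Optional[str], default: bool) -> bool:
    """Illustrative helper: an explicit 'true'/'false' wins, anything else keeps the default."""
    if raw is None:
        return default
    if raw.lower() == "true":
        return True
    if raw.lower() == "false":
        return False
    return default

assert parse_flag(None, default=True) is True      # --skip omitted: still skip the install
assert parse_flag("false", default=True) is False  # --skip false: install torchserve
assert parse_flag(None, default=False) is False    # --skip_upload omitted: uploads run
assert parse_flag("True", default=False) is True   # --skip_upload true: skip metrics/report upload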
75 changes: 75 additions & 0 deletions benchmarks/utils/report.py
@@ -0,0 +1,75 @@
import csv

METRICS_VALIDATED = [
"TS throughput",
"TS latency P50",
"TS latency P90",
"TS latency P99",
"Model_p50",
"Model_p90",
"Model_p99",
"memory_percentage_mean",
"gpu_memory_used_mean",
"cpu_percentage_mean",
"gpu_percentage_mean",
]


# Acceptable metric deviation really needs more nuanced logic.
# Example: for two-digit latencies, a 50% deviation might be acceptable,
# while for three-digit latencies 20-30% might be the right value.
# For cpu_memory < 15%, a 50% deviation works, but for CPU > 40%,
# 10-15% might be the right value.
ACCEPTABLE_METRIC_DEVIATION = 0.3


class Report:
def __init__(self, deviation=0, num_reports=0):
self.properties = {}
self.mode = None
self.throughput = 0
self.batch_size = 0
self.workers = 0
self.deviation = deviation
self.num_reports = num_reports

def _get_mode(self, csv_file):
cfg = csv_file.split("/")[-2]
cfg = cfg.split("_")
mode = cfg[0] + "_" + cfg[1]
self.mode = mode

def read_csv(self, csv_file):
with open(csv_file, newline="") as f:
reader = csv.DictReader(f)
for k, v in next(reader).items():
if k in METRICS_VALIDATED:
self.properties[k] = float(v)
self._get_mode(csv_file)

def update(self, report):
for property in self.properties:
# sum the properties to find the mean later
self.properties[property] += report.properties[property]

def mean(self):
for k, v in self.properties.items():
self.properties[k] = v / self.num_reports


def metric_valid(key, obs_val, exp_val, threshold):
    # For throughput, higher is better.
    # For memory, lower is better.
    # Lower-than-expected values are not flagged for memory-related metrics.
lower = False
if "throughput" not in key:
lower = True
return check_if_within_threshold(exp_val, obs_val, threshold) or (
(obs_val < exp_val and lower)
)


def check_if_within_threshold(value1, value2, threshold):
if float(value1) == 0.0:
return True
return abs((value1 - value2) / float(value1)) <= threshold
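A rough usage sketch of the two helpers above, assuming it runs from the benchmarks/ directory so that utils.report is importable (mirroring the import style in validate_report.py below); the numbers are made up to illustrate the 0.3 deviation rule:

from utils.report import check_if_within_threshold, metric_valid

# |100 - 75| / 100 = 0.25 <= 0.3, so a 25% drop is within tolerance
assert check_if_within_threshold(100.0, 75.0, 0.3)
# |100 - 60| / 100 = 0.40 > 0.3, so a 40% drop is flagged
assert not check_if_within_threshold(100.0, 60.0, 0.3)

# Throughput 40% below the baseline fails: higher is better and the drop exceeds the threshold
assert not metric_valid("TS throughput", 60.0, 100.0, 0.3)
# Memory usage well below the baseline passes even though the deviation exceeds 0.3
assert metric_valid("memory_percentage_mean", 50.0, 80.0, 0.3)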
98 changes: 98 additions & 0 deletions benchmarks/validate_report.py
@@ -0,0 +1,98 @@
import argparse
import os

from utils.report import (
ACCEPTABLE_METRIC_DEVIATION,
METRICS_VALIDATED,
Report,
metric_valid,
)
from utils.update_artifacts import (
BENCHMARK_ARTIFACTS_PATH,
BENCHMARK_REPORT_FILE,
BENCHMARK_REPORT_PATH,
)


def validate_reports(artifacts_dir, report_dir, deviation):
# Read baseline reports
baseline_reports = {}
num_reports = len(os.listdir(artifacts_dir))
for _d in sorted(os.listdir(artifacts_dir)):
dir = os.path.join(artifacts_dir, _d)
for subdir in sorted(os.listdir(dir)):
csv_file = os.path.join(dir, subdir, BENCHMARK_REPORT_FILE)

report = Report(deviation, num_reports)
report.read_csv(csv_file)
if subdir not in baseline_reports:
baseline_reports[subdir] = report
else:
baseline_reports[subdir].update(report)

    # Get the mean value of each property for every report
for model, report in baseline_reports.items():
report.mean()
baseline_reports[model] = report

# Read generated reports
generated_reports = {}
for subdir in sorted(os.listdir(report_dir)):
if os.path.isdir(os.path.join(report_dir, subdir)):
csv_file = os.path.join(report_dir, subdir, BENCHMARK_REPORT_FILE)
report = Report()
report.read_csv(csv_file)
generated_reports[subdir] = report

# Compare generated reports with baseline reports
error = False
for model, report in generated_reports.items():
for key in METRICS_VALIDATED:
if not metric_valid(
key,
report.properties[key],
baseline_reports[model].properties[key],
baseline_reports[model].deviation,
):
print(
f"Error while validating {key} for model: {model}, "
f"Expected value: {baseline_reports[model].properties[key]:.2f}, "
f"Observed value: {report.properties[key]:.2f}"
)
error = True
if not error:
print(f"Model {model} successfully validated")

if error:
raise Exception("Failures in benchmark validation")


def main():
parser = argparse.ArgumentParser()

parser.add_argument(
"--input-artifacts-dir",
help="directory where benchmark artifacts have been saved",
type=str,
default=BENCHMARK_ARTIFACTS_PATH,
)

parser.add_argument(
"--input-report-dir",
help="directory where current benchmark report is saved",
type=str,
default=BENCHMARK_REPORT_PATH,
)

parser.add_argument(
"--deviation",
help="acceptable variation in metrics values ",
type=float,
default=ACCEPTABLE_METRIC_DEVIATION,
)
args = parser.parse_args()
validate_reports(args.input_artifacts_dir, args.input_report_dir, args.deviation)


if __name__ == "__main__":
main()
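A hypothetical local invocation of the validator, again assuming benchmarks/ as the working directory; both paths are placeholders, and the real defaults come from utils.update_artifacts as imported above:

from utils.report import ACCEPTABLE_METRIC_DEVIATION
from validate_report import validate_reports

validate_reports(
    artifacts_dir="/tmp/ts_artifacts/cpu_benchmark_validation",  # placeholder: baseline artifacts from nightly runs
    report_dir="/tmp/ts_benchmark_report",                       # placeholder for the current run's report directory
    deviation=ACCEPTABLE_METRIC_DEVIATION,                       # 0.3 by default
)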
