
Automate performance benchmark #2742

Merged: 28 commits, Dec 26, 2023

Commits
e93d732
Add parameterized perf test template
goodsong81 Nov 23, 2023
f17bf41
Split acccuracy / perf tests
goodsong81 Dec 15, 2023
8b5bfad
Apply datetime-based output directoy
goodsong81 Dec 15, 2023
4debeeb
Fix choice options
goodsong81 Dec 15, 2023
b9b3566
Exec based on model ID
goodsong81 Dec 15, 2023
c733b25
Refactor out Benchmark class
goodsong81 Dec 19, 2023
3996eb3
Automate speed test setting
goodsong81 Dec 19, 2023
9d0e831
Refacor OTXBenchmark
goodsong81 Dec 20, 2023
1abbca8
Add API doc for OTXBenchmark
goodsong81 Dec 20, 2023
752928b
Add csv loading
goodsong81 Dec 20, 2023
046a882
Add tags to benchmark result
goodsong81 Dec 20, 2023
c5ff73e
Add benchmark summary fixture
goodsong81 Dec 20, 2023
4cb50d3
Merge remote-tracking branch 'upstream/develop' into test/perf
goodsong81 Dec 20, 2023
ed553df
Add multi/h-label tests
goodsong81 Dec 21, 2023
d6609f2
Fix pre-commit
goodsong81 Dec 21, 2023
519de9d
Add detection tests
goodsong81 Dec 21, 2023
e447a3c
Add instance segmentationt tests
goodsong81 Dec 21, 2023
742e8cc
Add tiling tests
goodsong81 Dec 21, 2023
c37c144
Add semantic segmenation tests
goodsong81 Dec 21, 2023
3cfaed3
Merge remote-tracking branch 'upstream/develop' into test/perf
goodsong81 Dec 21, 2023
233b18c
Add anomaly test
goodsong81 Dec 21, 2023
8db6c60
Fix anomaly max_epochs setting
goodsong81 Dec 21, 2023
e6cd073
Fix pre-commit
goodsong81 Dec 22, 2023
94bdc00
Add subset_dir_name cfg for seg datasets
goodsong81 Dec 22, 2023
fab9251
Update changelog.md
goodsong81 Dec 22, 2023
c1defe8
Merge remote-tracking branch 'upstream/develop' into test/perf
goodsong81 Dec 22, 2023
91bfec1
Reflect review comments
goodsong81 Dec 26, 2023
5e88a66
Refine doc string
goodsong81 Dec 26, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file.
### Enhancements

- Upgrade NNCF to 2.7 and OpenVINO to 2023.2 (<https://github.com/openvinotoolkit/training_extensions/pull/2656>)
- Automate performance benchmark (<https://github.com/openvinotoolkit/training_extensions/pull/2742>)

## \[v1.5.0\]

4 changes: 4 additions & 0 deletions tests/perf/__init__.py
@@ -0,0 +1,4 @@
"""OTX Perfomance tests."""

# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
200 changes: 200 additions & 0 deletions tests/perf/benchmark.py
@@ -0,0 +1,200 @@
"""OTX Benchmark based on tools/experiment.py."""

# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


# Postpone evaluation of annotations so `dict | None` unions work on Python < 3.10
from __future__ import annotations

import os
import glob
import pandas as pd
import yaml
from pathlib import Path
from typing import List, Optional

from tests.test_suite.run_test_command import check_run


class OTXBenchmark:
"""Benchmark runner based on tools/experiment.py in OTX1.x.

Example:
>>> bm = OTXBenchmark(['random_sample1', 'random_sample2'], data_root='./data/coco')
>>> atss_result = bm.run('MobileNetV2-ATSS')
>>> yolox_result = bm.run('YOLOX-TINY')

Args:
datasets (List[str]): Paths to datasets relative to the data_root.
Intended for, but not restricted to, different samplings of the same dataset.
data_root (str): Path to the root of the dataset directories. Defaults to './data'.
num_epoch (int): Overrides the per-model default number of epochs.
Defaults to 0, which means no override.
num_repeat (int): Number of trials with different random seeds, which are set
to range(0, num_repeat). Defaults to 1.
train_params (dict): Additional training parameters,
e.g. {'learning_parameters.num_iters': 2}. Defaults to {}.
track_resources (bool): Whether to track CPU & GPU usage metrics. Defaults to False.
eval_upto (str): The last serial operation to evaluate. Choose one of ('train', 'export', 'optimize').
Each operation includes the preceding ones,
e.g. eval up to 'optimize': train -> eval -> export -> eval -> optimize -> eval.
Defaults to 'train'.
output_root (str): Output root directory for logs and results. Defaults to './otx-benchmark'.
dry_run (bool): Whether to just print the OTX commands without execution. Defaults to False.
tags (dict): Key-value metadata for the experiment. Defaults to {}.
subset_dir_names (dict): Optional subdirectory names appended to the train/val/test data roots.
Defaults to {"train": "", "val": "", "test": ""}.
"""

def __init__(
self,
datasets: List[str],
data_root: str = "data",
num_epoch: int = 0,
num_repeat: int = 1,
train_params: dict | None = None,
track_resources: bool = False,
eval_upto: str = "train",
output_root: str = "otx-benchmark",
dry_run: bool = False,
tags: dict | None = None,
subset_dir_names: dict | None = None,
):
self.datasets = datasets
self.data_root = data_root
self.num_epoch = num_epoch
self.num_repeat = num_repeat
self.train_params = train_params or {}
self.track_resources = track_resources
self.eval_upto = eval_upto
self.output_root = output_root
self.dry_run = dry_run
self.tags = tags or {}
self.subset_dir_names = subset_dir_names or {"train": "", "val": "", "test": ""}

def run(
self,
model_id: str,
train_params: dict = {},
tags: dict = {},
) -> pd.DataFrame | None:
"""Run configured benchmark with given model and return the result.

Args:
model_id (str): Target model identifier
train_params (dict): Overrides global benchmark train params
tags (dict): Overrides global benchmark tags

Returns:
pd.DataFrame | None: Table with benchmark metrics
"""

# Build config file
cfg = self._build_config(model_id, train_params, tags)
cfg_dir = Path(cfg["output_path"])
cfg_dir.mkdir(parents=True, exist_ok=True)
cfg_path = cfg_dir / "cfg.yaml"
with open(cfg_path, "w") as cfg_file:
yaml.dump(cfg, cfg_file, indent=2)
cmd = [
"python",
"tools/experiment.py",
"-f",
cfg_path,
]
if self.dry_run:
cmd.append("-d")
# Run benchmark
check_run(cmd)
# Load result
result = self.load_result(cfg_dir)
return result

@staticmethod
def load_result(result_path: str) -> pd.DataFrame | None:
"""Load benchmark results recursively and merge as pd.DataFrame.

Args:
result_path (str): Result directory or specific file.

Returns:
pd.DataFrame | None: Table with benchmark metrics & options
"""
# Search csv files
if os.path.isdir(result_path):
csv_file_paths = glob.glob(f"{result_path}/**/exp_summary.csv", recursive=True)
else:
csv_file_paths = [result_path]
results = []
# Load csv data
for csv_file_path in csv_file_paths:
result = pd.read_csv(csv_file_path)
# Append metadata if any
cfg_file_path = Path(csv_file_path).parent / "cfg.yaml"
if cfg_file_path.exists():
with cfg_file_path.open("r") as cfg_file:
tags = yaml.safe_load(cfg_file).get("tags", {})
for k, v in tags.items():
result[k] = v
results.append(result)
if len(results) > 0:
return pd.concat(results, ignore_index=True)
else:
return None

def _build_config(
self,
model_id: str,
train_params: dict = {},
tags: dict = {},
) -> dict:
"""Build config for tools/expeirment.py."""
all_train_params = self.train_params.copy()
all_train_params.update(train_params)
all_tags = self.tags.copy()
all_tags.update(tags)

cfg = {}
cfg["tags"] = all_tags # metadata
cfg["output_path"] = os.path.abspath(Path(self.output_root) / "-".join(list(all_tags.values()) + [model_id]))
cfg["constants"] = {
"dataroot": os.path.abspath(self.data_root),
}
cfg["variables"] = {
"model": [model_id],
"data": self.datasets,
}
cfg["repeat"] = self.num_repeat
cfg["command"] = []
resource_param = ""
if self.track_resources:
resource_param = "--track-resource-usage all"
if self.num_epoch > 0:
self._set_num_epoch(model_id, all_train_params, self.num_epoch)
params_str = " ".join([f"--{k} {v}" for k, v in all_train_params.items()])
cfg["command"].append(
"otx train ${model}"
" --train-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['train']}"
" --val-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['val']}"
" --deterministic"
f" {resource_param}"
f" params {params_str}"
)
cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['test']}")
if self.eval_upto == "train":
return cfg

cfg["command"].append("otx export")
cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['test']}")
if self.eval_upto == "export":
return cfg

cfg["command"].append("otx optimize")
cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['test']}")
return cfg

@staticmethod
def _set_num_epoch(model_id: str, train_params: dict, num_epoch: int):
"""Set model specific num_epoch parameter."""
if "padim" in model_id:
return # No configurable parameter for num_epoch
elif "stfpm" in model_id:
train_params["learning_parameters.max_epochs"] = num_epoch
else:
train_params["learning_parameters.num_iters"] = num_epoch
146 changes: 146 additions & 0 deletions tests/perf/conftest.py
@@ -0,0 +1,146 @@
# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0


import pytest
import os
import subprocess
import yaml
from pathlib import Path
from typing import List
from datetime import datetime

from otx.api.entities.model_template import ModelTemplate, ModelCategory
from .benchmark import OTXBenchmark


def pytest_addoption(parser):
"""Add custom options for perf tests."""
parser.addoption(
"--model-type",
action="store",
default="all",
choices=("default", "all"),
help="Choose default|all. Defaults to all.",
)
parser.addoption(
"--data-size",
action="store",
default="all",
choices=("small", "medium", "large", "all"),
help="Choose small|medium|large|all. Defaults to all.",
)
parser.addoption(
"--num-repeat",
action="store",
default=0,
help="Overrides default per-data-size number of repeat setting. "
"Random seeds are set to 0 ~ num_repeat-1 for the trials. "
"Defaults to 0 (small=3, medium=3, large=1).",
)
parser.addoption(
"--num-epoch",
action="store",
default=0,
help="Overrides default per-model number of epoch setting. "
"Defaults to 0 (per-model epoch & early-stopping).",
)
parser.addoption(
"--eval-upto",
action="store",
default="train",
choices=("train", "export", "optimize"),
help="Choose train|export|optimize. Defaults to train.",
)
parser.addoption(
"--data-root",
action="store",
default="data",
help="Dataset root directory.",
)
parser.addoption(
"--output-root",
action="store",
default="exp/perf",
help="Output root directory.",
)
parser.addoption(
"--dry-run",
action="store_true",
default=False,
help="Print OTX commands without execution.",
)


@pytest.fixture(scope="session")
def fxt_output_root(request: pytest.FixtureRequest) -> Path:
"""Output root + date + short commit hash."""
output_root = request.config.getoption("--output-root")
date_str = datetime.now().strftime("%Y%m%d-%H%M%S")
commit_str = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode("ascii").strip()
return Path(output_root) / (date_str + "-" + commit_str)


@pytest.fixture
def fxt_model_id(request: pytest.FixtureRequest) -> str:
"""Skip by model category."""
model_type: str = request.config.getoption("--model-type")
model_template: ModelTemplate = request.param
if model_type == "default":
if model_template.model_category == ModelCategory.OTHER:
pytest.skip(f"{model_template.model_category} category model")
return model_template.model_template_id


@pytest.fixture
def fxt_benchmark(request: pytest.FixtureRequest, fxt_output_root: Path) -> OTXBenchmark:
"""Configure benchmark."""
# Skip by dataset size
data_size_option: str = request.config.getoption("--data-size")
data_size: str = request.param[0]
if data_size_option != "all":
if data_size_option != data_size:
pytest.skip(f"{data_size} datasets")

# Options
cfg: dict = request.param[1].copy()

tags = cfg.get("tags", {})
tags["data_size"] = data_size
cfg["tags"] = tags

num_epoch_override: int = int(request.config.getoption("--num-epoch"))
if num_epoch_override > 0: # 0: use default
cfg["num_epoch"] = num_epoch_override
if "test_speed" in request.node.name:
if cfg.get("num_epoch", 0) == 0: # No user options
cfg["num_epoch"] = 2

num_repeat_override: int = int(request.config.getoption("--num-repeat"))
if num_repeat_override > 0: # 0: use default
cfg["num_repeat"] = num_repeat_override

cfg["eval_upto"] = request.config.getoption("--eval-upto")
cfg["data_root"] = request.config.getoption("--data-root")
cfg["output_root"] = str(fxt_output_root)
cfg["dry_run"] = request.config.getoption("--dry-run")

# Create benchmark
benchmark = OTXBenchmark(
**cfg,
)

return benchmark


@pytest.fixture(scope="session", autouse=True)
def fxt_benchmark_summary(fxt_output_root: Path):
"""Summarize all results at the end of test session."""
yield
all_results = OTXBenchmark.load_result(fxt_output_root)
if all_results is not None:
print("=" * 20, "[Benchmark summary]")
print(all_results)
output_path = fxt_output_root / "benchmark-summary.csv"
all_results.to_csv(output_path, index=False)
print(f" -> Saved to {output_path}.")