diff --git a/CHANGELOG.md b/CHANGELOG.md
index 933559f76d4..69848882145 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file.
 
 ### Enhancements
 
 - Upgrade NNCF to 2.7 and OpenVINO to 2023.2 ()
+- Automate performance benchmark ()
 
 ## \[v1.5.0\]
diff --git a/tests/perf/__init__.py b/tests/perf/__init__.py
new file mode 100644
index 00000000000..9984d0cb25b
--- /dev/null
+++ b/tests/perf/__init__.py
@@ -0,0 +1,4 @@
+"""OTX Performance tests."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py
new file mode 100644
index 00000000000..f39ed806731
--- /dev/null
+++ b/tests/perf/benchmark.py
@@ -0,0 +1,202 @@
+"""OTX Benchmark based on tools/experiment.py."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import os
+import glob
+import pandas as pd
+import yaml
+from pathlib import Path
+from typing import List, Optional
+
+from tests.test_suite.run_test_command import check_run
+
+
+class OTXBenchmark:
+    """Benchmark runner based on tools/experiment.py in OTX1.x.
+
+    Example:
+        >>> bm = OTXBenchmark(['random_sample1', 'random_sample2'], data_root='./data/coco')
+        >>> atss_result = bm.run('MobileNetV2-ATSS')
+        >>> yolox_result = bm.run('YOLOX-TINY')
+
+    Args:
+        datasets (List[str]): Paths to datasets relative to the data_root.
+            Intended for, but not restricted to, different samplings of the same dataset.
+        data_root (str): Path to the root of dataset directories. Defaults to './data'.
+        num_epoch (int): Overrides the per-model default number of epochs.
+            Defaults to 0, which means no override.
+        num_repeat (int): Number of trials with different random seeds, which are set
+            to range(0, num_repeat). Defaults to 1.
+        train_params (dict, optional): Additional training parameters.
+            e.g. {'learning_parameters.num_iters': 2}. Defaults to {}.
+        track_resources (bool): Whether to track CPU & GPU usage metrics. Defaults to False.
+        eval_upto (str): The last serial operation to evaluate. Choose one of ('train', 'export', 'optimize').
+            Operations include the preceding ones.
+            e.g. eval up to 'optimize': train -> eval -> export -> eval -> optimize -> eval
+            Defaults to 'train'.
+        output_root (str): Output root directory for logs and results. Defaults to './otx-benchmark'.
+        dry_run (bool): Whether to just print the OTX command without execution. Defaults to False.
+        tags (dict, optional): Key-value pair metadata for the experiment.
+        subset_dir_names (dict, optional): Specify dataset subset directory names, if any.
+            e.g. {"train": "train_10percent", "val": "val_all", "test": "test"}
+    """
+
+    def __init__(
+        self,
+        datasets: List[str],
+        data_root: str = "data",
+        num_epoch: int = 0,
+        num_repeat: int = 1,
+        train_params: Optional[dict] = None,
+        track_resources: bool = False,
+        eval_upto: str = "train",
+        output_root: str = "otx-benchmark",
+        dry_run: bool = False,
+        tags: Optional[dict] = None,
+        subset_dir_names: Optional[dict] = None,
+    ):
+        self.datasets = datasets
+        self.data_root = data_root
+        self.num_epoch = num_epoch
+        self.num_repeat = num_repeat
+        self.train_params = train_params or {}
+        self.track_resources = track_resources
+        self.eval_upto = eval_upto
+        self.output_root = output_root
+        self.dry_run = dry_run
+        self.tags = tags or {}
+        self.subset_dir_names = subset_dir_names or {"train": "", "val": "", "test": ""}
+
+    def run(
+        self,
+        model_id: str,
+        train_params: dict = {},
+        tags: dict = {},
+    ) -> Optional[pd.DataFrame]:
+        """Run configured benchmark with given model and return the result.
+
+        Args:
+            model_id (str): Target model identifier
+            train_params (dict): Overrides global benchmark train params
+            tags (dict): Overrides global benchmark tags
+
+        Returns:
+            Optional[pd.DataFrame]: Table with benchmark metrics
+        """
+
+        # Build config file
+        cfg = self._build_config(model_id, train_params, tags)
+        cfg_dir = Path(cfg["output_path"])
+        cfg_dir.mkdir(parents=True, exist_ok=True)
+        cfg_path = cfg_dir / "cfg.yaml"
+        with open(cfg_path, "w") as cfg_file:
+            yaml.dump(cfg, cfg_file, indent=2)
+        cmd = [
+            "python",
+            "tools/experiment.py",
+            "-f",
+            cfg_path,
+        ]
+        if self.dry_run:
+            cmd.append("-d")
+        # Run benchmark
+        check_run(cmd)
+        # Load result
+        result = self.load_result(cfg_dir)
+        return result
+
+    @staticmethod
+    def load_result(result_path: str) -> Optional[pd.DataFrame]:
+        """Load benchmark results recursively and merge as pd.DataFrame.
+
+        Args:
+            result_path (str): Result directory or specific file.
+
+        Returns:
+            Optional[pd.DataFrame]: Table with benchmark metrics & options
+        """
+        # Search csv files
+        if os.path.isdir(result_path):
+            csv_file_paths = glob.glob(f"{result_path}/**/exp_summary.csv", recursive=True)
+        else:
+            csv_file_paths = [result_path]
+        results = []
+        # Load csv data
+        for csv_file_path in csv_file_paths:
+            result = pd.read_csv(csv_file_path)
+            # Append metadata if any
+            cfg_file_path = Path(csv_file_path).parent / "cfg.yaml"
+            if cfg_file_path.exists():
+                with cfg_file_path.open("r") as cfg_file:
+                    tags = yaml.safe_load(cfg_file).get("tags", {})
+                    for k, v in tags.items():
+                        result[k] = v
+            results.append(result)
+        if len(results) > 0:
+            return pd.concat(results, ignore_index=True)
+        else:
+            return None
+
+    def _build_config(
+        self,
+        model_id: str,
+        train_params: dict = {},
+        tags: dict = {},
+    ) -> dict:
+        """Build config for tools/experiment.py."""
+        all_train_params = self.train_params.copy()
+        all_train_params.update(train_params)
+        all_tags = self.tags.copy()
+        all_tags.update(tags)
+
+        cfg = {}
+        cfg["tags"] = all_tags  # metadata
+        cfg["output_path"] = os.path.abspath(Path(self.output_root) / "-".join(list(all_tags.values()) + [model_id]))
+        cfg["constants"] = {
+            "dataroot": os.path.abspath(self.data_root),
+        }
+        cfg["variables"] = {
+            "model": [model_id],
+            "data": self.datasets,
+        }
+        cfg["repeat"] = self.num_repeat
+        cfg["command"] = []
+        resource_param = ""
+        if self.track_resources:
+            resource_param = "--track-resource-usage all"
+        if self.num_epoch > 0:
+            self._set_num_epoch(model_id, all_train_params, self.num_epoch)
+        params_str = " ".join([f"--{k} {v}" for k, v in all_train_params.items()])
+        cfg["command"].append(
+            "otx train ${model}"
+            " --train-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['train']}"
+            " --val-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['val']}"
+            " --deterministic"
+            f" {resource_param}"
+            f" params {params_str}"
+        )
+        cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['test']}")
+        if self.eval_upto == "train":
+            return cfg
+
+        cfg["command"].append("otx export")
+        cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['test']}")
+        if self.eval_upto == "export":
+            return cfg
+
+        cfg["command"].append("otx optimize")
+        cfg["command"].append("otx eval --test-data-roots ${dataroot}/${data}" + f"/{self.subset_dir_names['test']}")
+        return cfg
+
+    @staticmethod
+    def _set_num_epoch(model_id: str, train_params: dict, num_epoch: int):
+        """Set model specific num_epoch parameter."""
+        if "padim" in model_id:
+            return  # No configurable parameter for num_epoch
+        elif "stfpm" in model_id:
+            train_params["learning_parameters.max_epochs"] = num_epoch
+        else:
+            train_params["learning_parameters.num_iters"] = num_epoch
diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py
new file mode 100644
index 00000000000..0d831d50dd1
--- /dev/null
+++ b/tests/perf/conftest.py
@@ -0,0 +1,146 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import pytest
+import os
+import subprocess
+import yaml
+from pathlib import Path
+from typing import List
+from datetime import datetime
+
+from otx.api.entities.model_template import ModelTemplate, ModelCategory
+from .benchmark import OTXBenchmark
+
+
+def pytest_addoption(parser):
+    """Add custom options for perf tests."""
+    parser.addoption(
+        "--model-type",
+        action="store",
+        default="all",
+        choices=("default", "all"),
+        help="Choose default|all. Defaults to all.",
+    )
+    parser.addoption(
+        "--data-size",
+        action="store",
+        default="all",
+        choices=("small", "medium", "large", "all"),
+        help="Choose small|medium|large|all. Defaults to all.",
+    )
+    parser.addoption(
+        "--num-repeat",
+        action="store",
+        default=0,
+        help="Overrides the default per-data-size number of repeats. "
+        "Random seeds are set to 0 ~ num_repeat-1 for the trials. "
+        "Defaults to 0 (small=3, medium=3, large=1).",
+    )
+    parser.addoption(
+        "--num-epoch",
+        action="store",
+        default=0,
+        help="Overrides the default per-model number of epochs. "
+        "Defaults to 0 (per-model epoch & early-stopping).",
+    )
+    parser.addoption(
+        "--eval-upto",
+        action="store",
+        default="train",
+        choices=("train", "export", "optimize"),
+        help="Choose train|export|optimize. Defaults to train.",
+    )
+    parser.addoption(
+        "--data-root",
+        action="store",
+        default="data",
+        help="Dataset root directory.",
+    )
+    parser.addoption(
+        "--output-root",
+        action="store",
+        default="exp/perf",
+        help="Output root directory.",
+    )
+    parser.addoption(
+        "--dry-run",
+        action="store_true",
+        default=False,
+        help="Print OTX commands without execution.",
+    )
+
+
+@pytest.fixture(scope="session")
+def fxt_output_root(request: pytest.FixtureRequest) -> Path:
+    """Output root + date + short commit hash."""
+    output_root = request.config.getoption("--output-root")
+    date_str = datetime.now().strftime("%Y%m%d-%H%M%S")
+    commit_str = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode("ascii").strip()
+    return Path(output_root) / (date_str + "-" + commit_str)
+
+
+@pytest.fixture
+def fxt_model_id(request: pytest.FixtureRequest) -> str:
+    """Skip by model category."""
+    model_type: str = request.config.getoption("--model-type")
+    model_template: ModelTemplate = request.param
+    if model_type == "default":
+        if model_template.model_category == ModelCategory.OTHER:
+            pytest.skip(f"{model_template.model_category} category model")
+    return model_template.model_template_id
+
+
+@pytest.fixture
+def fxt_benchmark(request: pytest.FixtureRequest, fxt_output_root: Path) -> OTXBenchmark:
+    """Configure benchmark."""
+    # Skip by dataset size
+    data_size_option: str = request.config.getoption("--data-size")
+    data_size: str = request.param[0]
+    if data_size_option != "all":
+        if data_size_option != data_size:
+            pytest.skip(f"{data_size} datasets")
+
+    # Options
+    cfg: dict = request.param[1].copy()
+
+    tags = cfg.get("tags", {})
+    tags["data_size"] = data_size
+    cfg["tags"] = tags
+
+    num_epoch_override: int = int(request.config.getoption("--num-epoch"))
+    if num_epoch_override > 0:  # 0: use default
+        cfg["num_epoch"] = num_epoch_override
+    if "test_speed" in request.node.name:
+        if cfg.get("num_epoch", 0) == 0:  # No user options
+            cfg["num_epoch"] = 2
+
+    num_repeat_override: int = int(request.config.getoption("--num-repeat"))
+    if num_repeat_override > 0:  # 0: use default
+        cfg["num_repeat"] = num_repeat_override
+
+    cfg["eval_upto"] = request.config.getoption("--eval-upto")
+    cfg["data_root"] = request.config.getoption("--data-root")
+    cfg["output_root"] = str(fxt_output_root)
+    cfg["dry_run"] = request.config.getoption("--dry-run")
+
+    # Create benchmark
+    benchmark = OTXBenchmark(
+        **cfg,
+    )
+
+    return benchmark
+
+
+@pytest.fixture(scope="session", autouse=True)
+def fxt_benchmark_summary(fxt_output_root: Path):
+    """Summarize all results at the end of test session."""
+    yield
+    all_results = OTXBenchmark.load_result(fxt_output_root)
+    if all_results is not None:
+        print("=" * 20, "[Benchmark summary]")
+        print(all_results)
+        output_path = fxt_output_root / "benchmark-summary.csv"
+        all_results.to_csv(output_path, index=False)
+        print(f" -> Saved to {output_path}.")
diff --git a/tests/perf/test_anomaly.py b/tests/perf/test_anomaly.py
new file mode 100644
index 00000000000..db16f7172ea
--- /dev/null
+++ b/tests/perf/test_anomaly.py
@@ -0,0 +1,184 @@
+"""OTX Anomaly performance tests."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import pytest
+
+from otx.cli.registry import Registry
+from .benchmark import OTXBenchmark
+
+
+class TestPerfAnomalyClassification:
+    """Benchmark anomaly classification."""
+
+    MODEL_TEMPLATES = Registry("src/otx/algorithms").filter(task_type="ANOMALY_CLASSIFICATION").templates
+    MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES]
+
+    BENCHMARK_CONFIGS = {
+        "small": {
+            "tags": {
+                "task": "anomaly_classification",
+            },
+            "datasets": [
+                "anomaly/mvtec/bottle_small/1",
+                "anomaly/mvtec/bottle_small/2",
+                "anomaly/mvtec/bottle_small/3",
+            ],
+            "num_repeat": 3,
+        },
+        "medium": {
+            "tags": {
+                "task": "anomaly_classification",
+            },
+            "datasets": [
+                "anomaly/mvtec/wood_medium",
+            ],
+            "num_repeat": 3,
+        },
+        "large": {
+            "tags": {
+                "task": "anomaly_classification",
+            },
+            "datasets": [
+                "anomaly/mvtec/hazelnut_large",
+            ],
+            "num_repeat": 1,
+        },
+    }
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark accuracy metrics."""
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "accuracy"},
+        )
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark train time per iter / infer time per image."""
+        fxt_benchmark.track_resources = True
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "speed"},
+        )
+
+
+class TestPerfAnomalyDetection:
+    """Benchmark anomaly detection."""
+
+    MODEL_TEMPLATES = Registry("src/otx/algorithms").filter(task_type="ANOMALY_DETECTION").templates
+    MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES]
+
+    BENCHMARK_CONFIGS = {
+        "small": {
+            "tags": {
+                "task": "anomaly_detection",
+            },
+            "datasets": [
+                "anomaly/mvtec/bottle_small/1",
+                "anomaly/mvtec/bottle_small/2",
+                "anomaly/mvtec/bottle_small/3",
+            ],
+            "num_repeat": 3,
+        },
+        "medium": {
+            "tags": {
+                "task": "anomaly_detection",
+            },
+            "datasets": [
+                "anomaly/mvtec/wood_medium",
+            ],
+            "num_repeat": 3,
+        },
+        "large": {
+            "tags": {
+                "task": "anomaly_detection",
+            },
+            "datasets": [
+                "anomaly/mvtec/hazelnut_large",
+            ],
+            "num_repeat": 1,
+        },
+    }
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark accuracy metrics."""
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "accuracy"},
+        )
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark train time per iter / infer time per image."""
+        fxt_benchmark.track_resources = True
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "speed"},
+        )
+
+
+class TestPerfAnomalySegmentation:
+    """Benchmark anomaly segmentation."""
+
+    MODEL_TEMPLATES = Registry("src/otx/algorithms").filter(task_type="ANOMALY_SEGMENTATION").templates
+    MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES]
+
+    BENCHMARK_CONFIGS = {
+        "small": {
+            "tags": {
+                "task": "anomaly_segmentation",
+            },
+            "datasets": [
+                "anomaly/mvtec/bottle_small/1",
+                "anomaly/mvtec/bottle_small/2",
+                "anomaly/mvtec/bottle_small/3",
+            ],
+            "num_repeat": 3,
+        },
+        "medium": {
+            "tags": {
+                "task": "anomaly_segmentation",
+            },
+            "datasets": [
+                "anomaly/mvtec/wood_medium",
+            ],
+            "num_repeat": 3,
+        },
+        "large": {
+            "tags": {
+                "task": "anomaly_segmentation",
+            },
+            "datasets": [
+                "anomaly/mvtec/hazelnut_large",
+            ],
+            "num_repeat": 1,
+        },
+    }
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark accuracy metrics."""
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "accuracy"},
+        )
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark train time per iter / infer time per image."""
+        fxt_benchmark.track_resources = True
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "speed"},
+        )
diff --git a/tests/perf/test_classification.py b/tests/perf/test_classification.py
new file mode 100644
index 00000000000..15a9b4dd133
--- /dev/null
+++ b/tests/perf/test_classification.py
@@ -0,0 +1,179 @@
+"""OTX Classification performance tests."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import pytest
+
+from otx.cli.registry import Registry
+from .benchmark import OTXBenchmark
+
+
+MODEL_TEMPLATES = Registry("src/otx/algorithms").filter(task_type="CLASSIFICATION").templates
+MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES]
+
+
+class TestPerfSingleLabelClassification:
+    """Benchmark single-label classification."""
+
+    BENCHMARK_CONFIGS = {
+        "small": {
+            "tags": {
+                "task": "single_label_classification",
+            },
+            "datasets": [
+                "classification/single_label/multiclass_CUB_small/1",
+                "classification/single_label/multiclass_CUB_small/2",
+                "classification/single_label/multiclass_CUB_small/3",
+            ],
+            "num_repeat": 3,
+        },
+        "medium": {
+            "tags": {
+                "task": "single_label_classification",
+            },
+            "datasets": [
+                "classification/single_label/multiclass_CUB_medium",
+            ],
+            "num_repeat": 3,
+        },
+        "large": {
+            "tags": {
+                "task": "single_label_classification",
+            },
+            "datasets": [
+                "classification/single_label/multiclass_food101_large",
+            ],
+            "num_repeat": 1,
+        },
+    }
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark accuracy metrics."""
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "accuracy"},
+        )
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark train time per iter / infer time per image."""
+        fxt_benchmark.track_resources = True
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "speed"},
+        )
+
+
+class TestPerfMultiLabelClassification:
+    """Benchmark multi-label classification."""
+
+    BENCHMARK_CONFIGS = {
+        "small": {
+            "tags": {
+                "task": "multi_label_classification",
+            },
+            "datasets": [
+                "classification/multi_label/multilabel_CUB_small/1",
+                "classification/multi_label/multilabel_CUB_small/2",
+                "classification/multi_label/multilabel_CUB_small/3",
+            ],
+            "num_repeat": 3,
+        },
+        "medium": {
+            "tags": {
+                "task": "multi_label_classification",
+            },
+            "datasets": [
+                "classification/multi_label/multilabel_CUB_medium",
+            ],
+            "num_repeat": 3,
+        },
+        "large": {
+            "tags": {
+                "task": "multi_label_classification",
+            },
+            "datasets": [
+                "classification/multi_label/multilabel_food101_large",
+            ],
+            "num_repeat": 1,
+        },
+    }
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark accuracy metrics."""
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "accuracy"},
+        )
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark train time per iter / infer time per image."""
+        fxt_benchmark.track_resources = True
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "speed"},
+        )
+
+
+class TestPerfHierarchicalLabelClassification:
+    """Benchmark hierarchical-label classification."""
+
+    BENCHMARK_CONFIGS = {
+        "small": {
+            "tags": {
+                "task": "hierarchical_label_classification",
+            },
+            "datasets": [
+                "classification/h_label/h_label_CUB_small/1",
+                "classification/h_label/h_label_CUB_small/2",
+                "classification/h_label/h_label_CUB_small/3",
+            ],
+            "num_repeat": 3,
+        },
+        "medium": {
+            "tags": {
+                "task": "hierarchical_label_classification",
+            },
+            "datasets": [
+                "classification/h_label/h_label_CUB_medium",
+            ],
+            "num_repeat": 3,
+        },
+        # TODO: Add large dataset
+        # "large": {
+        #     "tags": {
+        #         "task": "hierarchical_label_classification",
+        #     },
+        #     "datasets": [
+        #     ],
+        #     "num_repeat": 1,
+        # },
+    }
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark accuracy metrics."""
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "accuracy"},
+        )
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark train time per iter / infer time per image."""
+        fxt_benchmark.track_resources = True
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "speed"},
+        )
diff --git a/tests/perf/test_detection.py b/tests/perf/test_detection.py
new file mode 100644
index 00000000000..81ed71c0bac
--- /dev/null
+++ b/tests/perf/test_detection.py
@@ -0,0 +1,69 @@
+"""OTX Detection performance tests."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import pytest
+
+from otx.cli.registry import Registry
+from .benchmark import OTXBenchmark
+
+
+MODEL_TEMPLATES = Registry("src/otx/algorithms").filter(task_type="DETECTION").templates
+MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES]
+
+
+class TestPerfDetection:
+    """Benchmark basic object detection."""
+
+    BENCHMARK_CONFIGS = {
+        "small": {
+            "tags": {
+                "task": "detection",
+            },
+            "datasets": [
+                "detection/pothole_small/1",
+                "detection/pothole_small/2",
+                "detection/pothole_small/3",
+            ],
+            "num_repeat": 3,
+        },
+        "medium": {
+            "tags": {
+                "task": "detection",
+            },
+            "datasets": [
+                "detection/pothole_medium",
+            ],
+            "num_repeat": 3,
+        },
+        "large": {
+            "tags": {
+                "task": "detection",
+            },
+            "datasets": [
+                "detection/vitens_large",
+            ],
+            "num_repeat": 1,
+        },
+    }
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark accuracy metrics."""
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "accuracy"},
+        )
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark train time per iter / infer time per image."""
+        fxt_benchmark.track_resources = True
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "speed"},
+        )
diff --git a/tests/perf/test_instance_segmentation.py b/tests/perf/test_instance_segmentation.py
new file mode 100644
index 00000000000..6e4a1a9b275
--- /dev/null
+++ b/tests/perf/test_instance_segmentation.py
@@ -0,0 +1,130 @@
+"""OTX Instance Segmentation performance tests."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import pytest
+
+from otx.cli.registry import Registry
+from .benchmark import OTXBenchmark
+
+
+MODEL_TEMPLATES = Registry("src/otx/algorithms").filter(task_type="INSTANCE_SEGMENTATION").templates
+MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES]
+
+
+class TestPerfInstanceSegmentation:
+    """Benchmark basic instance segmentation."""
+
+    BENCHMARK_CONFIGS = {
+        "small": {
+            "tags": {
+                "task": "instance_segmentation",
+            },
+            "datasets": [
+                "instance_seg/wgisd_small/1",
+                "instance_seg/wgisd_small/2",
+                "instance_seg/wgisd_small/3",
+            ],
+            "num_repeat": 3,
+        },
+        "medium": {
+            "tags": {
+                "task": "instance_segmentation",
+            },
+            "datasets": [
+                "instance_seg/coco_car_person_medium",
+            ],
+            "num_repeat": 3,
+        },
+        "large": {
+            "tags": {
+                "task": "instance_segmentation",
+            },
+            "datasets": [
+                "instance_seg/bdd_large",
+            ],
+            "num_repeat": 1,
+        },
+    }
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark accuracy metrics."""
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "accuracy"},
+        )
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark train time per iter / infer time per image."""
+        fxt_benchmark.track_resources = True
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "speed"},
+        )
+
+
+class TestPerfTilingInstanceSegmentation:
+    """Benchmark tiling instance segmentation."""
+
+    TILING_PARAMS = {
+        "tiling_parameters.enable_tiling": 1,
+    }
+    BENCHMARK_CONFIGS = {
+        "small": {
+            "tags": {
+                "task": "tiling_instance_segmentation",
+            },
+            "datasets": [
+                "tiling_instance_seg/vitens_aeromonas_small/1",
+                "tiling_instance_seg/vitens_aeromonas_small/2",
+                "tiling_instance_seg/vitens_aeromonas_small/3",
+            ],
+            "num_repeat": 3,
+            "train_params": TILING_PARAMS,
+        },
+        "medium": {
+            "tags": {
+                "task": "tiling_instance_segmentation",
+            },
+            "datasets": [
+                "tiling_instance_seg/vitens_aeromonas_medium",
+            ],
+            "num_repeat": 3,
+            "train_params": TILING_PARAMS,
+        },
+        "large": {
+            "tags": {
+                "task": "tiling_instance_segmentation",
+            },
+            "datasets": [
+                "tiling_instance_seg/bdd_large",
+            ],
+            "num_repeat": 1,
+            "train_params": TILING_PARAMS,
+        },
+    }
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark accuracy metrics."""
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "accuracy"},
+        )
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark train time per iter / infer time per image."""
+        fxt_benchmark.track_resources = True
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "speed"},
+        )
diff --git a/tests/perf/test_semantic_segmentation.py b/tests/perf/test_semantic_segmentation.py
new file mode 100644
index 00000000000..a5ca4086f83
--- /dev/null
+++ b/tests/perf/test_semantic_segmentation.py
@@ -0,0 +1,72 @@
+"""OTX Semantic Segmentation performance tests."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+
+import pytest
+
+from otx.cli.registry import Registry
+from .benchmark import OTXBenchmark
+
+
+MODEL_TEMPLATES = Registry("src/otx/algorithms").filter(task_type="SEGMENTATION").templates
+MODEL_IDS = [template.model_template_id for template in MODEL_TEMPLATES]
+
+
+class TestPerfSemanticSegmentation:
+    """Benchmark basic semantic segmentation."""
+
+    BENCHMARK_CONFIGS = {
+        "small": {
+            "tags": {
+                "task": "semantic_segmentation",
+            },
+            "datasets": [
+                "semantic_seg/kvasir_small/1",
+                "semantic_seg/kvasir_small/2",
+                "semantic_seg/kvasir_small/3",
+            ],
+            "subset_dir_names": {"train": "train", "val": "val", "test": "test"},
+            "num_repeat": 3,
+        },
+        "medium": {
+            "tags": {
+                "task": "semantic_segmentation",
+            },
+            "datasets": [
+                "semantic_seg/kvasir_medium",
+            ],
+            "subset_dir_names": {"train": "train", "val": "val", "test": "test"},
+            "num_repeat": 3,
+        },
+        "large": {
+            "tags": {
+                "task": "semantic_segmentation",
+            },
+            "datasets": [
+                "semantic_seg/kvasir_large",
+            ],
+            "subset_dir_names": {"train": "train", "val": "val", "test": "test"},
+            "num_repeat": 1,
+        },
+    }
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_accuracy(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark accuracy metrics."""
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "accuracy"},
+        )
+
+    @pytest.mark.parametrize("fxt_model_id", MODEL_TEMPLATES, ids=MODEL_IDS, indirect=True)
+    @pytest.mark.parametrize("fxt_benchmark", BENCHMARK_CONFIGS.items(), ids=BENCHMARK_CONFIGS.keys(), indirect=True)
+    def test_speed(self, fxt_model_id: str, fxt_benchmark: OTXBenchmark):
+        """Benchmark train time per iter / infer time per image."""
+        fxt_benchmark.track_resources = True
+        result = fxt_benchmark.run(
+            model_id=fxt_model_id,
+            tags={"benchmark": "speed"},
+        )
diff --git a/tools/experiment.py b/tools/experiment.py
index 7a6de5ee568..f161a5a2372 100644
--- a/tools/experiment.py
+++ b/tools/experiment.py
@@ -790,7 +790,7 @@ def run_experiment_recipe(recipe_file: Union[str, Path], dryrun: bool = False):
     """
     exp_recipe = ExpRecipeParser(recipe_file)
     output_path = exp_recipe.output_path
-    output_path.mkdir(exist_ok=True)
+    output_path.mkdir(parents=True, exist_ok=True)
     current_dir = os.getcwd()
     os.chdir(output_path)
 
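For reference, a minimal sketch of how the new perf suite could be launched locally, assuming the pytest options registered in tests/perf/conftest.py above; the target test module and the option values below are illustrative choices, not mandated by this patch:

    # Run the detection perf benchmark programmatically via pytest (illustrative values).
    import pytest

    pytest.main([
        "tests/perf/test_detection.py",
        "--model-type", "default",   # skip templates whose category is OTHER
        "--data-size", "medium",     # run only the medium-size benchmark configs
        "--num-repeat", "1",         # override the per-size default repeat count
        "--eval-upto", "export",     # train -> eval -> export -> eval
        "--data-root", "data",
        "--output-root", "exp/perf",
    ])

Equivalently, the same flags can be passed to pytest on the command line; results are merged into benchmark-summary.csv under the dated output directory by the fxt_benchmark_summary fixture.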