Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

disable some models for torchbench #6352

Merged
merged 16 commits into from
Jan 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions benchmarks/experiment_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ def generate_and_run_all_configs(self):

# Skip unsupported config.
if not self.model_loader.is_compatible(benchmark_model,
benchmark_experiment):
benchmark_experiment,
self._args.strict_compatible):
logger.warning("SKIP incompatible model and experiment configs.")
self._save_results(benchmark_experiment.to_dict(),
benchmark_model.to_dict(), {"error": "SKIP"})
Expand Down Expand Up @@ -841,7 +842,6 @@ def __str__(self):
parser.add_argument(
"--disable-tf32",
action="store_true",
default=False,
help="Whether to enable fast F32 multiplication in PyTorch.",
)
parser.add_argument(
Expand All @@ -864,21 +864,23 @@ def __str__(self):
parser.add_argument(
"--pure-wall-time",
action="store_true",
default=False,
help="Times wall time measurements with pure CUDA events. No kernel launch overhead.",
)
parser.add_argument(
"--filter-by-single-graph",
action="store_true",
default=False,
help="Runs the experiment with hard-failing when it detects there will be multiple graphs out of a single compiled region.",
)
parser.add_argument(
"--verify",
action="store_true",
default=False,
help="""If set, verifies the model output with PT Eager mode, and saves relative error to the output file."""
)
parser.add_argument(
"--strict-compatible",
action="store_true",
help="Strictly skips some models including models without installation file or causing stackdump.",
)
return parser.parse_args(args)


Expand Down
50 changes: 44 additions & 6 deletions benchmarks/torchbench_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,28 @@
"timm_vovnet",
"vgg16",
"hf_T5",
# PyTorch/benchmark sets its optimizer as SGD.
# Otherwise, OOMs.
"llama_v2_7b_16h",
}

# Skip the experiment of a model if any of the experiment configs in the list is fully matched
DENY_LIST = {
"cm3leon_generate": [
{
"test": "train",
},
{
"test": "eval",
"xla": "PJRT",
},
], # no install.py
"hf_T5_generate": [
{
"test": "train",
},
{
"test": "eval",
"xla": "PJRT",
},
], # no install.py
zpcore marked this conversation as resolved.
Show resolved Hide resolved
"doctr_det_predictor": [{
"test": "train"
},], # not implemented
Expand Down Expand Up @@ -127,6 +142,25 @@
"vision_maskrcnn": [{}],
}

# Extra deny list applied only in strict mode (--strict-compatible, see
# is_compatible's use_strict_deny flag): denies tests that hang for too
# long and time out. The entries below are merged with DENY_LIST; on a key
# collision, the DENY_LIST entry wins (same precedence as the original
# unpacking order, since **DENY_LIST is expanded last).
STRICT_DENY_LIST = {
    "opacus_cifar10": [{
        "accelerator": "tpu",
    },],  # stackdump issue in TPU
    "pytorch_stargan": [{
        "accelerator": "tpu",
    },],  # stackdump issue in TPU
    "soft_actor_critic": [{
        "accelerator": "tpu",
    },],  # stackdump issue in TPU
    "speech_transformer": [{
        "accelerator": "tpu",
    },],  # stackdump issue in TPU
    **DENY_LIST,
}


class TorchBenchModelLoader(ModelLoader):

Expand Down Expand Up @@ -179,9 +213,13 @@ def list_model_configs(self):

return model_configs

def is_compatible(self, dummy_benchmark_model, benchmark_experiment):
if dummy_benchmark_model.model_name in DENY_LIST:
for deny_experiment_config in DENY_LIST[dummy_benchmark_model.model_name]:
def is_compatible(self,
dummy_benchmark_model,
benchmark_experiment,
use_strict_deny=False):
deny_list = STRICT_DENY_LIST if use_strict_deny else DENY_LIST
if dummy_benchmark_model.model_name in deny_list:
for deny_experiment_config in deny_list[dummy_benchmark_model.model_name]:
matched = True
for k, v in deny_experiment_config.items():
if getattr(benchmark_experiment, k) != v:
Expand Down