disable some models for torchbench (#6352)
zpcore authored and bhavya01 committed Apr 22, 2024
1 parent 7ce7311 commit cc02b8b
Showing 2 changed files with 51 additions and 11 deletions.
12 changes: 7 additions & 5 deletions benchmarks/experiment_runner.py
@@ -113,7 +113,8 @@ def generate_and_run_all_configs(self):

# Skip unsupported config.
if not self.model_loader.is_compatible(benchmark_model,
benchmark_experiment):
benchmark_experiment,
self._args.strict_compatible):
logger.warning("SKIP incompatible model and experiment configs.")
self._save_results(benchmark_experiment.to_dict(),
benchmark_model.to_dict(), {"error": "SKIP"})
@@ -841,7 +842,6 @@ def __str__(self):
parser.add_argument(
"--disable-tf32",
action="store_true",
default=False,
help="Whether to enable fast F32 multiplication in PyTorch.",
)
parser.add_argument(
@@ -864,21 +864,23 @@ def __str__(self):
parser.add_argument(
"--pure-wall-time",
action="store_true",
default=False,
help="Times wall time measurements with pure CUDA events. No kernel launch overhead.",
)
parser.add_argument(
"--filter-by-single-graph",
action="store_true",
default=False,
help="Runs the experiment with hard-failing when it detects there will be multiple graphs out of a single compiled region.",
)
parser.add_argument(
"--verify",
action="store_true",
default=False,
help="""If set, verifies the model output with PT Eager mode, and saves relative error to the output file."""
)
parser.add_argument(
"--strict-compatible",
action="store_true",
help="Strictly skips some models including models without installation file or causing stackdump.",
)
return parser.parse_args(args)


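The runner-side change is small: a new `--strict-compatible` store-true flag plus one extra argument at the `is_compatible` call site. The minimal sketch below shows how the parsed flag would flow through to the model loader; `Runner` and `should_skip` are illustrative names that are not part of the diff, and only the `--strict-compatible` flag and the three-argument `is_compatible` call are taken from the change itself.

# Minimal sketch, not the project's actual runner: shows how the parsed flag
# would reach the compatibility check. `Runner` and `should_skip` are
# hypothetical names introduced for illustration only.
import argparse

def parse_args(args=None):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--strict-compatible",
        action="store_true",
        help="Strictly skip models without an install file or that stack-dump.",
    )
    return parser.parse_args(args)

class Runner:

    def __init__(self, args, model_loader):
        self._args = args
        self.model_loader = model_loader

    def should_skip(self, benchmark_model, benchmark_experiment):
        # The flag is passed straight through; the loader decides whether to
        # consult DENY_LIST or STRICT_DENY_LIST.
        return not self.model_loader.is_compatible(
            benchmark_model, benchmark_experiment, self._args.strict_compatible)

An invocation would then presumably look like `python benchmarks/experiment_runner.py --strict-compatible ...`, with the remaining flags unchanged.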
50 changes: 44 additions & 6 deletions benchmarks/torchbench_model.py
@@ -56,13 +56,28 @@
"timm_vovnet",
"vgg16",
"hf_T5",
# PyTorch/benchmark sets its optimizer as SGD.
# Otherwise, OOMs.
"llama_v2_7b_16h",
}

# Skip the experiment of a model if any of the experiment configs in the list is fully matched
DENY_LIST = {
"cm3leon_generate": [
{
"test": "train",
},
{
"test": "eval",
"xla": "PJRT",
},
], # no install.py
"hf_T5_generate": [
{
"test": "train",
},
{
"test": "eval",
"xla": "PJRT",
},
], # no install.py
"doctr_det_predictor": [{
"test": "train"
},], # not implemented
@@ -127,6 +142,25 @@
"vision_maskrcnn": [{}],
}

# This strict deny list denies tests that run for too long and time out.
STRICT_DENY_LIST = {
**{
"opacus_cifar10": [{
"accelerator": "tpu",
},], # stackdump issue in TPU
"pytorch_stargan": [{
"accelerator": "tpu",
},], # stackdump issue in TPU
"soft_actor_critic": [{
"accelerator": "tpu",
},], # stackdump issue in TPU
"speech_transformer": [{
"accelerator": "tpu",
},], # stackdump issue in TPU
},
**DENY_LIST
}


class TorchBenchModelLoader(ModelLoader):

@@ -179,9 +213,13 @@ def list_model_configs(self):

return model_configs

def is_compatible(self, dummy_benchmark_model, benchmark_experiment):
if dummy_benchmark_model.model_name in DENY_LIST:
for deny_experiment_config in DENY_LIST[dummy_benchmark_model.model_name]:
def is_compatible(self,
dummy_benchmark_model,
benchmark_experiment,
use_strict_deny=False):
deny_list = STRICT_DENY_LIST if use_strict_deny else DENY_LIST
if dummy_benchmark_model.model_name in deny_list:
for deny_experiment_config in deny_list[dummy_benchmark_model.model_name]:
matched = True
for k, v in deny_experiment_config.items():
if getattr(benchmark_experiment, k) != v:
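The diff truncates inside `is_compatible`, but the matching rule it implies is: a model/experiment pair is denied when every key/value pair in any of the model's deny entries equals the corresponding attribute of the experiment, so an empty entry (as for `vision_maskrcnn`) denies every configuration. The sketch below is a self-contained illustration of that rule under that assumption; the `matched = False` early exit and the final return fall outside the shown hunk and are reconstructed, not quoted.

# Self-contained sketch of the deny-list matching rule implied by the diff.
# An experiment is denied when every key/value in any deny entry matches the
# experiment's attributes; an empty entry ({}) therefore denies everything.
from types import SimpleNamespace

DENY = {
    "opacus_cifar10": [{"accelerator": "tpu"}],
    "vision_maskrcnn": [{}],  # denied unconditionally
}

def is_compatible(model_name, experiment, deny_list):
    for deny_experiment_config in deny_list.get(model_name, []):
        matched = True
        for k, v in deny_experiment_config.items():
            if getattr(experiment, k) != v:
                matched = False
                break
        if matched:
            return False
    return True

# Example: a TPU run of opacus_cifar10 is skipped, a CUDA run is not.
exp_tpu = SimpleNamespace(accelerator="tpu", test="eval")
exp_cuda = SimpleNamespace(accelerator="cuda", test="eval")
assert not is_compatible("opacus_cifar10", exp_tpu, DENY)
assert is_compatible("opacus_cifar10", exp_cuda, DENY)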
