diff --git a/run.py b/run.py
index 827bf2e496..4911835d77 100644
--- a/run.py
+++ b/run.py
@@ -101,9 +101,9 @@ def printResultSummaryTime(result_summary, metrics_needed=[], model=None, flops_
         if flops_model_analyzer.metrics_backend_mapping['flops'] == 'dcgm':
             tflops_device_id, tflops = flops_model_analyzer.calculate_flops()
         else:
-            flops, batch_size = model.get_flops()
-            tflops = flops * batch_size / (cpu_walltime / 1.0e3) / 1.0e12
-        print('{:<20} {:>20}'.format("GPU %d FLOPS:" % tflops_device_id, "%.4f TFLOPs per second" % tflops, sep=''))
+            flops = model.get_flops()
+            tflops = flops / (cpu_walltime / 1.0e3) / 1.0e12
+        print('{:<20} {:>20}'.format("GPU FLOPS:", "%.4f TFLOPs per second" % tflops, sep=''))
     if gpu_peak_mem is not None:
         print('{:<20} {:>20}'.format("GPU %d Peak Memory:" % mem_device_id, "%.4f GB" % gpu_peak_mem, sep=''))
     if cpu_peak_mem is not None:
diff --git a/torchbenchmark/util/backends/flops.py b/torchbenchmark/util/backends/flops.py
deleted file mode 100644
index fa0a965b51..0000000000
--- a/torchbenchmark/util/backends/flops.py
+++ /dev/null
@@ -1,10 +0,0 @@
-
-# By default, FlopCountAnalysis count one fused-mult-add (FMA) as one flop.
-# However, in our context, we count 1 FMA as 2 flops instead of 1.
-# https://github.com/facebookresearch/fvcore/blob/7a0ef0c0839fa0f5e24d2ef7f5d48712f36e7cd7/fvcore/nn/flop_count.py
-def enable_fvcore_flops(model: 'torchbenchmark.util.model.BenchmarkModel', flops_fma=2.0):
-    assert hasattr(model, 'TORCHVISION_MODEL') and model.TORCHVISION_MODEL, "fvcore flops is only available on torchvision models!"
-    assert model.test == "eval", "fvcore flops is only available on inference tests, as it doesn't measure backward pass."
-    from fvcore.nn import FlopCountAnalysis
-    model.flops = FlopCountAnalysis(model.model, tuple(model.example_inputs)).total()
-    model.flops = model.flops / model.batch_size * flops_fma
diff --git a/torchbenchmark/util/extra_args.py b/torchbenchmark/util/extra_args.py
index f23bcfe647..d27ecfb1c0 100644
--- a/torchbenchmark/util/extra_args.py
+++ b/torchbenchmark/util/extra_args.py
@@ -2,9 +2,7 @@
 import enum
 from typing import List, Optional, Tuple
 from torchbenchmark.util.backends import list_backends, BACKENDS
-
-from torchbenchmark.util.backends.flops import enable_fvcore_flops
-from torchbenchmark.util.env_check import is_torchvision_model, is_staged_train_test
+from torchbenchmark.util.env_check import is_staged_train_test
 
 TEST_STAGE = enum.Enum('TEST_STAGE', ['FORWARD', 'BACKWARD', 'OPTIMIZER', 'ALL'])
 AVAILABLE_PRECISIONS = ["fp32", "tf32", "fp16", "amp", "fx_int8", "bf16","amp_fp16", "amp_bf16"]
@@ -127,7 +125,6 @@
 def parse_opt_args(model: 'torchbenchmark.util.model.BenchmarkModel', opt_args: List[str]) -> argparse.Namespace:
     parser = argparse.ArgumentParser()
     parser.add_argument("--backend", choices=list_backends(), help="enable backends")
-    parser.add_argument("--flops", choices=["fvcore", "dcgm"], help="Return the flops result")
     args, extra_args = parser.parse_known_args(opt_args)
     if model.jit:
         args.backend = "torchscript"
@@ -137,7 +134,5 @@ def parse_opt_args(model: 'torchbenchmark.util.model.BenchmarkModel', opt_args: List[str]) -> argparse.Namespace:
     return args, extra_args
 
 def apply_opt_args(model: 'torchbenchmark.util.model.BenchmarkModel', args: argparse.Namespace):
-    if args.flops == "fvcore":
-        enable_fvcore_flops(model)
     if args.backend:
         model._enable_backend()
diff --git a/torchbenchmark/util/framework/vision/model_factory.py b/torchbenchmark/util/framework/vision/model_factory.py
index 97f3220f79..25e5dfadf0 100644
--- a/torchbenchmark/util/framework/vision/model_factory.py
+++ b/torchbenchmark/util/framework/vision/model_factory.py
@@ -48,7 +48,15 @@ def __init__(self, model_name, test, device, jit=False, batch_size=None, weights
         self.real_output = ( torch.rand_like(self.example_outputs), )
 
     def get_flops(self):
-        return self.flops, self.batch_size
+        # By default, FlopCountAnalysis count one fused-mult-add (FMA) as one flop.
+        # However, in our context, we count 1 FMA as 2 flops instead of 1.
+        # https://github.com/facebookresearch/fvcore/blob/7a0ef0c0839fa0f5e24d2ef7f5d48712f36e7cd7/fvcore/nn/flop_count.py
+        assert self.test == "eval", "fvcore flops is only available on inference tests, as it doesn't measure backward pass."
+        from fvcore.nn import FlopCountAnalysis
+        FLOPS_FMA = 2.0
+        self.flops = FlopCountAnalysis(self.model, tuple(self.example_inputs)).total()
+        self.flops = self.flops * FLOPS_FMA
+        return self.flops
 
     def gen_inputs(self, num_batches:int=1) -> Tuple[Generator, Optional[int]]:
         def _gen_inputs():
@@ -96,4 +104,3 @@ def cudagraph_eval(self):
             self.g.replay()
             break
         return (self.example_outputs, )
-