feat: add args for profiling engine caching (#3329)
zewenli98 authored Dec 18, 2024
1 parent 695ffd9 commit 062e4d6
36 changes: 34 additions & 2 deletions tools/perf/perf_run.py
@@ -255,6 +255,13 @@ def run_dynamo(model, input_tensors, params, precision, batch_size):
         min_block_size=params.get("min_block_size", 1),
         debug=False,
         truncate_long_and_double=params.get("truncate", False),
+        immutable_weights=params.get("immutable_weights", True),
+        strip_engine_weights=params.get("strip_engine_weights", False),
+        refit_identical_engine_weights=params.get(
+            "refit_identical_engine_weights", False
+        ),
+        cache_built_engines=params.get("cache_built_engines", False),
+        reuse_cached_engines=params.get("reuse_cached_engines", False),
     )
     end_compile = timeit.default_timer()
     compile_time_s = end_compile - start_compile
@@ -585,6 +592,31 @@ def run(
         type=str,
         help="Path of the output file where performance summary is written.",
     )
+    arg_parser.add_argument(
+        "--immutable_weights",
+        action="store_true",
+        help="Build non-refittable engines. This is useful for some layers that are not refittable. If this argument is set to true, `strip_engine_weights` and `refit_identical_engine_weights` will be ignored.",
+    )
+    arg_parser.add_argument(
+        "--strip_engine_weights",
+        action="store_true",
+        help="Strip engine weights from the serialized engine. This is useful when the engine is to be deployed in an environment where the weights are not required.",
+    )
+    arg_parser.add_argument(
+        "--refit_identical_engine_weights",
+        action="store_true",
+        help="Refit engines with identical weights. This is useful when the same model is compiled multiple times with different inputs and the weights are the same. This will save time by reusing the same engine for different inputs.",
+    )
+    arg_parser.add_argument(
+        "--cache_built_engines",
+        action="store_true",
+        help="Whether to save the compiled TRT engines to storage.",
+    )
+    arg_parser.add_argument(
+        "--reuse_cached_engines",
+        action="store_true",
+        help="Whether to load the compiled TRT engines from storage.",
+    )
     args = arg_parser.parse_args()

     # Create random input tensor of certain size
@@ -605,9 +637,9 @@ def run(
     # Load PyTorch Model, if provided
     if len(model_name_torch) > 0 and os.path.exists(model_name_torch):
         print("Loading user provided torch model: ", model_name_torch)
-        model_torch = torch.load(model_name_torch).eval()
+        model_torch = torch.load(model_name_torch).cuda().eval()
     elif model_name_torch in BENCHMARK_MODELS:
-        model_torch = BENCHMARK_MODELS[model_name_torch]["model"].eval()
+        model_torch = BENCHMARK_MODELS[model_name_torch]["model"].cuda().eval()

     # If neither model type was provided
     if (model is None) and (model_torch is None):
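For reference, the five new store_true flags map one-to-one onto the keyword arguments that run_dynamo now forwards to the torch_tensorrt compile call in the first hunk. Below is a minimal sketch of the same options used directly outside the benchmark harness; the torchvision model, input shape, and fp16 precision are illustrative assumptions, not part of this commit.

import torch
import torch_tensorrt as torchtrt
import torchvision.models as models

# Illustrative model and input; any dynamo-compilable module works.
model = models.resnet18().cuda().eval()
inputs = [torch.randn(1, 3, 224, 224, device="cuda")]

trt_model = torchtrt.compile(
    model,
    ir="dynamo",
    inputs=inputs,
    enabled_precisions={torch.float16},
    immutable_weights=False,  # refittable engines are generally required for the caching/refit options below
    strip_engine_weights=False,
    refit_identical_engine_weights=False,
    cache_built_engines=True,  # save built TRT engines to the engine cache
    reuse_cached_engines=True,  # load matching engines from the cache on later compilations
)

In the benchmark itself, the same behavior is requested by appending the new flags to the usual perf_run.py invocation, e.g. --cache_built_engines --reuse_cached_engines.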
