From 76c7f3c42a889ec218beec23145ce5fafbd86acd Mon Sep 17 00:00:00 2001
From: Trevor Morris
Date: Thu, 4 Mar 2021 14:10:15 -0800
Subject: [PATCH] [BYOC][TensorRT] Make TRT runtime robust to empty or weird
 subgraphs (#7581)

* Prevent TRT runtime crash for duplicate inputs and outputs

* Add empty subgraph unit test
---
 .../contrib/tensorrt/tensorrt_builder.cc |  8 ++++
 tests/python/contrib/test_tensorrt.py    | 42 +++++++++++++++----
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.cc b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
index ee47e67001f3..09b36d720877 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_builder.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
@@ -99,6 +99,14 @@ void TensorRTBuilder::AddOutput(const JSONGraphNodeEntry& node, uint32_t entry_i
   ICHECK(it != node_output_map_.end()) << "Output was not found.";
   auto out_tensor = it->second[node.index_].tensor;
   std::string name = "tensorrt_output_" + std::to_string(network_output_names_.size());
+  // If the tensor is already marked as a network input or output, make a copy to avoid a TRT crash.
+  if (out_tensor->isNetworkOutput()) {
+    LOG(WARNING) << name << " is a duplicate output.";
+    out_tensor = network_->addIdentity(*out_tensor)->getOutput(0);
+  } else if (out_tensor->isNetworkInput()) {
+    LOG(WARNING) << name << " is both an input and an output.";
+    out_tensor = network_->addIdentity(*out_tensor)->getOutput(0);
+  }
   out_tensor->setName(name.c_str());
   network_->markOutput(*out_tensor);
   network_output_names_.push_back(name);
diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py
index 7ddc4e762cfd..60d6b2aa7571 100644
--- a/tests/python/contrib/test_tensorrt.py
+++ b/tests/python/contrib/test_tensorrt.py
@@ -71,6 +71,14 @@ def assert_result_dict_holds(result_dict):
             tvm.testing.assert_allclose(r1, r2, rtol=1e-3, atol=1e-3)
 
 
+def set_func_attr(func, compile_name, symbol_name):
+    func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
+    func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1))
+    func = func.with_attr("Compiler", compile_name)
+    func = func.with_attr("global_symbol", symbol_name)
+    return func
+
+
 def run_and_verify_func(config, target="cuda"):
     """Test a Relay func by compiling, running, and comparing TVM and TRT outputs.
 
@@ -1109,13 +1117,6 @@ def test_dynamic_offload():
     kernel = relay.var("kernel", shape=(k_shape), dtype="float32")
 
     def get_expected():
-        def set_func_attr(func, compile_name, symbol_name):
-            func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
-            func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1))
-            func = func.with_attr("Compiler", compile_name)
-            func = func.with_attr("global_symbol", symbol_name)
-            return func
-
         # Create a nested TRT function that matches the expected output
         mod = tvm.IRModule()
         var1 = relay.var("tensorrt_0_i0", shape=(data_shape), dtype="float32")
@@ -1331,5 +1332,32 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray:
     )
 
 
+def test_empty_subgraph():
+    if skip_codegen_test():
+        return
+    x_shape = (1, 3, 5)
+    mod = tvm.IRModule()
+    # Empty tensorrt subgraph: the function simply returns its input.
+    var1 = relay.var("tensorrt_0_i0", shape=(x_shape), dtype="float32")
+    f1 = GlobalVar("tensorrt_0")
+    func = relay.Function([var1], var1)
+    func = set_func_attr(func, "tensorrt", "tensorrt_0")
+    mod[f1] = func
+    mod = relay.transform.InferType()(mod)
+
+    # Create the main function
+    x = relay.var("x", shape=x_shape, dtype="float32")
+    out = f1(relay.nn.relu(x))
+    f = relay.Function([x], out)
+    mod["main"] = f
+
+    x_data = np.random.uniform(-1, 1, x_shape).astype("float32")
+    for mode in ["graph", "vm"]:
+        with tvm.transform.PassContext(opt_level=3):
+            relay_exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda")
+            if not skip_runtime_test():
+                results = relay_exec.evaluate()(x_data)
+
+
 if __name__ == "__main__":
     pytest.main([__file__])
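
Note: the C++ hunk above works around a TensorRT limitation where a tensor that is
already registered as a network input or output cannot safely be marked as an output
again. A minimal standalone sketch of that identity-layer pattern, assuming the
nvinfer1 C++ API (MarkOutputSafely is an illustrative helper name, not TVM or
TensorRT code):

    #include "NvInfer.h"

    // Sketch only: route an already-registered tensor through a no-op identity
    // layer so the layer's fresh output tensor can be named and marked as a
    // network output without crashing TRT.
    nvinfer1::ITensor* MarkOutputSafely(nvinfer1::INetworkDefinition* network,
                                        nvinfer1::ITensor* tensor, const char* name) {
      if (tensor->isNetworkInput() || tensor->isNetworkOutput()) {
        tensor = network->addIdentity(*tensor)->getOutput(0);
      }
      tensor->setName(name);
      network->markOutput(*tensor);
      return tensor;
    }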