Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add test for TensorRT version-compatible model support #6255

Merged
merged 12 commits into from
Sep 5, 2023
110 changes: 110 additions & 0 deletions qa/L0_trt_compat/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Test TensorRT version-compatible model support:
#   1) Loading a version-compatible plan WITHOUT the backend flag must fail
#      with a "Cannot deserialize engine with lean runtime" error.
#   2) Loading WITH --backend-config=tensorrt,version-compatible=true must
#      succeed and serve inference correctly.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

TEST_RESULT_FILE='test_results.txt'
COMPATIBILITY_TEST_PY=trt_compatibility_test.py
CLIENT_LOG="client.log"
DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models --exit-timeout-secs=120"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

rm -fr models && mkdir models
cp -r $DATADIR/qa_identity_model_repository/plan_compatible_zero_1_float32 models/.

RET=0

# Bracketed pattern keeps grep from matching its own entry in ps output,
# which would make this guard fire even with no server running.
if [ `ps | grep -c "[t]ritonserver"` != "0" ]; then
    echo -e "Tritonserver already running"
    echo -e `ps | grep "[t]ritonserver"`
    exit 1
fi

# Without version-compatible support enabled, the server is expected to
# FAIL to load the version-compatible model.
run_server
if [ "$SERVER_PID" != "0" ]; then
    cat $SERVER_LOG
    # Report on stdout like every other failure path in this script
    # (previously appended to $CLIENT_LOG, hiding it from the CI console).
    echo -e "\n***\n*** FAILED: unexpected server start (version compatibility disabled): $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    exit 1
fi

EXPECTED_ERR="Internal Error (Cannot deserialize engine with lean runtime"
if ! grep "$EXPECTED_ERR" $SERVER_LOG; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed to find expected error: ${EXPECTED_ERR} \n***"
    RET=1
fi

# With version-compatible support enabled, the server must start successfully.
SERVER_ARGS="--model-repository=`pwd`/models --exit-timeout-secs=120 --backend-config=tensorrt,version-compatible=true"

run_server
if [ "$SERVER_PID" == "0" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** FAILED: unsuccessful server start (version compatibility enabled): $SERVER\n***"
    exit 1
fi

set +e

python $COMPATIBILITY_TEST_PY >$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET
50 changes: 50 additions & 0 deletions qa/L0_trt_compat/trt_compatibility_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import infer_util as iu
import numpy as np
import test_util as tu


class TrtCompatibilityTest(tu.TestResultCollector):
    """Verify inference against a TensorRT version-compatible plan model."""

    def setUp(self):
        # The identity model under test uses FP32 tensors.
        self._data_type = np.float32

    def test_plan(self):
        """Run the version-compatible identity plan through infer_zero.

        plan_compatible_zero_1_float32 is an identity model with input
        shape [-1]; input and expected output shapes are therefore equal.
        """
        tensor_shapes = [[2, 4]]
        iu.infer_zero(
            self, "plan_compatible", 1, self._data_type, tensor_shapes, tensor_shapes
        )


# Run the test case(s) via unittest's CLI entry point when executed directly
# (e.g. `python trt_compatibility_test.py` from test.sh).
if __name__ == "__main__":
    unittest.main()
42 changes: 32 additions & 10 deletions qa/common/gen_qa_identity_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1027,16 +1027,22 @@ def create_plan_dynamic_modelfile(
config = builder.create_builder_config()
config.add_optimization_profile(profile)
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
if FLAGS.tensorrt_compat:
config.set_flag(trt.BuilderFlag.VERSION_COMPATIBLE)
try:
engine_bytes = builder.build_serialized_network(network, config)
except AttributeError:
engine = builder.build_engine(network, config)
engine_bytes = engine.serialize()
del engine

model_name = tu.get_zero_model_name(
"plan_nobatch" if max_batch == 0 else "plan", io_cnt, dtype
)
model_name_base = "plan"
if max_batch == 0:
model_name_base += "_nobatch"
if FLAGS.tensorrt_compat:
model_name_base += "_compatible"

model_name = tu.get_zero_model_name(model_name_base, io_cnt, dtype)
model_version_dir = os.path.join(models_dir, model_name, str(model_version))
os.makedirs(model_version_dir, exist_ok=True)

Expand All @@ -1052,9 +1058,12 @@ def create_plan_modelconfig(

shape_str = tu.shape_to_dims_str(shape)

model_name = tu.get_zero_model_name(
"plan_nobatch" if max_batch == 0 else "plan", io_cnt, dtype
)
model_name_base = "plan"
if max_batch == 0:
model_name_base += "_nobatch"
if FLAGS.tensorrt_compat:
model_name_base += "_compatible"
model_name = tu.get_zero_model_name(model_name_base, io_cnt, dtype)
config_dir = os.path.join(models_dir, model_name)

if FLAGS.tensorrt_shape_io:
Expand Down Expand Up @@ -1228,7 +1237,7 @@ def create_models(models_dir, dtype, shape, io_cnt=1, no_batch=True):
True, models_dir, model_version, io_cnt, 0, dtype, shape
)

if FLAGS.tensorrt:
if FLAGS.tensorrt or FLAGS.tensorrt_compat:
create_plan_modelconfig(
True, models_dir, model_version, io_cnt, 8, dtype, shape
)
Expand Down Expand Up @@ -1336,6 +1345,12 @@ def create_models(models_dir, dtype, shape, io_cnt=1, no_batch=True):
action="store_true",
help="Generate TensorRT PLAN models w/ opt profile with large max",
)
parser.add_argument(
"--tensorrt-compat",
required=False,
action="store_true",
help="Generate TensorRT version-compatible models",
)
parser.add_argument(
"--tensorrt-shape-io",
required=False,
Expand All @@ -1360,7 +1375,12 @@ def create_models(models_dir, dtype, shape, io_cnt=1, no_batch=True):
if FLAGS.libtorch:
import torch
from torch import nn
if FLAGS.tensorrt or FLAGS.tensorrt_big or FLAGS.tensorrt_shape_io:
if (
FLAGS.tensorrt
or FLAGS.tensorrt_big
or FLAGS.tensorrt_compat
or FLAGS.tensorrt_shape_io
):
import tensorrt as trt
if FLAGS.openvino:
from openvino.inference_engine import IENetwork
Expand All @@ -1369,10 +1389,12 @@ def create_models(models_dir, dtype, shape, io_cnt=1, no_batch=True):
import test_util as tu

# Create models with variable-sized input and output. For big
# TensorRT models only create the one needed for performance
# testing
# and version-compatible TensorRT models, only create the one
# needed for testing.
if FLAGS.tensorrt_big:
create_models(FLAGS.models_dir, np.float32, [-1], io_cnt=1)
elif FLAGS.tensorrt_compat:
create_models(FLAGS.models_dir, np.float32, [-1], io_cnt=1, no_batch=False)
elif FLAGS.tensorrt_shape_io:
create_shape_tensor_models(FLAGS.models_dir, np.float32, [-1, -1], io_cnt=1)
else:
Expand Down
1 change: 1 addition & 0 deletions qa/common/gen_qa_model_repository
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,7 @@ chmod -R 777 $DESTDIR
python3 $SRCDIR/gen_qa_models.py --tensorrt --variable --models_dir=$VARDESTDIR
chmod -R 777 $VARDESTDIR
python3 $SRCDIR/gen_qa_identity_models.py --tensorrt --models_dir=$IDENTITYDESTDIR
python3 $SRCDIR/gen_qa_identity_models.py --tensorrt-compat --models_dir=$IDENTITYDESTDIR
chmod -R 777 $IDENTITYDESTDIR
python3 $SRCDIR/gen_qa_identity_models.py --tensorrt-big --models_dir=$IDENTITYBIGDESTDIR
chmod -R 777 $IDENTITYBIGDESTDIR
Expand Down
2 changes: 1 addition & 1 deletion qa/common/infer_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ def infer_shape_tensor(
import tritonclient.utils.shared_memory as shm

tester.assertTrue(use_http or use_grpc or use_streaming)
tester.assertTrue(pf == "plan" or pf == "plan_nobatch")
tester.assertTrue(pf.startswith("plan"))
tester.assertEqual(len(input_shape_values), len(dummy_input_shapes))

configs = []
Expand Down
Loading