Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add test for TensorRT version-compatible model support #6255

Merged
merged 12 commits into from
Sep 5, 2023
110 changes: 110 additions & 0 deletions qa/L0_trt_compat/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Test TensorRT version-compatible model support:
#   1) Loading a version-compatible plan WITHOUT the backend flag must fail
#      with a "Cannot deserialize engine with lean runtime" error.
#   2) Loading WITH --backend-config=tensorrt,version-compatible=true must
#      succeed and serve inference correctly.

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
    REPO_VERSION=$1
fi
if [ -z "$REPO_VERSION" ]; then
    echo -e "Repository version must be specified"
    echo -e "\n***\n*** Test Failed\n***"
    exit 1
fi

TEST_RESULT_FILE='test_results.txt'
COMPATIBILITY_TEST_PY=trt_compatibility_test.py
CLIENT_LOG="client.log"
DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=`pwd`/models --exit-timeout-secs=120"
SERVER_LOG="./inference_server.log"
source ../common/util.sh

rm -fr models && mkdir models
cp -r $DATADIR/qa_identity_model_repository/plan_compatible_zero_1_float32 models/.

RET=0

# Bracketed pattern keeps grep from matching its own entry in ps output,
# which would make this guard fire even with no server running.
if [ `ps | grep -c "[t]ritonserver"` != "0" ]; then
    echo -e "Tritonserver already running"
    echo -e `ps | grep "[t]ritonserver"`
    exit 1
fi

# Without version-compatible support enabled, the server is expected to
# FAIL to load the version-compatible model.
run_server
if [ "$SERVER_PID" != "0" ]; then
    cat $SERVER_LOG
    # Report on stdout like every other failure path in this script
    # (previously appended to $CLIENT_LOG, hiding it from the CI console).
    echo -e "\n***\n*** FAILED: unexpected server start (version compatibility disabled): $SERVER\n***"
    kill $SERVER_PID
    wait $SERVER_PID
    exit 1
fi

EXPECTED_ERR="Internal Error (Cannot deserialize engine with lean runtime"
if ! grep "$EXPECTED_ERR" $SERVER_LOG; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Failed to find expected error: ${EXPECTED_ERR} \n***"
    RET=1
fi

# With version-compatible support enabled, the server must start successfully.
SERVER_ARGS="--model-repository=`pwd`/models --exit-timeout-secs=120 --backend-config=tensorrt,version-compatible=true"

run_server
if [ "$SERVER_PID" == "0" ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** FAILED: unsuccessful server start (version compatibility enabled): $SERVER\n***"
    exit 1
fi

set +e

python $COMPATIBILITY_TEST_PY >$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
else
    check_test_results $TEST_RESULT_FILE 1
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

if [ $RET -eq 0 ]; then
    echo -e "\n***\n*** Test Passed\n***"
else
    echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET
50 changes: 50 additions & 0 deletions qa/L0_trt_compat/trt_compatibility_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import unittest

import infer_util as iu
import numpy as np
import test_util as tu


class TrtCompatibilityTest(tu.TestResultCollector):
    """Verify inference against a TensorRT version-compatible plan model."""

    def setUp(self):
        # The identity model under test uses FP32 tensors.
        self._data_type = np.float32

    def test_plan(self):
        """Run the version-compatible identity plan through infer_zero.

        plan_compatible_zero_1_float32 is an identity model with input
        shape [-1]; input and expected output shapes are therefore equal.
        """
        tensor_shapes = [[2, 4]]
        iu.infer_zero(
            self, "plan_compatible", 1, self._data_type, tensor_shapes, tensor_shapes
        )


# Run the test case(s) via unittest's CLI entry point when executed directly
# (e.g. `python trt_compatibility_test.py` from test.sh).
if __name__ == "__main__":
    unittest.main()
42 changes: 32 additions & 10 deletions qa/common/gen_qa_identity_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1027,16 +1027,22 @@ def create_plan_dynamic_modelfile(
config = builder.create_builder_config()
config.add_optimization_profile(profile)
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 20)
if FLAGS.tensorrt_compat:
config.set_flag(trt.BuilderFlag.VERSION_COMPATIBLE)
try:
engine_bytes = builder.build_serialized_network(network, config)
except AttributeError:
engine = builder.build_engine(network, config)
engine_bytes = engine.serialize()
del engine

model_name = tu.get_zero_model_name(
"plan_nobatch" if max_batch == 0 else "plan", io_cnt, dtype
)
model_name_base = "plan"
if max_batch == 0:
model_name_base += "_nobatch"
if FLAGS.tensorrt_compat:
model_name_base += "_compatible"

model_name = tu.get_zero_model_name(model_name_base, io_cnt, dtype)
model_version_dir = os.path.join(models_dir, model_name, str(model_version))
os.makedirs(model_version_dir, exist_ok=True)

Expand All @@ -1052,9 +1058,12 @@ def create_plan_modelconfig(

shape_str = tu.shape_to_dims_str(shape)

model_name = tu.get_zero_model_name(
"plan_nobatch" if max_batch == 0 else "plan", io_cnt, dtype
)
model_name_base = "plan"
if max_batch == 0:
model_name_base += "_nobatch"
if FLAGS.tensorrt_compat:
model_name_base += "_compatible"
model_name = tu.get_zero_model_name(model_name_base, io_cnt, dtype)
config_dir = os.path.join(models_dir, model_name)

if FLAGS.tensorrt_shape_io:
Expand Down Expand Up @@ -1228,7 +1237,7 @@ def create_models(models_dir, dtype, shape, io_cnt=1, no_batch=True):
True, models_dir, model_version, io_cnt, 0, dtype, shape
)

if FLAGS.tensorrt:
if FLAGS.tensorrt or FLAGS.tensorrt_compat:
create_plan_modelconfig(
True, models_dir, model_version, io_cnt, 8, dtype, shape
)
Expand Down Expand Up @@ -1336,6 +1345,12 @@ def create_models(models_dir, dtype, shape, io_cnt=1, no_batch=True):
action="store_true",
help="Generate TensorRT PLAN models w/ opt profile with large max",
)
parser.add_argument(
"--tensorrt-compat",
required=False,
action="store_true",
help="Generate TensorRT version-compatible models",
)
parser.add_argument(
"--tensorrt-shape-io",
required=False,
Expand All @@ -1360,7 +1375,12 @@ def create_models(models_dir, dtype, shape, io_cnt=1, no_batch=True):
if FLAGS.libtorch:
import torch
from torch import nn
if FLAGS.tensorrt or FLAGS.tensorrt_big or FLAGS.tensorrt_shape_io:
if (
FLAGS.tensorrt
or FLAGS.tensorrt_big
or FLAGS.tensorrt_compat
or FLAGS.tensorrt_shape_io
):
import tensorrt as trt
if FLAGS.openvino:
from openvino.inference_engine import IENetwork
Expand All @@ -1369,10 +1389,12 @@ def create_models(models_dir, dtype, shape, io_cnt=1, no_batch=True):
import test_util as tu

# Create models with variable-sized input and output. For big
# TensorRT models only create the one needed for performance
# testing
# and version-compatible TensorRT models, only create the one
# needed for testing.
if FLAGS.tensorrt_big:
create_models(FLAGS.models_dir, np.float32, [-1], io_cnt=1)
elif FLAGS.tensorrt_compat:
create_models(FLAGS.models_dir, np.float32, [-1], io_cnt=1, no_batch=False)
elif FLAGS.tensorrt_shape_io:
create_shape_tensor_models(FLAGS.models_dir, np.float32, [-1, -1], io_cnt=1)
else:
Expand Down
1 change: 1 addition & 0 deletions qa/common/gen_qa_model_repository
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,7 @@ chmod -R 777 $DESTDIR
python3 $SRCDIR/gen_qa_models.py --tensorrt --variable --models_dir=$VARDESTDIR
chmod -R 777 $VARDESTDIR
python3 $SRCDIR/gen_qa_identity_models.py --tensorrt --models_dir=$IDENTITYDESTDIR
python3 $SRCDIR/gen_qa_identity_models.py --tensorrt-compat --models_dir=$IDENTITYDESTDIR
chmod -R 777 $IDENTITYDESTDIR
python3 $SRCDIR/gen_qa_identity_models.py --tensorrt-big --models_dir=$IDENTITYBIGDESTDIR
chmod -R 777 $IDENTITYBIGDESTDIR
Expand Down
2 changes: 1 addition & 1 deletion qa/common/infer_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ def infer_shape_tensor(
import tritonclient.utils.shared_memory as shm

tester.assertTrue(use_http or use_grpc or use_streaming)
tester.assertTrue(pf == "plan" or pf == "plan_nobatch")
tester.assertTrue(pf.startswith("plan"))
tester.assertEqual(len(input_shape_values), len(dummy_input_shapes))

configs = []
Expand Down
Loading