From f2e6a8f75942b1b2957aa0e67bd42e0938b151b0 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Mon, 21 Nov 2022 16:30:26 +0800
Subject: [PATCH 01/48] add backend support for fastdeploy server

---
 .../component/inference/fastdeploy_lib.py     |   54 +
 .../component/inference/fastdeploy_server.py  |  102 +
 .../inference/proto/model_config.protxt       | 1981 +++++++++++++++++
 .../proto/model_config/protxt_pb2.py          |  855 +++++++
 visualdl/server/app.py                        |   11 +
 5 files changed, 3003 insertions(+)
 create mode 100644 visualdl/component/inference/fastdeploy_lib.py
 create mode 100644 visualdl/component/inference/fastdeploy_server.py
 create mode 100644 visualdl/component/inference/proto/model_config.protxt
 create mode 100644 visualdl/component/inference/proto/model_config/protxt_pb2.py

diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py
new file mode 100644
index 000000000..0f56b70d0
--- /dev/null
+++ b/visualdl/component/inference/fastdeploy_lib.py
@@ -0,0 +1,54 @@
+import multiprocessing
+from subprocess import CalledProcessError
+from subprocess import PIPE
+from subprocess import Popen
+
+import google.protobuf.json_format as json_format
+import google.protobuf.text_format as text_format
+
+from .proto.model_config.protxt_pb2 import ModelConfig
+
+
+def pbtxt2json(content: str):
+    '''
+    Convert a protocol message in text format to a JSON string.
+    '''
+    message = text_format.Parse(content, ModelConfig())
+    json_string = json_format.MessageToJson(message)
+    return json_string
+
+
+def json2pbtxt(content: str):
+    '''
+    Convert a JSON string to a protocol message in text format.
+    '''
+    message = json_format.Parse(content, ModelConfig())
+    text_proto = text_format.MessageToString(message)
+    return text_proto
+
+
+def launch_process(kwargs: dict):
+    '''
+    Launch a fastdeployserver process according to the specified arguments.
+    '''
+    cmd = ['fastdeployserver']
+    for key, value in kwargs.items():
+        cmd.append('--{}'.format(key))
+        cmd.append('{}'.format(value))
+    p = Popen(cmd, stdout=PIPE, bufsize=1, universal_newlines=True)
+    return p
+
+
+def get_process_output(process):
+    '''
+    Get the standard output of an opened subprocess.
+    '''
+    for line in process.stdout:
+        yield line
+
+
+def kill_process(process):
+    '''
+    Stop an opened subprocess.
+    '''
+    process.kill()
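Aside for reviewers (not part of the patch): a minimal sketch of how the helpers above chain together. The `model-repository` flag name is an assumption for illustration only; consult `fastdeployserver --help` for the actual CLI options.

    from visualdl.component.inference.fastdeploy_lib import (
        get_process_output, json2pbtxt, kill_process, launch_process,
        pbtxt2json)

    # Round-trip a config between Triton-style text format and JSON.
    pbtxt = 'name: "yolov5"\nmax_batch_size: 16\n'
    as_json = pbtxt2json(pbtxt)  # note that field names become camelCase, e.g. "maxBatchSize"
    assert json2pbtxt(as_json).startswith('name:')

    # Launch a server, tail a few lines of its stdout, then stop it.
    server = launch_process({'model-repository': './models'})  # flag name assumed
    for i, line in enumerate(get_process_output(server)):
        print(line, end='')
        if i >= 9:
            break
    kill_process(server)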
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
new file mode 100644
index 000000000..d54a3a7a4
--- /dev/null
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -0,0 +1,102 @@
+# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =======================================================================
+import base64
+import json
+import os
+import tempfile
+from collections import deque
+from pathlib import Path
+from threading import Lock
+
+from flask import request
+
+from .fastdeploy_lib import get_process_output
+from .fastdeploy_lib import json2pbtxt
+from .fastdeploy_lib import kill_process
+from .fastdeploy_lib import launch_process
+from .fastdeploy_lib import pbtxt2json
+from visualdl.server.api import gen_result
+from visualdl.server.api import result
+
+
+class FastDeployServerApi(object):
+    def __init__(self):
+        self.root_dir = Path(os.getcwd())
+        self.opened_servers = {
+        }  # Used to store the pid and process object of each opened server
+
+    @result()
+    def get_directory(self, cur_dir):
+        if self.root_dir not in Path(os.path.abspath(cur_dir)).parents:
+            cur_dir = '.'
+        cur_dir, sub_dirs, filenames = next(os.walk(cur_dir))
+        if Path(self.root_dir) != Path(os.path.abspath(cur_dir)):
+            sub_dirs.append('..')
+        directories = {
+            'parent_dir':
+            os.path.relpath(Path(os.path.abspath(cur_dir)), self.root_dir),
+            'sub_dir':
+            sub_dirs
+        }
+        return directories
+
+    @result()
+    def get_config(self, cur_dir):
+        pass
+
+    @result()
+    def config_update(self, cur_dir, model_name):
+        pass
+
+    @result()
+    def start_server(self, configs):
+        process = launch_process(configs)
+        self.opened_servers[process.pid] = process
+        return process.pid
+
+    @result()
+    def stop_server(self, server_id):
+        process = self.opened_servers.pop(int(server_id), None)
+        if process is None:
+            return
+        kill_process(process)
+
+    @result('text/plain')
+    def get_server_output(self, server_id):
+        process = self.opened_servers.get(int(server_id))
+        return get_process_output(process) if process else None
+
+
+def create_fastdeploy_api_call():
+    api = FastDeployServerApi()
+    routes = {
+        'get_directory': (api.get_directory, ['dir']),
+        'config_update': (api.config_update, ['dir', 'name']),
+        'get_config': (api.get_config, ['dir']),
+        'start_server': (api.start_server, ['config']),
+        'stop_server': (api.stop_server, ['server_id']),
+        'get_server_output': (api.get_server_output, ['server_id'])
+    }
+
+    def call(path: str, args):
+        route = routes.get(path)
+        if not route:
+            return json.dumps(gen_result(
+                status=1, msg='api not found')), 'application/json', None
+        method, call_arg_names = route
+        call_args = [args.get(name) for name in call_arg_names]
+        return method(*call_args)
+
+    return call
diff --git a/visualdl/component/inference/proto/model_config.protxt b/visualdl/component/inference/proto/model_config.protxt
new file mode 100644
index 000000000..1751f02f7
--- /dev/null
+++ b/visualdl/component/inference/proto/model_config.protxt
@@ -0,0 +1,1981 @@
+// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright (c) 2018, TensorFlow Authors. All rights reserved. + +syntax = "proto3"; + +package inference; + +//@@.. cpp:namespace:: inference + +//@@ +//@@.. cpp:enum:: DataType +//@@ +//@@ Data types supported for input and output tensors. +//@@ +enum DataType { + //@@ .. cpp:enumerator:: DataType::INVALID = 0 + TYPE_INVALID = 0; + + //@@ .. cpp:enumerator:: DataType::BOOL = 1 + TYPE_BOOL = 1; + + //@@ .. cpp:enumerator:: DataType::UINT8 = 2 + TYPE_UINT8 = 2; + //@@ .. cpp:enumerator:: DataType::UINT16 = 3 + TYPE_UINT16 = 3; + //@@ .. cpp:enumerator:: DataType::UINT32 = 4 + TYPE_UINT32 = 4; + //@@ .. cpp:enumerator:: DataType::UINT64 = 5 + TYPE_UINT64 = 5; + + //@@ .. cpp:enumerator:: DataType::INT8 = 6 + TYPE_INT8 = 6; + //@@ .. cpp:enumerator:: DataType::INT16 = 7 + TYPE_INT16 = 7; + //@@ .. cpp:enumerator:: DataType::INT32 = 8 + TYPE_INT32 = 8; + //@@ .. cpp:enumerator:: DataType::INT64 = 9 + TYPE_INT64 = 9; + + //@@ .. cpp:enumerator:: DataType::FP16 = 10 + TYPE_FP16 = 10; + //@@ .. cpp:enumerator:: DataType::FP32 = 11 + TYPE_FP32 = 11; + //@@ .. cpp:enumerator:: DataType::FP64 = 12 + TYPE_FP64 = 12; + + //@@ .. cpp:enumerator:: DataType::STRING = 13 + TYPE_STRING = 13; + + //@@ .. cpp:enumerator:: DataType::BF16 = 14 + TYPE_BF16 = 14; +} + +//@@ +//@@ .. cpp:var:: message ModelRateLimiter +//@@ +//@@ The specifications required by the rate limiter to properly +//@@ schedule the inference requests across the different models +//@@ and their instances. +//@@ +message ModelRateLimiter +{ + //@@ .. cpp:var:: message Resource + //@@ + //@@ The resource property. + //@@ + message Resource + { + //@@ .. cpp:var:: string name + //@@ + //@@ The name associated with the resource. + //@@ + string name = 1; + + //@@ .. cpp:var:: bool global + //@@ + //@@ Whether or not the resource is global. If true then the resource + //@@ is assumed to be shared among the devices otherwise specified + //@@ count of the resource is assumed for each device associated + //@@ with the instance. + //@@ + bool global = 2; + + //@@ .. cpp:var:: uint32 count + //@@ + //@@ The number of resources required for the execution of the model + //@@ instance. + //@@ + uint32 count = 3; + } + + //@@ .. cpp:var:: Resource resources (repeated) + //@@ + //@@ The resources required to execute the request on a model instance. + //@@ Resources are just names with a corresponding count. The execution + //@@ of the instance will be blocked until the specificied resources are + //@@ available. By default an instance uses no rate-limiter resources. + //@@ + repeated Resource resources = 1; + + //@@ .. cpp:var:: uint32 priority + //@@ + //@@ The optional weighting value to be used for prioritizing across + //@@ instances. 
An instance with priority 2 will be given 1/2 the + //@@ number of scheduling chances as an instance_group with priority + //@@ 1. The default priority is 1. The priority of value 0 will be + //@@ treated as priority 1. + //@@ + uint32 priority = 2; +} + +//@@ +//@@.. cpp:var:: message ModelInstanceGroup +//@@ +//@@ A group of one or more instances of a model and resources made +//@@ available for those instances. +//@@ +message ModelInstanceGroup +{ + //@@ + //@@ .. cpp:enum:: Kind + //@@ + //@@ Kind of this instance group. + //@@ + enum Kind { + //@@ .. cpp:enumerator:: Kind::KIND_AUTO = 0 + //@@ + //@@ This instance group represents instances that can run on either + //@@ CPU or GPU. If all GPUs listed in 'gpus' are available then + //@@ instances will be created on GPU(s), otherwise instances will + //@@ be created on CPU. + //@@ + KIND_AUTO = 0; + + //@@ .. cpp:enumerator:: Kind::KIND_GPU = 1 + //@@ + //@@ This instance group represents instances that must run on the + //@@ GPU. + //@@ + KIND_GPU = 1; + + //@@ .. cpp:enumerator:: Kind::KIND_CPU = 2 + //@@ + //@@ This instance group represents instances that must run on the + //@@ CPU. + //@@ + KIND_CPU = 2; + + //@@ .. cpp:enumerator:: Kind::KIND_MODEL = 3 + //@@ + //@@ This instance group represents instances that should run on the + //@@ CPU and/or GPU(s) as specified by the model or backend itself. + //@@ The inference server will not override the model/backend + //@@ settings. + //@@ + KIND_MODEL = 3; + } + + //@@ + //@@ .. cpp:var:: message SecondaryDevice + //@@ + //@@ A secondary device required for a model instance. + //@@ + message SecondaryDevice + { + //@@ + //@@ .. cpp:enum:: SecondaryDeviceKind + //@@ + //@@ The kind of the secondary device. + //@@ + enum SecondaryDeviceKind { + //@@ .. cpp:enumerator:: SecondaryDeviceKind::KIND_NVDLA = 0 + //@@ + //@@ An NVDLA core. http://nvdla.org + //@@ Currently KIND_NVDLA is only supported by the TensorRT backend. + //@@ + KIND_NVDLA = 0; + } + + //@@ .. cpp:var:: SecondaryDeviceKind kind + //@@ + //@@ The secondary device kind. + //@@ + SecondaryDeviceKind kind = 1; + + //@@ .. cpp:var:: int64 device_id + //@@ + //@@ Identifier for the secondary device. + //@@ + int64 device_id = 2; + } + + //@@ .. cpp:var:: string name + //@@ + //@@ Optional name of this group of instances. If not specified the + //@@ name will be formed as _. The name of + //@@ individual instances will be further formed by a unique instance + //@@ number and GPU index: + //@@ + string name = 1; + + //@@ .. cpp:var:: Kind kind + //@@ + //@@ The kind of this instance group. Default is KIND_AUTO. If + //@@ KIND_AUTO or KIND_GPU then both 'count' and 'gpu' are valid and + //@@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid + //@@ and 'gpu' cannot be specified. + //@@ + Kind kind = 4; + + //@@ .. cpp:var:: int32 count + //@@ + //@@ For a group assigned to GPU, the number of instances created for + //@@ each GPU listed in 'gpus'. For a group assigned to CPU the number + //@@ of instances created. Default is 1. + int32 count = 2; + + //@@ .. cpp:var:: ModelRateLimiter rate_limiter + //@@ + //@@ The rate limiter specific settings to be associated with this + //@@ instance group. Optional, if not specified no rate limiting + //@@ will be applied to this instance group. + //@@ + ModelRateLimiter rate_limiter = 6; + + //@@ .. cpp:var:: int32 gpus (repeated) + //@@ + //@@ GPU(s) where instances should be available. For each GPU listed, + //@@ 'count' instances of the model will be available. 
Setting 'gpus'
+  //@@   to empty (or not specifying at all) is equivalent to listing all
+  //@@   available GPUs.
+  //@@
+  repeated int32 gpus = 3;
+
+  //@@  .. cpp:var:: SecondaryDevice secondary_devices (repeated)
+  //@@
+  //@@     Secondary devices that are required by instances specified by this
+  //@@     instance group. Optional.
+  //@@
+  repeated SecondaryDevice secondary_devices = 8;
+
+  //@@  .. cpp:var:: string profile (repeated)
+  //@@
+  //@@     For TensorRT models containing multiple optimization profile, this
+  //@@     parameter specifies a set of optimization profiles available to this
+  //@@     instance group. The inference server will choose the optimal profile
+  //@@     based on the shapes of the input tensors. This field should lie
+  //@@     between 0 and <TotalNumberOfOptimizationProfilesInPlanFile> - 1
+  //@@     and be specified only for TensorRT backend, otherwise an error will
+  //@@     be generated. If not specified, the server will select the first
+  //@@     optimization profile by default.
+  //@@
+  repeated string profile = 5;
+
+  //@@  .. cpp:var:: bool passive
+  //@@
+  //@@     Whether the instances within this instance group will be accepting
+  //@@     inference requests from the scheduler. If true, the instances will
+  //@@     not be added to the scheduler. Default value is false.
+  //@@
+  bool passive = 7;
+
+  //@@  .. cpp:var:: string host_policy
+  //@@
+  //@@     The host policy name that the instance to be associated with.
+  //@@     The default value is set to reflect the device kind of the instance,
+  //@@     for instance, KIND_CPU is "cpu", KIND_MODEL is "model" and
+  //@@     KIND_GPU is "gpu_<gpu_id>".
+  //@@
+  string host_policy = 9;
+}
+
+//@@
+//@@.. cpp:var:: message ModelTensorReshape
+//@@
+//@@   Reshape specification for input and output tensors.
+//@@
+message ModelTensorReshape
+{
+  //@@  .. cpp:var:: int64 shape (repeated)
+  //@@
+  //@@     The shape to use for reshaping.
+  //@@
+  repeated int64 shape = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelInput
+//@@
+//@@   An input required by the model.
+//@@
+message ModelInput
+{
+  //@@
+  //@@  .. cpp:enum:: Format
+  //@@
+  //@@     The format for the input.
+  //@@
+  enum Format {
+    //@@    .. cpp:enumerator:: Format::FORMAT_NONE = 0
+    //@@
+    //@@       The input has no specific format. This is the default.
+    //@@
+    FORMAT_NONE = 0;
+
+    //@@    .. cpp:enumerator:: Format::FORMAT_NHWC = 1
+    //@@
+    //@@       HWC image format. Tensors with this format require 3 dimensions
+    //@@       if the model does not support batching (max_batch_size = 0) or 4
+    //@@       dimensions if the model does support batching (max_batch_size
+    //@@       >= 1). In either case the 'dims' below should only specify the
+    //@@       3 non-batch dimensions (i.e. HWC or CHW).
+    //@@
+    FORMAT_NHWC = 1;
+
+    //@@    .. cpp:enumerator:: Format::FORMAT_NCHW = 2
+    //@@
+    //@@       CHW image format. Tensors with this format require 3 dimensions
+    //@@       if the model does not support batching (max_batch_size = 0) or 4
+    //@@       dimensions if the model does support batching (max_batch_size
+    //@@       >= 1). In either case the 'dims' below should only specify the
+    //@@       3 non-batch dimensions (i.e. HWC or CHW).
+    //@@
+    FORMAT_NCHW = 2;
+  }
+
+  //@@  .. cpp:var:: string name
+  //@@
+  //@@     The name of the input.
+  //@@
+  string name = 1;
+
+  //@@  .. cpp:var:: DataType data_type
+  //@@
+  //@@     The data-type of the input.
+  //@@
+  DataType data_type = 2;
+
+  //@@  .. cpp:var:: Format format
+  //@@
+  //@@     The format of the input. Optional.
+  //@@
+  Format format = 3;
+
+  //@@ ..
cpp:var:: int64 dims (repeated) + //@@ + //@@ The dimensions/shape of the input tensor that must be provided + //@@ when invoking the inference API for this model. + //@@ + repeated int64 dims = 4; + + //@@ .. cpp:var:: ModelTensorReshape reshape + //@@ + //@@ The shape expected for this input by the backend. The input will + //@@ be reshaped to this before being presented to the backend. The + //@@ reshape must have the same number of elements as the input shape + //@@ specified by 'dims'. Optional. + //@@ + ModelTensorReshape reshape = 5; + + //@@ .. cpp:var:: bool is_shape_tensor + //@@ + //@@ Whether or not the input is a shape tensor to the model. This field + //@@ is currently supported only for the TensorRT model. An error will be + //@@ generated if this specification does not comply with underlying + //@@ model. + //@@ + bool is_shape_tensor = 6; + + //@@ .. cpp:var:: bool allow_ragged_batch + //@@ + //@@ Whether or not the input is allowed to be "ragged" in a dynamically + //@@ created batch. Default is false indicating that two requests will + //@@ only be batched if this tensor has the same shape in both requests. + //@@ True indicates that two requests can be batched even if this tensor + //@@ has a different shape in each request. + //@@ + bool allow_ragged_batch = 7; + + //@@ .. cpp:var:: bool optional + //@@ + //@@ Whether or not the input is optional for the model execution. + //@@ If true, the input is not required in the inference request. + //@@ Default value is false. + //@@ + bool optional = 8; +} + +//@@ +//@@.. cpp:var:: message ModelOutput +//@@ +//@@ An output produced by the model. +//@@ +message ModelOutput +{ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the output. + //@@ + string name = 1; + + //@@ .. cpp:var:: DataType data_type + //@@ + //@@ The data-type of the output. + //@@ + DataType data_type = 2; + + //@@ .. cpp:var:: int64 dims (repeated) + //@@ + //@@ The dimensions/shape of the output tensor. + //@@ + repeated int64 dims = 3; + + //@@ .. cpp:var:: ModelTensorReshape reshape + //@@ + //@@ The shape produced for this output by the backend. The output will + //@@ be reshaped from this to the shape specifed in 'dims' before being + //@@ returned in the inference response. The reshape must have the same + //@@ number of elements as the output shape specified by 'dims'. Optional. + //@@ + ModelTensorReshape reshape = 5; + + //@@ .. cpp:var:: string label_filename + //@@ + //@@ The label file associated with this output. Should be specified only + //@@ for outputs that represent classifications. Optional. + //@@ + string label_filename = 4; + + + //@@ .. cpp:var:: bool is_shape_tensor + //@@ + //@@ Whether or not the output is a shape tensor to the model. This field + //@@ is currently supported only for the TensorRT model. An error will be + //@@ generated if this specification does not comply with underlying + //@@ model. + //@@ + bool is_shape_tensor = 6; +} + +//@@ .. cpp:var:: message BatchInput +//@@ +//@@ A batch input is an additional input that must be added by +//@@ the backend based on all the requests in a batch. +//@@ +message BatchInput +{ + //@@ + //@@ .. cpp:enum:: Kind + //@@ + //@@ The kind of the batch input. + //@@ + enum Kind { + //@@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0 + //@@ + //@@ The element count of the 'source_input' will be added as + //@@ input with shape [1]. + //@@ + BATCH_ELEMENT_COUNT = 0; + + //@@ .. 
cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1 + //@@ + //@@ The accumulated element count of the 'source_input' will be + //@@ added as input with shape [1]. For example, if there is a + //@@ batch of two request, each with 2 elements, an input of value + //@@ 2 will be added to the first request, and an input of value + //@@ 4 will be added to the second request. + //@@ + BATCH_ACCUMULATED_ELEMENT_COUNT = 1; + + //@@ .. cpp:enumerator:: + //@@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2 + //@@ + //@@ The accumulated element count of the 'source_input' will be + //@@ added as input with shape [1], except for the first request + //@@ in the batch. For the first request in the batch, the input + //@@ will have shape [2] where the first element is value 0. + //@@ + BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2; + + //@@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3 + //@@ + //@@ Among the requests in the batch, the max element count of the + //@@ 'source_input' will be added as input with shape + //@@ [max_element_count] for the first request in the batch. + //@@ For other requests, such input will be with shape [0]. + //@@ The data of the tensor will be uninitialized. + //@@ + BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3; + + //@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE = 4 + //@@ + //@@ Among the requests in the batch, the shape of the + //@@ 'source_input' will be added as input with shape + //@@ [batch_size, len(input_dim)]. For example, if one + //@@ batch-2 input with shape [3, 1] and batch-1 input + //@@ with shape [2, 2] are batched, the batch input will + //@@ have shape [3, 2] and value [ [3, 1], [3, 1], [2, 2]]. + //@@ + BATCH_ITEM_SHAPE = 4; + + //@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE_FLATTEN = 5 + //@@ + //@@ Among the requests in the batch, the shape of the + //@@ 'source_input' will be added as input with single dimensional + //@@ shape [batch_size * len(input_dim)]. For example, if one + //@@ batch-2 input with shape [3, 1] and batch-1 input + //@@ with shape [2, 2] are batched, the batch input will + //@@ have shape [6] and value [3, 1, 3, 1, 2, 2]. + //@@ + BATCH_ITEM_SHAPE_FLATTEN = 5; + } + + //@@ .. cpp:var:: Kind kind + //@@ + //@@ The kind of this batch input. + //@@ + Kind kind = 1; + + //@@ .. cpp:var:: string target_name (repeated) + //@@ + //@@ The name of the model inputs that the backend will create + //@@ for this batch input. + //@@ + repeated string target_name = 2; + + //@@ .. cpp:var:: DataType data_type + //@@ + //@@ The input's datatype. The data type can be TYPE_INT32 or + //@@ TYPE_FP32. + //@@ + DataType data_type = 3; + + //@@ .. cpp:var:: string source_input (repeated) + //@@ + //@@ The backend derives the value for each batch input from one or + //@@ more other inputs. 'source_input' gives the names of those + //@@ inputs. + //@@ + repeated string source_input = 4; +} + +//@@.. cpp:var:: message BatchOutput +//@@ +//@@ A batch output is an output produced by the model that must be handled +//@@ differently by the backend based on all the requests in a batch. +//@@ +message BatchOutput +{ + //@@ + //@@ .. cpp:enum:: Kind + //@@ + //@@ The kind of the batch output. + //@@ + enum Kind { + //@@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0 + //@@ + //@@ The output should be scattered according to the shape of + //@@ 'source_input'. The dynamic dimension of the output will + //@@ be set to the value of the same dimension in the input. + //@@ + BATCH_SCATTER_WITH_INPUT_SHAPE = 0; + } + + //@@ .. 
cpp:var:: string target_name (repeated) + //@@ + //@@ The name of the outputs to be produced by this batch output + //@@ specification. + //@@ + repeated string target_name = 1; + + //@@ .. cpp:var:: Kind kind + //@@ + //@@ The kind of this batch output. + //@@ + Kind kind = 2; + + //@@ .. cpp:var:: string source_input (repeated) + //@@ + //@@ The backend derives each batch output from one or more inputs. + //@@ 'source_input' gives the names of those inputs. + //@@ + repeated string source_input = 3; +} + +//@@ +//@@.. cpp:var:: message ModelVersionPolicy +//@@ +//@@ Policy indicating which versions of a model should be made +//@@ available by the inference server. +//@@ +message ModelVersionPolicy +{ + //@@ .. cpp:var:: message Latest + //@@ + //@@ Serve only the latest version(s) of a model. This is + //@@ the default policy. + //@@ + message Latest + { + //@@ .. cpp:var:: uint32 num_versions + //@@ + //@@ Serve only the 'num_versions' highest-numbered versions. T + //@@ The default value of 'num_versions' is 1, indicating that by + //@@ default only the single highest-number version of a + //@@ model will be served. + //@@ + uint32 num_versions = 1; + } + + //@@ .. cpp:var:: message All + //@@ + //@@ Serve all versions of the model. + //@@ + message All {} + + //@@ .. cpp:var:: message Specific + //@@ + //@@ Serve only specific versions of the model. + //@@ + message Specific + { + //@@ .. cpp:var:: int64 versions (repeated) + //@@ + //@@ The specific versions of the model that will be served. + //@@ + repeated int64 versions = 1; + } + + //@@ .. cpp:var:: oneof policy_choice + //@@ + //@@ Each model must implement only a single version policy. The + //@@ default policy is 'Latest'. + //@@ + oneof policy_choice + { + //@@ .. cpp:var:: Latest latest + //@@ + //@@ Serve only latest version(s) of the model. + //@@ + Latest latest = 1; + + //@@ .. cpp:var:: All all + //@@ + //@@ Serve all versions of the model. + //@@ + All all = 2; + + //@@ .. cpp:var:: Specific specific + //@@ + //@@ Serve only specific version(s) of the model. + //@@ + Specific specific = 3; + } +} + +//@@ +//@@.. cpp:var:: message ModelOptimizationPolicy +//@@ +//@@ Optimization settings for a model. These settings control if/how a +//@@ model is optimized and prioritized by the backend framework when +//@@ it is loaded. +//@@ +message ModelOptimizationPolicy +{ + //@@ + //@@ .. cpp:var:: message Graph + //@@ + //@@ Enable generic graph optimization of the model. If not specified + //@@ the framework's default level of optimization is used. Supports + //@@ TensorFlow graphdef and savedmodel and Onnx models. For TensorFlow + //@@ causes XLA to be enabled/disabled for the model. For Onnx defaults + //@@ to enabling all optimizations, -1 enables only basic optimizations, + //@@ +1 enables only basic and extended optimizations. + //@@ + message Graph + { + //@@ .. cpp:var:: int32 level + //@@ + //@@ The optimization level. Defaults to 0 (zero) if not specified. + //@@ + //@@ - -1: Disabled + //@@ - 0: Framework default + //@@ - 1+: Enable optimization level (greater values indicate + //@@ higher optimization levels) + //@@ + int32 level = 1; + } + + //@@ + //@@ .. cpp:enum:: ModelPriority + //@@ + //@@ Model priorities. A model will be given scheduling and execution + //@@ preference over models at lower priorities. Current model + //@@ priorities only work for TensorRT models. + //@@ + enum ModelPriority { + //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0 + //@@ + //@@ The default model priority. 
+  //@@
+    PRIORITY_DEFAULT = 0;
+
+    //@@    .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1
+    //@@
+    //@@       The maximum model priority.
+    //@@
+    PRIORITY_MAX = 1;
+
+    //@@    .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2
+    //@@
+    //@@       The minimum model priority.
+    //@@
+    PRIORITY_MIN = 2;
+  }
+
+  //@@
+  //@@  .. cpp:var:: message Cuda
+  //@@
+  //@@     CUDA-specific optimization settings.
+  //@@
+  message Cuda
+  {
+    //@@  .. cpp:var:: message GraphSpec
+    //@@
+    //@@     Specification of the CUDA graph to be captured.
+    //@@
+    message GraphSpec
+    {
+      //@@  .. cpp:var:: message Dims
+      //@@
+      //@@     Specification of tensor dimension.
+      //@@
+      message Shape
+      {
+        //@@  .. cpp:var:: int64 dim (repeated)
+        //@@
+        //@@     The dimension.
+        //@@
+        repeated int64 dim = 1;
+      }
+
+      message LowerBound
+      {
+        //@@  .. cpp:var:: int32 batch_size
+        //@@
+        //@@     The batch size of the CUDA graph. If 'max_batch_size' is 0,
+        //@@     'batch_size' must be set to 0. Otherwise, 'batch_size' must
+        //@@     be set to value between 1 and 'max_batch_size'.
+        //@@
+        int32 batch_size = 1;
+
+        //@@  .. cpp:var:: map<string, Shape> input
+        //@@
+        //@@     The specification of the inputs. 'Shape' is the shape of
+        //@@     the input without batching dimension.
+        //@@
+        map<string, Shape> input = 2;
+      }
+
+      //@@  .. cpp:var:: int32 batch_size
+      //@@
+      //@@     The batch size of the CUDA graph. If 'max_batch_size' is 0,
+      //@@     'batch_size' must be set to 0. Otherwise, 'batch_size' must
+      //@@     be set to value between 1 and 'max_batch_size'.
+      //@@
+      int32 batch_size = 1;
+
+      //@@  .. cpp:var:: map<string, Shape> input
+      //@@
+      //@@     The specification of the inputs. 'Shape' is the shape of the
+      //@@     input without batching dimension.
+      //@@
+      map<string, Shape> input = 2;
+
+      //@@  .. cpp:var:: LowerBound graph_lower_bound
+      //@@
+      //@@     Specify the lower bound of the CUDA graph. Optional.
+      //@@     If specified, the graph can be used for input shapes and
+      //@@     batch sizes that are in closed interval between the lower
+      //@@     bound specification and graph specification. For dynamic
+      //@@     shape model, this allows CUDA graphs to be launched
+      //@@     frequently without capturing all possible shape combinations.
+      //@@     However, using graph for shape combinations different from
+      //@@     the one used for capturing introduces uninitialized data for
+      //@@     execution and it may distort the inference result if
+      //@@     the model is sensitive to uninitialized data.
+      //@@
+      LowerBound graph_lower_bound = 3;
+    }
+
+    //@@  .. cpp:var:: bool graphs
+    //@@
+    //@@     Use CUDA graphs API to capture model operations and execute
+    //@@     them more efficiently. Default value is false.
+    //@@     Currently only recognized by TensorRT backend.
+    //@@
+    bool graphs = 1;
+
+    //@@  .. cpp:var:: bool busy_wait_events
+    //@@
+    //@@     Use busy-waiting to synchronize CUDA events to achieve minimum
+    //@@     latency from event complete to host thread to be notified, with
+    //@@     the cost of high CPU load. Default value is false.
+    //@@     Currently only recognized by TensorRT backend.
+    //@@
+    bool busy_wait_events = 2;
+
+    //@@  .. cpp:var:: GraphSpec graph_spec (repeated)
+    //@@
+    //@@     Specification of the CUDA graph to be captured. If not specified
+    //@@     and 'graphs' is true, the default CUDA graphs will be captured
+    //@@     based on model settings.
+    //@@     Currently only recognized by TensorRT backend.
+    //@@
+    repeated GraphSpec graph_spec = 3;
+
+    //@@  .. cpp:var:: bool output_copy_stream
+    //@@
+    //@@     Uses a CUDA stream separate from the inference stream to copy the
+    //@@     output to host. However, be aware that setting this option to
+    //@@     true will lead to an increase in the memory consumption of the
+    //@@     model as Triton will allocate twice as much GPU memory for its
+    //@@     I/O tensor buffers. Default value is false.
+    //@@     Currently only recognized by TensorRT backend.
+    //@@
+    bool output_copy_stream = 4;
+  }
+
+  //@@
+  //@@  .. cpp:var:: message ExecutionAccelerators
+  //@@
+  //@@     Specify the preferred execution accelerators to be used to execute
+  //@@     the model. Currently only recognized by ONNX Runtime backend and
+  //@@     TensorFlow backend.
+  //@@
+  //@@     For ONNX Runtime backend, it will deploy the model with the execution
+  //@@     accelerators by priority, the priority is determined based on the
+  //@@     order that they are set, i.e. the provider at the front has highest
+  //@@     priority. Overall, the priority will be in the following order:
+  //@@         <gpu_execution_accelerator> (if instance is on GPU)
+  //@@         CUDA Execution Provider     (if instance is on GPU)
+  //@@         <cpu_execution_accelerator>
+  //@@         Default CPU Execution Provider
+  //@@
+  message ExecutionAccelerators
+  {
+    //@@
+    //@@  .. cpp:var:: message Accelerator
+    //@@
+    //@@     Specify the accelerator to be used to execute the model.
+    //@@     Accelerator with the same name may accept different parameters
+    //@@     depending on the backends.
+    //@@
+    message Accelerator
+    {
+      //@@  .. cpp:var:: string name
+      //@@
+      //@@     The name of the execution accelerator.
+      //@@
+      string name = 1;
+
+      //@@  .. cpp:var:: map<string, string> parameters
+      //@@
+      //@@     Additional parameters used to configure the accelerator.
+      //@@
+      map<string, string> parameters = 2;
+    }
+
+    //@@  .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
+    //@@
+    //@@     The preferred execution provider to be used if the model instance
+    //@@     is deployed on GPU.
+    //@@
+    //@@     For ONNX Runtime backend, possible value is "tensorrt" as name,
+    //@@     and no parameters are required.
+    //@@
+    //@@     For TensorFlow backend, possible values are "tensorrt",
+    //@@     "auto_mixed_precision", "gpu_io".
+    //@@
+    //@@     For "tensorrt", the following parameters can be specified:
+    //@@       "precision_mode": The precision used for optimization.
+    //@@       Allowed values are "FP32" and "FP16". Default value is "FP32".
+    //@@
+    //@@       "max_cached_engines": The maximum number of cached TensorRT
+    //@@       engines in dynamic TensorRT ops. Default value is 100.
+    //@@
+    //@@       "minimum_segment_size": The smallest model subgraph that will
+    //@@       be considered for optimization by TensorRT. Default value is 3.
+    //@@
+    //@@       "max_workspace_size_bytes": The maximum GPU memory the model
+    //@@       can use temporarily during execution. Default value is 1GB.
+    //@@
+    //@@     For "auto_mixed_precision", no parameters are required. If set,
+    //@@     the model will try to use FP16 for better performance.
+    //@@     This optimization cannot be set with "tensorrt".
+    //@@
+    //@@     For "gpu_io", no parameters are required. If set, the model will
+    //@@     be executed using TensorFlow Callable API to set input and output
+    //@@     tensors in GPU memory if possible, which can reduce data transfer
+    //@@     overhead if the model is used in ensemble. However, the Callable
+    //@@     object will be created on model creation and it will request all
+    //@@     outputs for every model execution, which may impact the
+    //@@     performance if a request does not require all outputs. This
+    //@@     optimization will only take effect if the model instance is
+    //@@     created with KIND_GPU.
+    //@@
+    repeated Accelerator gpu_execution_accelerator = 1;
+
+    //@@ ..
cpp:var:: Accelerator cpu_execution_accelerator (repeated) + //@@ + //@@ The preferred execution provider to be used if the model instance + //@@ is deployed on CPU. + //@@ + //@@ For ONNX Runtime backend, possible value is "openvino" as name, + //@@ and no parameters are required. + //@@ + repeated Accelerator cpu_execution_accelerator = 2; + } + + //@@ + //@@ .. cpp:var:: message PinnedMemoryBuffer + //@@ + //@@ Specify whether to use a pinned memory buffer when transferring data + //@@ between non-pinned system memory and GPU memory. Using a pinned + //@@ memory buffer for system from/to GPU transfers will typically provide + //@@ increased performance. For example, in the common use case where the + //@@ request provides inputs and delivers outputs via non-pinned system + //@@ memory, if the model instance accepts GPU IOs, the inputs will be + //@@ processed by two copies: from non-pinned system memory to pinned + //@@ memory, and from pinned memory to GPU memory. Similarly, pinned + //@@ memory will be used for delivering the outputs. + //@@ + message PinnedMemoryBuffer + { + //@@ .. cpp:var:: bool enable + //@@ + //@@ Use pinned memory buffer. Default is true. + //@@ + bool enable = 1; + } + + //@@ .. cpp:var:: Graph graph + //@@ + //@@ The graph optimization setting for the model. Optional. + //@@ + Graph graph = 1; + + //@@ .. cpp:var:: ModelPriority priority + //@@ + //@@ The priority setting for the model. Optional. + //@@ + ModelPriority priority = 2; + + //@@ .. cpp:var:: Cuda cuda + //@@ + //@@ CUDA-specific optimization settings. Optional. + //@@ + Cuda cuda = 3; + + //@@ .. cpp:var:: ExecutionAccelerators execution_accelerators + //@@ + //@@ The accelerators used for the model. Optional. + //@@ + ExecutionAccelerators execution_accelerators = 4; + + //@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory + //@@ + //@@ Use pinned memory buffer when the data transfer for inputs + //@@ is between GPU memory and non-pinned system memory. + //@@ Default is true. + //@@ + PinnedMemoryBuffer input_pinned_memory = 5; + + //@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory + //@@ + //@@ Use pinned memory buffer when the data transfer for outputs + //@@ is between GPU memory and non-pinned system memory. + //@@ Default is true. + //@@ + PinnedMemoryBuffer output_pinned_memory = 6; + + //@@ .. cpp:var:: uint32 gather_kernel_buffer_threshold + //@@ + //@@ The backend may use a gather kernel to gather input data if the + //@@ device has direct access to the source buffer and the destination + //@@ buffer. In such case, the gather kernel will be used only if the + //@@ number of buffers to be gathered is greater or equal to + //@@ the specifed value. If 0, the gather kernel will be disabled. + //@@ Default value is 0. + //@@ Currently only recognized by TensorRT backend. + //@@ + uint32 gather_kernel_buffer_threshold = 7; + + //@@ .. cpp:var:: bool eager_batching + //@@ + //@@ Start preparing the next batch before the model instance is ready + //@@ for the next inference. This option can be used to overlap the + //@@ batch preparation with model execution, with the trade-off that + //@@ the next batch might be smaller than what it could have been. + //@@ Default value is false. + //@@ Currently only recognized by TensorRT backend. + //@@ + bool eager_batching = 8; +} + +//@@ +//@@.. cpp:var:: message ModelQueuePolicy +//@@ +//@@ Queue policy for inference requests. +//@@ +message ModelQueuePolicy +{ + //@@ + //@@ .. 
cpp:enum:: TimeoutAction + //@@ + //@@ The action applied to timed-out requests. + //@@ + enum TimeoutAction { + //@@ .. cpp:enumerator:: Action::REJECT = 0 + //@@ + //@@ Reject the request and return error message accordingly. + //@@ + REJECT = 0; + + //@@ .. cpp:enumerator:: Action::DELAY = 1 + //@@ + //@@ Delay the request until all other requests at the same + //@@ (or higher) priority levels that have not reached their timeouts + //@@ are processed. A delayed request will eventually be processed, + //@@ but may be delayed indefinitely due to newly arriving requests. + //@@ + DELAY = 1; + } + + //@@ + //@@ .. cpp:var:: TimeoutAction timeout_action + //@@ + //@@ The action applied to timed-out request. + //@@ The default action is REJECT. + //@@ + TimeoutAction timeout_action = 1; + + //@@ + //@@ .. cpp:var:: uint64 default_timeout_microseconds + //@@ + //@@ The default timeout for every request, in microseconds. + //@@ The default value is 0 which indicates that no timeout is set. + //@@ + uint64 default_timeout_microseconds = 2; + + //@@ + //@@ .. cpp:var:: bool allow_timeout_override + //@@ + //@@ Whether individual request can override the default timeout value. + //@@ When true, individual requests can set a timeout that is less than + //@@ the default timeout value but may not increase the timeout. + //@@ The default value is false. + //@@ + bool allow_timeout_override = 3; + + //@@ + //@@ .. cpp:var:: uint32 max_queue_size + //@@ + //@@ The maximum queue size for holding requests. A request will be + //@@ rejected immediately if it can't be enqueued because the queue is + //@@ full. The default value is 0 which indicates that no maximum + //@@ queue size is enforced. + //@@ + uint32 max_queue_size = 4; +} + +//@@ +//@@.. cpp:var:: message ModelDynamicBatching +//@@ +//@@ Dynamic batching configuration. These settings control how dynamic +//@@ batching operates for the model. +//@@ +message ModelDynamicBatching +{ + //@@ .. cpp:var:: int32 preferred_batch_size (repeated) + //@@ + //@@ Preferred batch sizes for dynamic batching. If a batch of one of + //@@ these sizes can be formed it will be executed immediately. If + //@@ not specified a preferred batch size will be chosen automatically + //@@ based on model and GPU characteristics. + //@@ + repeated int32 preferred_batch_size = 1; + + //@@ .. cpp:var:: uint64 max_queue_delay_microseconds + //@@ + //@@ The maximum time, in microseconds, a request will be delayed in + //@@ the scheduling queue to wait for additional requests for + //@@ batching. Default is 0. + //@@ + uint64 max_queue_delay_microseconds = 2; + + //@@ .. cpp:var:: bool preserve_ordering + //@@ + //@@ Should the dynamic batcher preserve the ordering of responses to + //@@ match the order of requests received by the scheduler. Default is + //@@ false. If true, the responses will be returned in the same order as + //@@ the order of requests sent to the scheduler. If false, the responses + //@@ may be returned in arbitrary order. This option is specifically + //@@ needed when a sequence of related inference requests (i.e. inference + //@@ requests with the same correlation ID) are sent to the dynamic + //@@ batcher to ensure that the sequence responses are in the correct + //@@ order. + //@@ + bool preserve_ordering = 3; + + //@@ .. cpp:var:: uint32 priority_levels + //@@ + //@@ The number of priority levels to be enabled for the model, + //@@ the priority level starts from 1 and 1 is the highest priority. 
+  //@@     Requests are handled in priority order with all priority 1 requests
+  //@@     processed before priority 2, all priority 2 requests processed before
+  //@@     priority 3, etc. Requests with the same priority level will be
+  //@@     handled in the order that they are received.
+  //@@
+  uint32 priority_levels = 4;
+
+  //@@  .. cpp:var:: uint32 default_priority_level
+  //@@
+  //@@     The priority level used for requests that don't specify their
+  //@@     priority. The value must be in the range [ 1, 'priority_levels' ].
+  //@@
+  uint32 default_priority_level = 5;
+
+  //@@  .. cpp:var:: ModelQueuePolicy default_queue_policy
+  //@@
+  //@@     The default queue policy used for requests that don't require
+  //@@     priority handling and requests that specify priority levels where
+  //@@     there is no specific policy given. If not specified, a policy with
+  //@@     default field values will be used.
+  //@@
+  ModelQueuePolicy default_queue_policy = 6;
+
+  //@@  .. cpp:var:: map<uint32, ModelQueuePolicy> priority_queue_policy
+  //@@
+  //@@     Specify the queue policy for the priority level. The default queue
+  //@@     policy will be used if a priority level doesn't specify a queue
+  //@@     policy.
+  //@@
+  map<uint32, ModelQueuePolicy> priority_queue_policy = 7;
+}
+
+//@@
+//@@.. cpp:var:: message ModelSequenceBatching
+//@@
+//@@   Sequence batching configuration. These settings control how sequence
+//@@   batching operates for the model.
+//@@
+message ModelSequenceBatching
+{
+  //@@  .. cpp:var:: message Control
+  //@@
+  //@@     A control is a signal that the sequence batcher uses to
+  //@@     communicate with a backend.
+  //@@
+  message Control
+  {
+    //@@
+    //@@  .. cpp:enum:: Kind
+    //@@
+    //@@     The kind of the control.
+    //@@
+    enum Kind {
+      //@@    .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0
+      //@@
+      //@@       A new sequence is/is-not starting. If true a sequence is
+      //@@       starting, if false a sequence is continuing. Must
+      //@@       specify either int32_false_true, fp32_false_true or
+      //@@       bool_false_true for this control. This control is optional.
+      //@@
+      CONTROL_SEQUENCE_START = 0;
+
+      //@@    .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1
+      //@@
+      //@@       A sequence is/is-not ready for inference. If true the
+      //@@       input tensor data is valid and should be used. If false
+      //@@       the input tensor data is invalid and inferencing should
+      //@@       be "skipped". Must specify either int32_false_true,
+      //@@       fp32_false_true or bool_false_true for this control. This
+      //@@       control is optional.
+      //@@
+      CONTROL_SEQUENCE_READY = 1;
+
+      //@@    .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2
+      //@@
+      //@@       A sequence is/is-not ending. If true a sequence is
+      //@@       ending, if false a sequence is continuing. Must specify
+      //@@       either int32_false_true, fp32_false_true or bool_false_true
+      //@@       for this control. This control is optional.
+      //@@
+      CONTROL_SEQUENCE_END = 2;
+
+      //@@    .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3
+      //@@
+      //@@       The correlation ID of the sequence. The correlation ID
+      //@@       is an uint64_t value that is communicated in whole or
+      //@@       in part by the tensor. The tensor's datatype must be
+      //@@       specified by data_type and must be TYPE_UINT64, TYPE_INT64,
+      //@@       TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified
+      //@@       the correlation ID will be truncated to the low-order 32
+      //@@       bits. This control is optional.
+      //@@
+      CONTROL_SEQUENCE_CORRID = 3;
+    }
+
+    //@@  .. cpp:var:: Kind kind
+    //@@
+    //@@     The kind of this control.
+    //@@
+    Kind kind = 1;
+
+    //@@ ..
cpp:var:: int32 int32_false_true (repeated) + //@@ + //@@ The control's true and false setting is indicated by setting + //@@ a value in an int32 tensor. The tensor must be a + //@@ 1-dimensional tensor with size equal to the batch size of + //@@ the request. 'int32_false_true' must have two entries: the + //@@ first the false value and the second the true value. + //@@ + repeated int32 int32_false_true = 2; + + //@@ .. cpp:var:: float fp32_false_true (repeated) + //@@ + //@@ The control's true and false setting is indicated by setting + //@@ a value in a fp32 tensor. The tensor must be a + //@@ 1-dimensional tensor with size equal to the batch size of + //@@ the request. 'fp32_false_true' must have two entries: the + //@@ first the false value and the second the true value. + //@@ + repeated float fp32_false_true = 3; + + //@@ .. cpp:var:: bool bool_false_true (repeated) + //@@ + //@@ The control's true and false setting is indicated by setting + //@@ a value in a bool tensor. The tensor must be a + //@@ 1-dimensional tensor with size equal to the batch size of + //@@ the request. 'bool_false_true' must have two entries: the + //@@ first the false value and the second the true value. + //@@ + repeated bool bool_false_true = 5; + + //@@ .. cpp:var:: DataType data_type + //@@ + //@@ The control's datatype. + //@@ + DataType data_type = 4; + } + + //@@ .. cpp:var:: message ControlInput + //@@ + //@@ The sequence control values to communicate by a model input. + //@@ + message ControlInput + { + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the model input. + //@@ + string name = 1; + + //@@ .. cpp:var:: Control control (repeated) + //@@ + //@@ The control value(s) that should be communicated to the + //@@ model using this model input. + //@@ + repeated Control control = 2; + } + + //@@ + //@@ .. cpp:var:: message InitialState + //@@ + //@@ Settings used to initialize data for implicit state. + //@@ + message InitialState + { + //@@ .. cpp:var:: DataType data_type + //@@ + //@@ The data-type of the state. + //@@ + DataType data_type = 1; + + //@@ .. cpp:var:: int64 dims (repeated) + //@@ + //@@ The shape of the state tensor, not including the batch dimension. + //@@ + repeated int64 dims = 2; + + //@@ .. cpp:var:: oneof state_data + //@@ + //@@ Specify how the initial state data is generated. + //@@ + oneof state_data + { + //@@ + //@@ .. cpp:var:: bool zero_data + //@@ + //@@ The identifier for using zeros as initial state data. + //@@ Note that the value of 'zero_data' will not be checked, + //@@ instead, zero data will be used as long as the field is set. + //@@ + bool zero_data = 3; + + //@@ .. cpp:var:: string data_file + //@@ + //@@ The file whose content will be used as the initial data for + //@@ the state in row-major order. The file must be provided in + //@@ sub-directory 'initial_state' under the model directory. + //@@ + string data_file = 4; + } + + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the state initialization. + //@@ + string name = 5; + } + + //@@ .. cpp:var:: message State + //@@ + //@@ An input / output pair of tensors that carry state for the sequence. + //@@ + message State + { + //@@ .. cpp:var:: string input_name + //@@ + //@@ The name of the model state input. + //@@ + string input_name = 1; + + //@@ .. cpp:var:: string output_name + //@@ + //@@ The name of the model state output. + //@@ + string output_name = 2; + + //@@ .. cpp:var:: DataType data_type + //@@ + //@@ The data-type of the state. + //@@ + DataType data_type = 3; + + //@@ .. 
cpp:var:: int64 dim (repeated) + //@@ + //@@ The dimension. + //@@ + repeated int64 dims = 4; + + //@@ .. cpp:var:: InitialState initial_state (repeated) + //@@ + //@@ The optional field to specify the initial state for the model. + //@@ + repeated InitialState initial_state = 5; + } + + //@@ .. cpp:var:: message StrategyDirect + //@@ + //@@ The sequence batcher uses a specific, unique batch + //@@ slot for each sequence. All inference requests in a + //@@ sequence are directed to the same batch slot in the same + //@@ model instance over the lifetime of the sequence. This + //@@ is the default strategy. + //@@ + message StrategyDirect + { + //@@ .. cpp:var:: uint64 max_queue_delay_microseconds + //@@ + //@@ The maximum time, in microseconds, a candidate request + //@@ will be delayed in the sequence batch scheduling queue to + //@@ wait for additional requests for batching. Default is 0. + //@@ + uint64 max_queue_delay_microseconds = 1; + + //@@ .. cpp:var:: float minimum_slot_utilization + //@@ + //@@ The minimum slot utilization that must be satisfied to + //@@ execute the batch before 'max_queue_delay_microseconds' expires. + //@@ For example, a value of 0.5 indicates that the batch should be + //@@ executed as soon as 50% or more of the slots are ready even if + //@@ the 'max_queue_delay_microseconds' timeout has not expired. + //@@ The default is 0.0, indicating that a batch will be executed + //@@ before 'max_queue_delay_microseconds' timeout expires if at least + //@@ one batch slot is ready. 'max_queue_delay_microseconds' will be + //@@ ignored unless minimum_slot_utilization is set to a non-zero + //@@ value. + //@@ + float minimum_slot_utilization = 2; + } + + //@@ .. cpp:var:: message StrategyOldest + //@@ + //@@ The sequence batcher maintains up to 'max_candidate_sequences' + //@@ candidate sequences. 'max_candidate_sequences' can be greater + //@@ than the model's 'max_batch_size'. For inferencing the batcher + //@@ chooses from the candidate sequences up to 'max_batch_size' + //@@ inference requests. Requests are chosen in an oldest-first + //@@ manner across all candidate sequences. A given sequence is + //@@ not guaranteed to be assigned to the same batch slot for + //@@ all inference requests of that sequence. + //@@ + message StrategyOldest + { + //@@ .. cpp:var:: int32 max_candidate_sequences + //@@ + //@@ Maximum number of candidate sequences that the batcher + //@@ maintains. Excess seqences are kept in an ordered backlog + //@@ and become candidates when existing candidate sequences + //@@ complete. + //@@ + int32 max_candidate_sequences = 1; + + //@@ .. cpp:var:: int32 preferred_batch_size (repeated) + //@@ + //@@ Preferred batch sizes for dynamic batching of candidate + //@@ sequences. If a batch of one of these sizes can be formed + //@@ it will be executed immediately. If not specified a + //@@ preferred batch size will be chosen automatically + //@@ based on model and GPU characteristics. + //@@ + repeated int32 preferred_batch_size = 2; + + //@@ .. cpp:var:: uint64 max_queue_delay_microseconds + //@@ + //@@ The maximum time, in microseconds, a candidate request + //@@ will be delayed in the dynamic batch scheduling queue to + //@@ wait for additional requests for batching. Default is 0. + //@@ + uint64 max_queue_delay_microseconds = 3; + } + + //@@ .. cpp:var:: oneof strategy_choice + //@@ + //@@ The strategy used by the sequence batcher. Default strategy + //@@ is 'direct'. + //@@ + oneof strategy_choice + { + //@@ .. 
cpp:var:: StrategyDirect direct
+    //@@
+    //@@     StrategyDirect scheduling strategy.
+    //@@
+    StrategyDirect direct = 3;
+
+    //@@  .. cpp:var:: StrategyOldest oldest
+    //@@
+    //@@     StrategyOldest scheduling strategy.
+    //@@
+    StrategyOldest oldest = 4;
+  }
+
+  //@@  .. cpp:var:: uint64 max_sequence_idle_microseconds
+  //@@
+  //@@     The maximum time, in microseconds, that a sequence is allowed to
+  //@@     be idle before it is aborted. The inference server considers a
+  //@@     sequence idle when it does not have any inference request queued
+  //@@     for the sequence. If this limit is exceeded, the inference server
+  //@@     will free the sequence slot allocated by the sequence and make it
+  //@@     available for another sequence. If not specified (or specified as
+  //@@     zero) a default value of 1000000 (1 second) is used.
+  //@@
+  uint64 max_sequence_idle_microseconds = 1;
+
+  //@@  .. cpp:var:: ControlInput control_input (repeated)
+  //@@
+  //@@     The model input(s) that the server should use to communicate
+  //@@     sequence start, stop, ready and similar control values to the
+  //@@     model.
+  //@@
+  repeated ControlInput control_input = 2;
+
+  //@@  .. cpp:var:: State state (repeated)
+  //@@
+  //@@     The optional state that can be stored in Triton for performing
+  //@@     inference requests on a sequence. Each sequence holds an implicit
+  //@@     state local to itself. The output state tensor provided by the
+  //@@     model in 'output_name' field of the current inference request will
+  //@@     be transferred as an input tensor named 'input_name' in the next
+  //@@     request of the same sequence. The input state of the first request
+  //@@     in the sequence contains garbage data.
+  //@@
+  repeated State state = 5;
+}
+
+//@@
+//@@.. cpp:var:: message ModelEnsembling
+//@@
+//@@   Model ensembling configuration. These settings specify the models that
+//@@   compose the ensemble and how data flows between the models.
+//@@
+message ModelEnsembling
+{
+  //@@  .. cpp:var:: message Step
+  //@@
+  //@@     Each step specifies a model included in the ensemble,
+  //@@     maps ensemble tensor names to the model input tensors,
+  //@@     and maps model output tensors to ensemble tensor names
+  //@@
+  message Step
+  {
+    //@@  .. cpp:var:: string model_name
+    //@@
+    //@@     The name of the model to execute for this step of the ensemble.
+    //@@
+    string model_name = 1;
+
+    //@@  .. cpp:var:: int64 model_version
+    //@@
+    //@@     The version of the model to use for inference. If -1
+    //@@     the latest/most-recent version of the model is used.
+    //@@
+    int64 model_version = 2;
+
+    //@@  .. cpp:var:: map<string, string> input_map
+    //@@
+    //@@     Map from name of an input tensor on this step's model to ensemble
+    //@@     tensor name. The ensemble tensor must have the same data type and
+    //@@     shape as the model input. Each model input must be assigned to
+    //@@     one ensemble tensor, but the same ensemble tensor can be assigned
+    //@@     to multiple model inputs.
+    //@@
+    map<string, string> input_map = 3;
+
+    //@@  .. cpp:var:: map<string, string> output_map
+    //@@
+    //@@     Map from name of an output tensor on this step's model to ensemble
+    //@@     tensor name. The data type and shape of the ensemble tensor will
+    //@@     be inferred from the model output. It is optional to assign all
+    //@@     model outputs to ensemble tensors. One ensemble tensor name
+    //@@     can appear in an output map only once.
+    //@@
+    map<string, string> output_map = 4;
+  }
+
+  //@@  .. cpp:var:: Step step (repeated)
+  //@@
+  //@@     The models and the input / output mappings used within the ensemble.
+  //@@
+  repeated Step step = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelParameter
+//@@
+//@@   A model parameter.
+//@@
+message ModelParameter
+{
+  //@@  .. cpp:var:: string string_value
+  //@@
+  //@@     The string value of the parameter.
+  //@@
+  string string_value = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelWarmup
+//@@
+//@@   Settings used to construct the request sample for model warmup.
+//@@
+message ModelWarmup
+{
+  //@@
+  //@@  .. cpp:var:: message Input
+  //@@
+  //@@     Meta data associated with an input.
+  //@@
+  message Input
+  {
+    //@@  .. cpp:var:: DataType data_type
+    //@@
+    //@@     The data-type of the input.
+    //@@
+    DataType data_type = 1;
+
+    //@@  .. cpp:var:: int64 dims (repeated)
+    //@@
+    //@@     The shape of the input tensor, not including the batch dimension.
+    //@@
+    repeated int64 dims = 2;
+
+    //@@  .. cpp:var:: oneof input_data_type
+    //@@
+    //@@     Specify how the input data is generated. If the input has STRING
+    //@@     data type and 'random_data' is set, the data generation will fall
+    //@@     back to 'zero_data'.
+    //@@
+    oneof input_data_type
+    {
+      //@@
+      //@@  .. cpp:var:: bool zero_data
+      //@@
+      //@@     The identifier for using zeros as input data. Note that the
+      //@@     value of 'zero_data' will not be checked, instead, zero data
+      //@@     will be used as long as the field is set.
+      //@@
+      bool zero_data = 3;
+
+      //@@
+      //@@  .. cpp:var:: bool random_data
+      //@@
+      //@@     The identifier for using random data as input data. Note that
+      //@@     the value of 'random_data' will not be checked, instead,
+      //@@     random data will be used as long as the field is set.
+      //@@
+      bool random_data = 4;
+
+      //@@  .. cpp:var:: string input_data_file
+      //@@
+      //@@     The file whose content will be used as raw input data in
+      //@@     row-major order. The file must be provided in a sub-directory
+      //@@     'warmup' under the model directory. The file contents should be
+      //@@     in binary format. For TYPE_STRING data-type, an element is
+      //@@     represented by a 4-byte unsigned integer giving the length
+      //@@     followed by the actual bytes.
+      //@@
+      string input_data_file = 5;
+    }
+  }
+
+  //@@  .. cpp:var:: string name
+  //@@
+  //@@     The name of the request sample.
+  //@@
+  string name = 1;
+
+  //@@  .. cpp:var:: uint32 batch_size
+  //@@
+  //@@     The batch size of the inference request. This must be >= 1. For
+  //@@     models that don't support batching, batch_size must be 1. If
+  //@@     batch_size > 1, the 'inputs' specified below will be duplicated to
+  //@@     match the batch size requested.
+  //@@
+  uint32 batch_size = 2;
+
+  //@@  .. cpp:var:: map<string, Input> inputs
+  //@@
+  //@@     The warmup meta data associated with every model input, including
+  //@@     control tensors.
+  //@@
+  map<string, Input> inputs = 3;
+
+  //@@  .. cpp:var:: uint32 count
+  //@@
+  //@@     The number of iterations that this warmup sample will be executed.
+  //@@     For example, if this field is set to 2, 2 model executions using this
+  //@@     sample will be scheduled for warmup. Default value is 0 which
+  //@@     indicates that this sample will be used only once.
+  //@@     Note that for a sequence model, 'count' may not work well
+  //@@     because the model often expects a valid sequence of requests which
+  //@@     should be represented by a series of warmup samples. 'count > 1'
+  //@@     essentially "resends" one of the samples, which may invalidate the
+  //@@     sequence and result in unexpected warmup failure.
+  //@@
+  uint32 count = 4;
+}
+
+//@@
+//@@ .. cpp:var:: message ModelOperations
+//@@
+//@@   The metadata of libraries providing custom operations for this model.
+//@@
+message ModelOperations
+{
+  //@@  .. cpp:var:: string op_library_filename (repeated)
+
+//@@
+//@@ .. cpp:var:: message ModelOperations
+//@@
+//@@   The metadata of libraries providing custom operations for this model.
+//@@
+message ModelOperations
+{
+  //@@ .. cpp:var:: string op_library_filename (repeated)
+  //@@
+  //@@     Optional paths of the libraries providing custom operations for
+  //@@     this model. Valid only for ONNX models.
+  //@@
+  repeated string op_library_filename = 1;
+}
+
+//@@
+//@@ .. cpp:var:: message ModelTransactionPolicy
+//@@
+//@@   The specification that describes the nature of transactions
+//@@   to be expected from the model.
+//@@
+message ModelTransactionPolicy
+{
+  //@@ .. cpp:var:: bool decoupled
+  //@@
+  //@@     Indicates whether responses generated by the model are decoupled
+  //@@     from the requests issued to it, which means the number of responses
+  //@@     generated by the model may differ from the number of requests
+  //@@     issued, and that the responses may be out of order relative to the
+  //@@     order of requests. The default is false, which means the model will
+  //@@     generate exactly one response for each request.
+  //@@
+  bool decoupled = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelRepositoryAgents
+//@@
+//@@   The repository agents for the model.
+//@@
+message ModelRepositoryAgents
+{
+  //@@
+  //@@ .. cpp:var:: message Agent
+  //@@
+  //@@     A repository agent that should be invoked for the specified
+  //@@     repository actions for this model.
+  //@@
+  message Agent
+  {
+    //@@ .. cpp:var:: string name
+    //@@
+    //@@     The name of the agent.
+    //@@
+    string name = 1;
+
+    //@@ .. cpp:var:: map<string,string> parameters
+    //@@
+    //@@     The parameters for the agent.
+    //@@
+    map<string, string> parameters = 2;
+  }
+
+  //@@
+  //@@ .. cpp:var:: Agent agents (repeated)
+  //@@
+  //@@     The ordered list of agents for the model. These agents will be
+  //@@     invoked in order to respond to repository actions occurring for the
+  //@@     model.
+  //@@
+  repeated Agent agents = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelResponseCache
+//@@
+//@@   The response cache setting for the model.
+//@@
+message ModelResponseCache
+{
+  //@@
+  //@@ .. cpp:var:: bool enable
+  //@@
+  //@@     Whether or not to use the response cache for the model. If True,
+  //@@     the responses from the model are cached and when an identical
+  //@@     request is encountered, instead of going through the model
+  //@@     execution, the response from the cache is utilized. By default,
+  //@@     the response cache is disabled for the models.
+  //@@
+  bool enable = 1;
+}
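+
+// For illustration, a sketch of enabling the cache in a model's text-format
+// configuration (not taken from any shipped model):
+//
+//   response_cache { enable: true }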
+
+//@@
+//@@.. cpp:var:: message ModelConfig
+//@@
+//@@   A model configuration.
+//@@
+message ModelConfig
+{
+  //@@ .. cpp:var:: string name
+  //@@
+  //@@     The name of the model.
+  //@@
+  string name = 1;
+
+  //@@ .. cpp:var:: string platform
+  //@@
+  //@@     The framework for the model. Possible values are
+  //@@     "tensorrt_plan", "tensorflow_graphdef",
+  //@@     "tensorflow_savedmodel", "onnxruntime_onnx",
+  //@@     "pytorch_libtorch".
+  //@@
+  string platform = 2;
+
+  //@@ .. cpp:var:: string backend
+  //@@
+  //@@     The backend used by the model.
+  //@@
+  string backend = 17;
+
+  //@@ .. cpp:var:: ModelVersionPolicy version_policy
+  //@@
+  //@@     Policy indicating which version(s) of the model will be served.
+  //@@
+  ModelVersionPolicy version_policy = 3;
+
+  //@@ .. cpp:var:: int32 max_batch_size
+  //@@
+  //@@     Maximum batch size allowed for inference. This can only decrease
+  //@@     what is allowed by the model itself. A max_batch_size value of 0
+  //@@     indicates that batching is not allowed for the model and the
+  //@@     dimension/shape of the input and output tensors must exactly
+  //@@     match what is specified in the input and output configuration. A
+  //@@     max_batch_size value > 0 indicates that batching is allowed and
+  //@@     so the model expects the input tensors to have an additional
+  //@@     initial dimension for the batching that is not specified in the
+  //@@     input (for example, if the model supports batched inputs of
+  //@@     2-dimensional tensors then the model configuration will specify
+  //@@     the input shape as [ X, Y ] but the model will expect the actual
+  //@@     input tensors to have shape [ N, X, Y ]). For max_batch_size > 0
+  //@@     returned outputs will also have an additional initial dimension
+  //@@     for the batch.
+  //@@
+  int32 max_batch_size = 4;
+
+  //@@ .. cpp:var:: ModelInput input (repeated)
+  //@@
+  //@@     The inputs requested by the model.
+  //@@
+  repeated ModelInput input = 5;
+
+  //@@ .. cpp:var:: ModelOutput output (repeated)
+  //@@
+  //@@     The outputs produced by the model.
+  //@@
+  repeated ModelOutput output = 6;
+
+  //@@ .. cpp:var:: BatchInput batch_input (repeated)
+  //@@
+  //@@     The model input(s) that the server should use to communicate
+  //@@     batch related values to the model.
+  //@@
+  repeated BatchInput batch_input = 20;
+
+  //@@ .. cpp:var:: BatchOutput batch_output (repeated)
+  //@@
+  //@@     The outputs produced by the model that require special handling
+  //@@     by the model backend.
+  //@@
+  repeated BatchOutput batch_output = 21;
+
+  //@@ .. cpp:var:: ModelOptimizationPolicy optimization
+  //@@
+  //@@     Optimization configuration for the model. If not specified
+  //@@     then the default optimization policy is used.
+  //@@
+  ModelOptimizationPolicy optimization = 12;
+
+  //@@ .. cpp:var:: oneof scheduling_choice
+  //@@
+  //@@     The scheduling policy for the model. If not specified the
+  //@@     default scheduling policy is used for the model. The default
+  //@@     policy is to execute each inference request independently.
+  //@@
+  oneof scheduling_choice
+  {
+    //@@ .. cpp:var:: ModelDynamicBatching dynamic_batching
+    //@@
+    //@@     If specified, enables the dynamic-batching scheduling
+    //@@     policy. With dynamic-batching the scheduler may group
+    //@@     together independent requests into a single batch to
+    //@@     improve inference throughput.
+    //@@
+    ModelDynamicBatching dynamic_batching = 11;
+
+    //@@ .. cpp:var:: ModelSequenceBatching sequence_batching
+    //@@
+    //@@     If specified, enables the sequence-batching scheduling
+    //@@     policy. With sequence-batching, inference requests
+    //@@     with the same correlation ID are routed to the same
+    //@@     model instance. Multiple sequences of inference requests
+    //@@     may be batched together into a single batch to
+    //@@     improve inference throughput.
+    //@@
+    ModelSequenceBatching sequence_batching = 13;
+
+    //@@ .. cpp:var:: ModelEnsembling ensemble_scheduling
+    //@@
+    //@@     If specified, enables the model-ensembling scheduling
+    //@@     policy. With model-ensembling, inference requests
+    //@@     will be processed according to the specification, such as an
+    //@@     execution sequence of models. The input specified in this model
+    //@@     config will be the input for the ensemble, and the output
+    //@@     specified will be the output of the ensemble.
+    //@@
+    ModelEnsembling ensemble_scheduling = 15;
+  }
+
+  //@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated)
+  //@@
+  //@@     Instances of this model. If not specified, one instance
+  //@@     of the model will be instantiated on each available GPU.
+  //@@
+  repeated ModelInstanceGroup instance_group = 7;
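+
+  // For illustration, a hypothetical batching setup (all values invented for
+  // this sketch): with max_batch_size 8 and an input declared with
+  // dims [ 224, 224 ], runtime tensors are expected to have shape
+  // [ N, 224, 224 ] with N <= 8:
+  //
+  //   max_batch_size: 8
+  //   dynamic_batching {
+  //     preferred_batch_size: [ 4, 8 ]
+  //     max_queue_delay_microseconds: 100
+  //   }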
+
+  //@@ .. cpp:var:: string default_model_filename
+  //@@
+  //@@     Optional filename of the model file to use if a
+  //@@     compute-capability specific model is not specified in
+  //@@     :cpp:var:`cc_model_filenames`. If not specified the default name
+  //@@     is 'model.graphdef', 'model.savedmodel', 'model.plan' or
+  //@@     'model.pt' depending on the model type.
+  //@@
+  string default_model_filename = 8;
+
+  //@@ .. cpp:var:: map<string,string> cc_model_filenames
+  //@@
+  //@@     Optional map from CUDA compute capability to the filename of
+  //@@     the model that supports that compute capability. The filename
+  //@@     refers to a file within the model version directory.
+  //@@
+  map<string, string> cc_model_filenames = 9;
+
+  //@@ .. cpp:var:: map<string,string> metric_tags
+  //@@
+  //@@     Optional metric tags. User-specific key-value pairs for metrics
+  //@@     reported for this model. These tags are applied to the metrics
+  //@@     reported on the HTTP metrics port.
+  //@@
+  map<string, string> metric_tags = 10;
+
+  //@@ .. cpp:var:: map<string,ModelParameter> parameters
+  //@@
+  //@@     Optional model parameters. User-specified parameter values.
+  //@@
+  map<string, ModelParameter> parameters = 14;
+
+  //@@ .. cpp:var:: ModelWarmup model_warmup (repeated)
+  //@@
+  //@@     Warmup setting of this model. If specified, all instances
+  //@@     will be run with the request samples in sequence before
+  //@@     serving the model.
+  //@@     This field can only be specified if the model is not an ensemble
+  //@@     model.
+  //@@
+  repeated ModelWarmup model_warmup = 16;
+
+  //@@ .. cpp:var:: ModelOperations model_operations
+  //@@
+  //@@     Optional metadata of the libraries providing custom operations for
+  //@@     this model.
+  //@@
+  ModelOperations model_operations = 18;
+
+  //@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy
+  //@@
+  //@@     Optional specification that describes the nature of transactions
+  //@@     to be expected from the model.
+  //@@
+  ModelTransactionPolicy model_transaction_policy = 19;
+
+  //@@ .. cpp:var:: ModelRepositoryAgents model_repository_agents
+  //@@
+  //@@     Optional specification of the agent(s) that should be invoked
+  //@@     when repository actions are performed for this model.
+  //@@
+  ModelRepositoryAgents model_repository_agents = 23;
+
+  //@@ .. cpp:var:: ModelResponseCache response_cache
+  //@@
+  //@@     Optional setting for utilizing the response cache for this
+  //@@     model.
+  //@@
+  ModelResponseCache response_cache = 24;
+}
\ No newline at end of file
diff --git a/visualdl/component/inference/proto/model_config/protxt_pb2.py b/visualdl/component/inference/proto/model_config/protxt_pb2.py
new file mode 100644
index 000000000..4d5b645d4
--- /dev/null
+++ b/visualdl/component/inference/proto/model_config/protxt_pb2.py
@@ -0,0 +1,855 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: model_config.protxt +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import enum_type_wrapper +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x13model_config.protxt\x12\tinference\"\x96\x01\n\x10ModelRateLimiter\x12\x37\n\tresources\x18\x01 \x03(\x0b\x32$.inference.ModelRateLimiter.Resource\x12\x10\n\x08priority\x18\x02 \x01(\r\x1a\x37\n\x08Resource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06global\x18\x02 \x01(\x08\x12\r\n\x05\x63ount\x18\x03 \x01(\r\"\x87\x04\n\x12ModelInstanceGroup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x30\n\x04kind\x18\x04 \x01(\x0e\x32\".inference.ModelInstanceGroup.Kind\x12\r\n\x05\x63ount\x18\x02 \x01(\x05\x12\x31\n\x0crate_limiter\x18\x06 \x01(\x0b\x32\x1b.inference.ModelRateLimiter\x12\x0c\n\x04gpus\x18\x03 \x03(\x05\x12H\n\x11secondary_devices\x18\x08 \x03(\x0b\x32-.inference.ModelInstanceGroup.SecondaryDevice\x12\x0f\n\x07profile\x18\x05 \x03(\t\x12\x0f\n\x07passive\x18\x07 \x01(\x08\x12\x13\n\x0bhost_policy\x18\t \x01(\t\x1a\x9c\x01\n\x0fSecondaryDevice\x12O\n\x04kind\x18\x01 \x01(\x0e\x32\x41.inference.ModelInstanceGroup.SecondaryDevice.SecondaryDeviceKind\x12\x11\n\tdevice_id\x18\x02 \x01(\x03\"%\n\x13SecondaryDeviceKind\x12\x0e\n\nKIND_NVDLA\x10\x00\"A\n\x04Kind\x12\r\n\tKIND_AUTO\x10\x00\x12\x0c\n\x08KIND_GPU\x10\x01\x12\x0c\n\x08KIND_CPU\x10\x02\x12\x0e\n\nKIND_MODEL\x10\x03\"#\n\x12ModelTensorReshape\x12\r\n\x05shape\x18\x01 \x03(\x03\"\xb2\x02\n\nModelInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12,\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1c.inference.ModelInput.Format\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\x12\x1a\n\x12\x61llow_ragged_batch\x18\x07 \x01(\x08\x12\x10\n\x08optional\x18\x08 \x01(\x08\";\n\x06\x46ormat\x12\x0f\n\x0b\x46ORMAT_NONE\x10\x00\x12\x0f\n\x0b\x46ORMAT_NHWC\x10\x01\x12\x0f\n\x0b\x46ORMAT_NCHW\x10\x02\"\xb2\x01\n\x0bModelOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x03 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x16\n\x0elabel_filename\x18\x04 \x01(\t\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\"\xd9\x02\n\nBatchInput\x12(\n\x04kind\x18\x01 \x01(\x0e\x32\x1a.inference.BatchInput.Kind\x12\x13\n\x0btarget_name\x18\x02 \x03(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x14\n\x0csource_input\x18\x04 \x03(\t\"\xcd\x01\n\x04Kind\x12\x17\n\x13\x42\x41TCH_ELEMENT_COUNT\x10\x00\x12#\n\x1f\x42\x41TCH_ACCUMULATED_ELEMENT_COUNT\x10\x01\x12-\n)BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO\x10\x02\x12$\n BATCH_MAX_ELEMENT_COUNT_AS_SHAPE\x10\x03\x12\x14\n\x10\x42\x41TCH_ITEM_SHAPE\x10\x04\x12\x1c\n\x18\x42\x41TCH_ITEM_SHAPE_FLATTEN\x10\x05\"\x8f\x01\n\x0b\x42\x61tchOutput\x12\x13\n\x0btarget_name\x18\x01 \x03(\t\x12)\n\x04kind\x18\x02 \x01(\x0e\x32\x1b.inference.BatchOutput.Kind\x12\x14\n\x0csource_input\x18\x03 
\x03(\t\"*\n\x04Kind\x12\"\n\x1e\x42\x41TCH_SCATTER_WITH_INPUT_SHAPE\x10\x00\"\x90\x02\n\x12ModelVersionPolicy\x12\x36\n\x06latest\x18\x01 \x01(\x0b\x32$.inference.ModelVersionPolicy.LatestH\x00\x12\x30\n\x03\x61ll\x18\x02 \x01(\x0b\x32!.inference.ModelVersionPolicy.AllH\x00\x12:\n\x08specific\x18\x03 \x01(\x0b\x32&.inference.ModelVersionPolicy.SpecificH\x00\x1a\x1e\n\x06Latest\x12\x14\n\x0cnum_versions\x18\x01 \x01(\r\x1a\x05\n\x03\x41ll\x1a\x1c\n\x08Specific\x12\x10\n\x08versions\x18\x01 \x03(\x03\x42\x0f\n\rpolicy_choice\"\xfd\r\n\x17ModelOptimizationPolicy\x12\x37\n\x05graph\x18\x01 \x01(\x0b\x32(.inference.ModelOptimizationPolicy.Graph\x12\x42\n\x08priority\x18\x02 \x01(\x0e\x32\x30.inference.ModelOptimizationPolicy.ModelPriority\x12\x35\n\x04\x63uda\x18\x03 \x01(\x0b\x32\'.inference.ModelOptimizationPolicy.Cuda\x12X\n\x16\x65xecution_accelerators\x18\x04 \x01(\x0b\x32\x38.inference.ModelOptimizationPolicy.ExecutionAccelerators\x12R\n\x13input_pinned_memory\x18\x05 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12S\n\x14output_pinned_memory\x18\x06 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12&\n\x1egather_kernel_buffer_threshold\x18\x07 \x01(\r\x12\x16\n\x0e\x65\x61ger_batching\x18\x08 \x01(\x08\x1a\x16\n\x05Graph\x12\r\n\x05level\x18\x01 \x01(\x05\x1a\xba\x05\n\x04\x43uda\x12\x0e\n\x06graphs\x18\x01 \x01(\x08\x12\x18\n\x10\x62usy_wait_events\x18\x02 \x01(\x08\x12\x45\n\ngraph_spec\x18\x03 \x03(\x0b\x32\x31.inference.ModelOptimizationPolicy.Cuda.GraphSpec\x12\x1a\n\x12output_copy_stream\x18\x04 \x01(\x08\x1a\xa4\x04\n\tGraphSpec\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12K\n\x05input\x18\x02 \x03(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry\x12W\n\x11graph_lower_bound\x18\x03 \x01(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound\x1a\x14\n\x05Shape\x12\x0b\n\x03\x64im\x18\x01 \x03(\x03\x1a\xdf\x01\n\nLowerBound\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12V\n\x05input\x18\x02 \x03(\x0b\x32G.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\xa4\x03\n\x15\x45xecutionAccelerators\x12g\n\x19gpu_execution_accelerator\x18\x01 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x12g\n\x19\x63pu_execution_accelerator\x18\x02 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x1a\xb8\x01\n\x0b\x41\x63\x63\x65lerator\x12\x0c\n\x04name\x18\x01 \x01(\t\x12h\n\nparameters\x18\x02 \x03(\x0b\x32T.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a$\n\x12PinnedMemoryBuffer\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"I\n\rModelPriority\x12\x14\n\x10PRIORITY_DEFAULT\x10\x00\x12\x10\n\x0cPRIORITY_MAX\x10\x01\x12\x10\n\x0cPRIORITY_MIN\x10\x02\"\xdb\x01\n\x10ModelQueuePolicy\x12\x41\n\x0etimeout_action\x18\x01 \x01(\x0e\x32).inference.ModelQueuePolicy.TimeoutAction\x12$\n\x1c\x64\x65\x66\x61ult_timeout_microseconds\x18\x02 \x01(\x04\x12\x1e\n\x16\x61llow_timeout_override\x18\x03 \x01(\x08\x12\x16\n\x0emax_queue_size\x18\x04 
\x01(\r\"&\n\rTimeoutAction\x12\n\n\x06REJECT\x10\x00\x12\t\n\x05\x44\x45LAY\x10\x01\"\x9b\x03\n\x14ModelDynamicBatching\x12\x1c\n\x14preferred_batch_size\x18\x01 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x02 \x01(\x04\x12\x19\n\x11preserve_ordering\x18\x03 \x01(\x08\x12\x17\n\x0fpriority_levels\x18\x04 \x01(\r\x12\x1e\n\x16\x64\x65\x66\x61ult_priority_level\x18\x05 \x01(\r\x12\x39\n\x14\x64\x65\x66\x61ult_queue_policy\x18\x06 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy\x12W\n\x15priority_queue_policy\x18\x07 \x03(\x0b\x32\x38.inference.ModelDynamicBatching.PriorityQueuePolicyEntry\x1aW\n\x18PriorityQueuePolicyEntry\x12\x0b\n\x03key\x18\x01 \x01(\r\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy:\x02\x38\x01\"\xef\t\n\x15ModelSequenceBatching\x12\x41\n\x06\x64irect\x18\x03 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyDirectH\x00\x12\x41\n\x06oldest\x18\x04 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyOldestH\x00\x12&\n\x1emax_sequence_idle_microseconds\x18\x01 \x01(\x04\x12\x44\n\rcontrol_input\x18\x02 \x03(\x0b\x32-.inference.ModelSequenceBatching.ControlInput\x12\x35\n\x05state\x18\x05 \x03(\x0b\x32&.inference.ModelSequenceBatching.State\x1a\xb1\x02\n\x07\x43ontrol\x12;\n\x04kind\x18\x01 \x01(\x0e\x32-.inference.ModelSequenceBatching.Control.Kind\x12\x18\n\x10int32_false_true\x18\x02 \x03(\x05\x12\x17\n\x0f\x66p32_false_true\x18\x03 \x03(\x02\x12\x17\n\x0f\x62ool_false_true\x18\x05 \x03(\x08\x12&\n\tdata_type\x18\x04 \x01(\x0e\x32\x13.inference.DataType\"u\n\x04Kind\x12\x1a\n\x16\x43ONTROL_SEQUENCE_START\x10\x00\x12\x1a\n\x16\x43ONTROL_SEQUENCE_READY\x10\x01\x12\x18\n\x14\x43ONTROL_SEQUENCE_END\x10\x02\x12\x1b\n\x17\x43ONTROL_SEQUENCE_CORRID\x10\x03\x1aW\n\x0c\x43ontrolInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x39\n\x07\x63ontrol\x18\x02 \x03(\x0b\x32(.inference.ModelSequenceBatching.Control\x1a\x8a\x01\n\x0cInitialState\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x13\n\tdata_file\x18\x04 \x01(\tH\x00\x12\x0c\n\x04name\x18\x05 \x01(\tB\x0c\n\nstate_data\x1a\xac\x01\n\x05State\x12\x12\n\ninput_name\x18\x01 \x01(\t\x12\x13\n\x0boutput_name\x18\x02 \x01(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12\x44\n\rinitial_state\x18\x05 \x03(\x0b\x32-.inference.ModelSequenceBatching.InitialState\x1aX\n\x0eStrategyDirect\x12$\n\x1cmax_queue_delay_microseconds\x18\x01 \x01(\x04\x12 \n\x18minimum_slot_utilization\x18\x02 \x01(\x02\x1au\n\x0eStrategyOldest\x12\x1f\n\x17max_candidate_sequences\x18\x01 \x01(\x05\x12\x1c\n\x14preferred_batch_size\x18\x02 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x03 \x01(\x04\x42\x11\n\x0fstrategy_choice\"\xdd\x02\n\x0fModelEnsembling\x12-\n\x04step\x18\x01 \x03(\x0b\x32\x1f.inference.ModelEnsembling.Step\x1a\x9a\x02\n\x04Step\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\x03\x12@\n\tinput_map\x18\x03 \x03(\x0b\x32-.inference.ModelEnsembling.Step.InputMapEntry\x12\x42\n\noutput_map\x18\x04 \x03(\x0b\x32..inference.ModelEnsembling.Step.OutputMapEntry\x1a/\n\rInputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x0eOutputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"&\n\x0eModelParameter\x12\x14\n\x0cstring_value\x18\x01 \x01(\t\"\xd9\x02\n\x0bModelWarmup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 
\x01(\r\x12\x32\n\x06inputs\x18\x03 \x03(\x0b\x32\".inference.ModelWarmup.InputsEntry\x12\r\n\x05\x63ount\x18\x04 \x01(\r\x1a\x97\x01\n\x05Input\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x15\n\x0brandom_data\x18\x04 \x01(\x08H\x00\x12\x19\n\x0finput_data_file\x18\x05 \x01(\tH\x00\x42\x11\n\x0finput_data_type\x1aK\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12+\n\x05value\x18\x02 \x01(\x0b\x32\x1c.inference.ModelWarmup.Input:\x02\x38\x01\".\n\x0fModelOperations\x12\x1b\n\x13op_library_filename\x18\x01 \x03(\t\"+\n\x16ModelTransactionPolicy\x12\x11\n\tdecoupled\x18\x01 \x01(\x08\"\xe6\x01\n\x15ModelRepositoryAgents\x12\x36\n\x06\x61gents\x18\x01 \x03(\x0b\x32&.inference.ModelRepositoryAgents.Agent\x1a\x94\x01\n\x05\x41gent\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\nparameters\x18\x02 \x03(\x0b\x32\x36.inference.ModelRepositoryAgents.Agent.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"$\n\x12ModelResponseCache\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"\xb2\n\n\x0bModelConfig\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08platform\x18\x02 \x01(\t\x12\x0f\n\x07\x62\x61\x63kend\x18\x11 \x01(\t\x12\x35\n\x0eversion_policy\x18\x03 \x01(\x0b\x32\x1d.inference.ModelVersionPolicy\x12\x16\n\x0emax_batch_size\x18\x04 \x01(\x05\x12$\n\x05input\x18\x05 \x03(\x0b\x32\x15.inference.ModelInput\x12&\n\x06output\x18\x06 \x03(\x0b\x32\x16.inference.ModelOutput\x12*\n\x0b\x62\x61tch_input\x18\x14 \x03(\x0b\x32\x15.inference.BatchInput\x12,\n\x0c\x62\x61tch_output\x18\x15 \x03(\x0b\x32\x16.inference.BatchOutput\x12\x38\n\x0coptimization\x18\x0c \x01(\x0b\x32\".inference.ModelOptimizationPolicy\x12;\n\x10\x64ynamic_batching\x18\x0b \x01(\x0b\x32\x1f.inference.ModelDynamicBatchingH\x00\x12=\n\x11sequence_batching\x18\r \x01(\x0b\x32 .inference.ModelSequenceBatchingH\x00\x12\x39\n\x13\x65nsemble_scheduling\x18\x0f \x01(\x0b\x32\x1a.inference.ModelEnsemblingH\x00\x12\x35\n\x0einstance_group\x18\x07 \x03(\x0b\x32\x1d.inference.ModelInstanceGroup\x12\x1e\n\x16\x64\x65\x66\x61ult_model_filename\x18\x08 \x01(\t\x12H\n\x12\x63\x63_model_filenames\x18\t \x03(\x0b\x32,.inference.ModelConfig.CcModelFilenamesEntry\x12;\n\x0bmetric_tags\x18\n \x03(\x0b\x32&.inference.ModelConfig.MetricTagsEntry\x12:\n\nparameters\x18\x0e \x03(\x0b\x32&.inference.ModelConfig.ParametersEntry\x12,\n\x0cmodel_warmup\x18\x10 \x03(\x0b\x32\x16.inference.ModelWarmup\x12\x34\n\x10model_operations\x18\x12 \x01(\x0b\x32\x1a.inference.ModelOperations\x12\x43\n\x18model_transaction_policy\x18\x13 \x01(\x0b\x32!.inference.ModelTransactionPolicy\x12\x41\n\x17model_repository_agents\x18\x17 \x01(\x0b\x32 .inference.ModelRepositoryAgents\x12\x35\n\x0eresponse_cache\x18\x18 \x01(\x0b\x32\x1d.inference.ModelResponseCache\x1a\x37\n\x15\x43\x63ModelFilenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x31\n\x0fMetricTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 
\x01(\x0b\x32\x19.inference.ModelParameter:\x02\x38\x01\x42\x13\n\x11scheduling_choice*\xfa\x01\n\x08\x44\x61taType\x12\x10\n\x0cTYPE_INVALID\x10\x00\x12\r\n\tTYPE_BOOL\x10\x01\x12\x0e\n\nTYPE_UINT8\x10\x02\x12\x0f\n\x0bTYPE_UINT16\x10\x03\x12\x0f\n\x0bTYPE_UINT32\x10\x04\x12\x0f\n\x0bTYPE_UINT64\x10\x05\x12\r\n\tTYPE_INT8\x10\x06\x12\x0e\n\nTYPE_INT16\x10\x07\x12\x0e\n\nTYPE_INT32\x10\x08\x12\x0e\n\nTYPE_INT64\x10\t\x12\r\n\tTYPE_FP16\x10\n\x12\r\n\tTYPE_FP32\x10\x0b\x12\r\n\tTYPE_FP64\x10\x0c\x12\x0f\n\x0bTYPE_STRING\x10\r\x12\r\n\tTYPE_BF16\x10\x0e\x62\x06proto3' +) + +_DATATYPE = DESCRIPTOR.enum_types_by_name['DataType'] +DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE) +TYPE_INVALID = 0 +TYPE_BOOL = 1 +TYPE_UINT8 = 2 +TYPE_UINT16 = 3 +TYPE_UINT32 = 4 +TYPE_UINT64 = 5 +TYPE_INT8 = 6 +TYPE_INT16 = 7 +TYPE_INT32 = 8 +TYPE_INT64 = 9 +TYPE_FP16 = 10 +TYPE_FP32 = 11 +TYPE_FP64 = 12 +TYPE_STRING = 13 +TYPE_BF16 = 14 + +_MODELRATELIMITER = DESCRIPTOR.message_types_by_name['ModelRateLimiter'] +_MODELRATELIMITER_RESOURCE = _MODELRATELIMITER.nested_types_by_name['Resource'] +_MODELINSTANCEGROUP = DESCRIPTOR.message_types_by_name['ModelInstanceGroup'] +_MODELINSTANCEGROUP_SECONDARYDEVICE = _MODELINSTANCEGROUP.nested_types_by_name[ + 'SecondaryDevice'] +_MODELTENSORRESHAPE = DESCRIPTOR.message_types_by_name['ModelTensorReshape'] +_MODELINPUT = DESCRIPTOR.message_types_by_name['ModelInput'] +_MODELOUTPUT = DESCRIPTOR.message_types_by_name['ModelOutput'] +_BATCHINPUT = DESCRIPTOR.message_types_by_name['BatchInput'] +_BATCHOUTPUT = DESCRIPTOR.message_types_by_name['BatchOutput'] +_MODELVERSIONPOLICY = DESCRIPTOR.message_types_by_name['ModelVersionPolicy'] +_MODELVERSIONPOLICY_LATEST = _MODELVERSIONPOLICY.nested_types_by_name['Latest'] +_MODELVERSIONPOLICY_ALL = _MODELVERSIONPOLICY.nested_types_by_name['All'] +_MODELVERSIONPOLICY_SPECIFIC = _MODELVERSIONPOLICY.nested_types_by_name[ + 'Specific'] +_MODELOPTIMIZATIONPOLICY = DESCRIPTOR.message_types_by_name[ + 'ModelOptimizationPolicy'] +_MODELOPTIMIZATIONPOLICY_GRAPH = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'Graph'] +_MODELOPTIMIZATIONPOLICY_CUDA = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'Cuda'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC = _MODELOPTIMIZATIONPOLICY_CUDA.nested_types_by_name[ + 'GraphSpec'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ + 'Shape'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ + 'LowerBound'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.nested_types_by_name[ + 'InputEntry'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ + 'InputEntry'] +_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'ExecutionAccelerators'] +_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.nested_types_by_name[ + 'Accelerator'] +_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.nested_types_by_name[ + 'ParametersEntry'] +_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'PinnedMemoryBuffer'] +_MODELQUEUEPOLICY = DESCRIPTOR.message_types_by_name['ModelQueuePolicy'] +_MODELDYNAMICBATCHING = DESCRIPTOR.message_types_by_name[ + 
'ModelDynamicBatching'] +_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY = _MODELDYNAMICBATCHING.nested_types_by_name[ + 'PriorityQueuePolicyEntry'] +_MODELSEQUENCEBATCHING = DESCRIPTOR.message_types_by_name[ + 'ModelSequenceBatching'] +_MODELSEQUENCEBATCHING_CONTROL = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'Control'] +_MODELSEQUENCEBATCHING_CONTROLINPUT = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'ControlInput'] +_MODELSEQUENCEBATCHING_INITIALSTATE = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'InitialState'] +_MODELSEQUENCEBATCHING_STATE = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'State'] +_MODELSEQUENCEBATCHING_STRATEGYDIRECT = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'StrategyDirect'] +_MODELSEQUENCEBATCHING_STRATEGYOLDEST = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'StrategyOldest'] +_MODELENSEMBLING = DESCRIPTOR.message_types_by_name['ModelEnsembling'] +_MODELENSEMBLING_STEP = _MODELENSEMBLING.nested_types_by_name['Step'] +_MODELENSEMBLING_STEP_INPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[ + 'InputMapEntry'] +_MODELENSEMBLING_STEP_OUTPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[ + 'OutputMapEntry'] +_MODELPARAMETER = DESCRIPTOR.message_types_by_name['ModelParameter'] +_MODELWARMUP = DESCRIPTOR.message_types_by_name['ModelWarmup'] +_MODELWARMUP_INPUT = _MODELWARMUP.nested_types_by_name['Input'] +_MODELWARMUP_INPUTSENTRY = _MODELWARMUP.nested_types_by_name['InputsEntry'] +_MODELOPERATIONS = DESCRIPTOR.message_types_by_name['ModelOperations'] +_MODELTRANSACTIONPOLICY = DESCRIPTOR.message_types_by_name[ + 'ModelTransactionPolicy'] +_MODELREPOSITORYAGENTS = DESCRIPTOR.message_types_by_name[ + 'ModelRepositoryAgents'] +_MODELREPOSITORYAGENTS_AGENT = _MODELREPOSITORYAGENTS.nested_types_by_name[ + 'Agent'] +_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY = _MODELREPOSITORYAGENTS_AGENT.nested_types_by_name[ + 'ParametersEntry'] +_MODELRESPONSECACHE = DESCRIPTOR.message_types_by_name['ModelResponseCache'] +_MODELCONFIG = DESCRIPTOR.message_types_by_name['ModelConfig'] +_MODELCONFIG_CCMODELFILENAMESENTRY = _MODELCONFIG.nested_types_by_name[ + 'CcModelFilenamesEntry'] +_MODELCONFIG_METRICTAGSENTRY = _MODELCONFIG.nested_types_by_name[ + 'MetricTagsEntry'] +_MODELCONFIG_PARAMETERSENTRY = _MODELCONFIG.nested_types_by_name[ + 'ParametersEntry'] +_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND = _MODELINSTANCEGROUP_SECONDARYDEVICE.enum_types_by_name[ + 'SecondaryDeviceKind'] +_MODELINSTANCEGROUP_KIND = _MODELINSTANCEGROUP.enum_types_by_name['Kind'] +_MODELINPUT_FORMAT = _MODELINPUT.enum_types_by_name['Format'] +_BATCHINPUT_KIND = _BATCHINPUT.enum_types_by_name['Kind'] +_BATCHOUTPUT_KIND = _BATCHOUTPUT.enum_types_by_name['Kind'] +_MODELOPTIMIZATIONPOLICY_MODELPRIORITY = _MODELOPTIMIZATIONPOLICY.enum_types_by_name[ + 'ModelPriority'] +_MODELQUEUEPOLICY_TIMEOUTACTION = _MODELQUEUEPOLICY.enum_types_by_name[ + 'TimeoutAction'] +_MODELSEQUENCEBATCHING_CONTROL_KIND = _MODELSEQUENCEBATCHING_CONTROL.enum_types_by_name[ + 'Kind'] +ModelRateLimiter = _reflection.GeneratedProtocolMessageType( + 'ModelRateLimiter', + (_message.Message, ), + { + 'Resource': + _reflection.GeneratedProtocolMessageType( + 'Resource', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELRATELIMITER_RESOURCE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRateLimiter.Resource) + }), + 'DESCRIPTOR': + _MODELRATELIMITER, + '__module__': + 'model_config.protxt_pb2' + # 
@@protoc_insertion_point(class_scope:inference.ModelRateLimiter) + }) +_sym_db.RegisterMessage(ModelRateLimiter) +_sym_db.RegisterMessage(ModelRateLimiter.Resource) + +ModelInstanceGroup = _reflection.GeneratedProtocolMessageType( + 'ModelInstanceGroup', + (_message.Message, ), + { + 'SecondaryDevice': + _reflection.GeneratedProtocolMessageType( + 'SecondaryDevice', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELINSTANCEGROUP_SECONDARYDEVICE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup.SecondaryDevice) + }), + 'DESCRIPTOR': + _MODELINSTANCEGROUP, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup) + }) +_sym_db.RegisterMessage(ModelInstanceGroup) +_sym_db.RegisterMessage(ModelInstanceGroup.SecondaryDevice) + +ModelTensorReshape = _reflection.GeneratedProtocolMessageType( + 'ModelTensorReshape', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELTENSORRESHAPE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelTensorReshape) + }) +_sym_db.RegisterMessage(ModelTensorReshape) + +ModelInput = _reflection.GeneratedProtocolMessageType( + 'ModelInput', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELINPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelInput) + }) +_sym_db.RegisterMessage(ModelInput) + +ModelOutput = _reflection.GeneratedProtocolMessageType( + 'ModelOutput', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELOUTPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOutput) + }) +_sym_db.RegisterMessage(ModelOutput) + +BatchInput = _reflection.GeneratedProtocolMessageType( + 'BatchInput', + (_message.Message, ), + { + 'DESCRIPTOR': _BATCHINPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.BatchInput) + }) +_sym_db.RegisterMessage(BatchInput) + +BatchOutput = _reflection.GeneratedProtocolMessageType( + 'BatchOutput', + (_message.Message, ), + { + 'DESCRIPTOR': _BATCHOUTPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.BatchOutput) + }) +_sym_db.RegisterMessage(BatchOutput) + +ModelVersionPolicy = _reflection.GeneratedProtocolMessageType( + 'ModelVersionPolicy', + (_message.Message, ), + { + 'Latest': + _reflection.GeneratedProtocolMessageType( + 'Latest', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELVERSIONPOLICY_LATEST, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Latest) + }), + 'All': + _reflection.GeneratedProtocolMessageType( + 'All', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELVERSIONPOLICY_ALL, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.All) + }), + 'Specific': + _reflection.GeneratedProtocolMessageType( + 'Specific', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELVERSIONPOLICY_SPECIFIC, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Specific) + }), + 'DESCRIPTOR': + _MODELVERSIONPOLICY, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy) + }) +_sym_db.RegisterMessage(ModelVersionPolicy) +_sym_db.RegisterMessage(ModelVersionPolicy.Latest) +_sym_db.RegisterMessage(ModelVersionPolicy.All) 
+_sym_db.RegisterMessage(ModelVersionPolicy.Specific) + +ModelOptimizationPolicy = _reflection.GeneratedProtocolMessageType( + 'ModelOptimizationPolicy', + (_message.Message, ), + { + 'Graph': + _reflection.GeneratedProtocolMessageType( + 'Graph', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELOPTIMIZATIONPOLICY_GRAPH, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Graph) + }), + 'Cuda': + _reflection.GeneratedProtocolMessageType( + 'Cuda', + (_message.Message, ), + { + 'GraphSpec': + _reflection.GeneratedProtocolMessageType( + 'GraphSpec', + (_message.Message, ), + { + 'Shape': + _reflection.GeneratedProtocolMessageType( + 'Shape', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape) + }), + 'LowerBound': + _reflection.GeneratedProtocolMessageType( + 'LowerBound', + (_message.Message, ), + { + 'InputEntry': + _reflection.GeneratedProtocolMessageType( + 'InputEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) + }), + 'InputEntry': + _reflection.GeneratedProtocolMessageType( + 'InputEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda) + }), + 'ExecutionAccelerators': + _reflection.GeneratedProtocolMessageType( + 'ExecutionAccelerators', + (_message.Message, ), + { + 'Accelerator': + _reflection.GeneratedProtocolMessageType( + 'Accelerator', + (_message.Message, ), + { + 'ParametersEntry': + _reflection.GeneratedProtocolMessageType( + 'ParametersEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators) + }), + 'PinnedMemoryBuffer': + _reflection.GeneratedProtocolMessageType( + 'PinnedMemoryBuffer', + (_message.Message, ), + { + 'DESCRIPTOR': 
_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.PinnedMemoryBuffer) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy) + }) +_sym_db.RegisterMessage(ModelOptimizationPolicy) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Graph) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.Shape) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) +_sym_db.RegisterMessage( + ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) +_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators) +_sym_db.RegisterMessage( + ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) +_sym_db.RegisterMessage( + ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) +_sym_db.RegisterMessage(ModelOptimizationPolicy.PinnedMemoryBuffer) + +ModelQueuePolicy = _reflection.GeneratedProtocolMessageType( + 'ModelQueuePolicy', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELQUEUEPOLICY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelQueuePolicy) + }) +_sym_db.RegisterMessage(ModelQueuePolicy) + +ModelDynamicBatching = _reflection.GeneratedProtocolMessageType( + 'ModelDynamicBatching', + (_message.Message, ), + { + 'PriorityQueuePolicyEntry': + _reflection.GeneratedProtocolMessageType( + 'PriorityQueuePolicyEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching.PriorityQueuePolicyEntry) + }), + 'DESCRIPTOR': + _MODELDYNAMICBATCHING, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching) + }) +_sym_db.RegisterMessage(ModelDynamicBatching) +_sym_db.RegisterMessage(ModelDynamicBatching.PriorityQueuePolicyEntry) + +ModelSequenceBatching = _reflection.GeneratedProtocolMessageType( + 'ModelSequenceBatching', + (_message.Message, ), + { + 'Control': + _reflection.GeneratedProtocolMessageType( + 'Control', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROL, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.Control) + }), + 'ControlInput': + _reflection.GeneratedProtocolMessageType( + 'ControlInput', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROLINPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.ControlInput) + }), + 'InitialState': + _reflection.GeneratedProtocolMessageType( + 'InitialState', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_INITIALSTATE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.InitialState) + }), + 'State': + _reflection.GeneratedProtocolMessageType( + 'State', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STATE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.State) + }), + 
'StrategyDirect': + _reflection.GeneratedProtocolMessageType( + 'StrategyDirect', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYDIRECT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyDirect) + }), + 'StrategyOldest': + _reflection.GeneratedProtocolMessageType( + 'StrategyOldest', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYOLDEST, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyOldest) + }), + 'DESCRIPTOR': + _MODELSEQUENCEBATCHING, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching) + }) +_sym_db.RegisterMessage(ModelSequenceBatching) +_sym_db.RegisterMessage(ModelSequenceBatching.Control) +_sym_db.RegisterMessage(ModelSequenceBatching.ControlInput) +_sym_db.RegisterMessage(ModelSequenceBatching.InitialState) +_sym_db.RegisterMessage(ModelSequenceBatching.State) +_sym_db.RegisterMessage(ModelSequenceBatching.StrategyDirect) +_sym_db.RegisterMessage(ModelSequenceBatching.StrategyOldest) + +ModelEnsembling = _reflection.GeneratedProtocolMessageType( + 'ModelEnsembling', + (_message.Message, ), + { + 'Step': + _reflection.GeneratedProtocolMessageType( + 'Step', + (_message.Message, ), + { + 'InputMapEntry': + _reflection.GeneratedProtocolMessageType( + 'InputMapEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELENSEMBLING_STEP_INPUTMAPENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.InputMapEntry) + }), + 'OutputMapEntry': + _reflection.GeneratedProtocolMessageType( + 'OutputMapEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELENSEMBLING_STEP_OUTPUTMAPENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.OutputMapEntry) + }), + 'DESCRIPTOR': + _MODELENSEMBLING_STEP, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step) + }), + 'DESCRIPTOR': + _MODELENSEMBLING, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling) + }) +_sym_db.RegisterMessage(ModelEnsembling) +_sym_db.RegisterMessage(ModelEnsembling.Step) +_sym_db.RegisterMessage(ModelEnsembling.Step.InputMapEntry) +_sym_db.RegisterMessage(ModelEnsembling.Step.OutputMapEntry) + +ModelParameter = _reflection.GeneratedProtocolMessageType( + 'ModelParameter', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELPARAMETER, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelParameter) + }) +_sym_db.RegisterMessage(ModelParameter) + +ModelWarmup = _reflection.GeneratedProtocolMessageType( + 'ModelWarmup', + (_message.Message, ), + { + 'Input': + _reflection.GeneratedProtocolMessageType( + 'Input', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELWARMUP_INPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelWarmup.Input) + }), + 'InputsEntry': + _reflection.GeneratedProtocolMessageType( + 'InputsEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELWARMUP_INPUTSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelWarmup.InputsEntry) + }), + 'DESCRIPTOR': + _MODELWARMUP, + '__module__': + 'model_config.protxt_pb2' + # 
@@protoc_insertion_point(class_scope:inference.ModelWarmup) + }) +_sym_db.RegisterMessage(ModelWarmup) +_sym_db.RegisterMessage(ModelWarmup.Input) +_sym_db.RegisterMessage(ModelWarmup.InputsEntry) + +ModelOperations = _reflection.GeneratedProtocolMessageType( + 'ModelOperations', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELOPERATIONS, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOperations) + }) +_sym_db.RegisterMessage(ModelOperations) + +ModelTransactionPolicy = _reflection.GeneratedProtocolMessageType( + 'ModelTransactionPolicy', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELTRANSACTIONPOLICY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelTransactionPolicy) + }) +_sym_db.RegisterMessage(ModelTransactionPolicy) + +ModelRepositoryAgents = _reflection.GeneratedProtocolMessageType( + 'ModelRepositoryAgents', + (_message.Message, ), + { + 'Agent': + _reflection.GeneratedProtocolMessageType( + 'Agent', + (_message.Message, ), + { + 'ParametersEntry': + _reflection.GeneratedProtocolMessageType( + 'ParametersEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent.ParametersEntry) + }), + 'DESCRIPTOR': + _MODELREPOSITORYAGENTS_AGENT, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent) + }), + 'DESCRIPTOR': + _MODELREPOSITORYAGENTS, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents) + }) +_sym_db.RegisterMessage(ModelRepositoryAgents) +_sym_db.RegisterMessage(ModelRepositoryAgents.Agent) +_sym_db.RegisterMessage(ModelRepositoryAgents.Agent.ParametersEntry) + +ModelResponseCache = _reflection.GeneratedProtocolMessageType( + 'ModelResponseCache', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELRESPONSECACHE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelResponseCache) + }) +_sym_db.RegisterMessage(ModelResponseCache) + +ModelConfig = _reflection.GeneratedProtocolMessageType( + 'ModelConfig', + (_message.Message, ), + { + 'CcModelFilenamesEntry': + _reflection.GeneratedProtocolMessageType( + 'CcModelFilenamesEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELCONFIG_CCMODELFILENAMESENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig.CcModelFilenamesEntry) + }), + 'MetricTagsEntry': + _reflection.GeneratedProtocolMessageType( + 'MetricTagsEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELCONFIG_METRICTAGSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig.MetricTagsEntry) + }), + 'ParametersEntry': + _reflection.GeneratedProtocolMessageType( + 'ParametersEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELCONFIG_PARAMETERSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig.ParametersEntry) + }), + 'DESCRIPTOR': + _MODELCONFIG, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig) + }) +_sym_db.RegisterMessage(ModelConfig) +_sym_db.RegisterMessage(ModelConfig.CcModelFilenamesEntry) +_sym_db.RegisterMessage(ModelConfig.MetricTagsEntry) +_sym_db.RegisterMessage(ModelConfig.ParametersEntry) 
+ +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._options = None + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_options = b'8\001' + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._options = None + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_options = b'8\001' + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._options = None + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_options = b'8\001' + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._options = None + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_options = b'8\001' + _MODELENSEMBLING_STEP_INPUTMAPENTRY._options = None + _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_options = b'8\001' + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._options = None + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_options = b'8\001' + _MODELWARMUP_INPUTSENTRY._options = None + _MODELWARMUP_INPUTSENTRY._serialized_options = b'8\001' + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._options = None + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_options = b'8\001' + _MODELCONFIG_CCMODELFILENAMESENTRY._options = None + _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_options = b'8\001' + _MODELCONFIG_METRICTAGSENTRY._options = None + _MODELCONFIG_METRICTAGSENTRY._serialized_options = b'8\001' + _MODELCONFIG_PARAMETERSENTRY._options = None + _MODELCONFIG_PARAMETERSENTRY._serialized_options = b'8\001' + _DATATYPE._serialized_start = 8137 + _DATATYPE._serialized_end = 8387 + _MODELRATELIMITER._serialized_start = 35 + _MODELRATELIMITER._serialized_end = 185 + _MODELRATELIMITER_RESOURCE._serialized_start = 130 + _MODELRATELIMITER_RESOURCE._serialized_end = 185 + _MODELINSTANCEGROUP._serialized_start = 188 + _MODELINSTANCEGROUP._serialized_end = 707 + _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_start = 484 + _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_end = 640 + _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_start = 603 + _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_end = 640 + _MODELINSTANCEGROUP_KIND._serialized_start = 642 + _MODELINSTANCEGROUP_KIND._serialized_end = 707 + _MODELTENSORRESHAPE._serialized_start = 709 + _MODELTENSORRESHAPE._serialized_end = 744 + _MODELINPUT._serialized_start = 747 + _MODELINPUT._serialized_end = 1053 + _MODELINPUT_FORMAT._serialized_start = 994 + _MODELINPUT_FORMAT._serialized_end = 1053 + _MODELOUTPUT._serialized_start = 1056 + _MODELOUTPUT._serialized_end = 1234 + _BATCHINPUT._serialized_start = 1237 + _BATCHINPUT._serialized_end = 1582 + _BATCHINPUT_KIND._serialized_start = 1377 + _BATCHINPUT_KIND._serialized_end = 1582 + _BATCHOUTPUT._serialized_start = 1585 + _BATCHOUTPUT._serialized_end = 1728 + _BATCHOUTPUT_KIND._serialized_start = 1686 + _BATCHOUTPUT_KIND._serialized_end = 1728 + _MODELVERSIONPOLICY._serialized_start = 1731 + _MODELVERSIONPOLICY._serialized_end = 2003 + _MODELVERSIONPOLICY_LATEST._serialized_start = 1919 + _MODELVERSIONPOLICY_LATEST._serialized_end = 1949 + _MODELVERSIONPOLICY_ALL._serialized_start = 1951 + _MODELVERSIONPOLICY_ALL._serialized_end = 1956 + _MODELVERSIONPOLICY_SPECIFIC._serialized_start = 1958 + _MODELVERSIONPOLICY_SPECIFIC._serialized_end = 1986 + _MODELOPTIMIZATIONPOLICY._serialized_start = 2006 + _MODELOPTIMIZATIONPOLICY._serialized_end = 3795 + _MODELOPTIMIZATIONPOLICY_GRAPH._serialized_start = 2536 + 
_MODELOPTIMIZATIONPOLICY_GRAPH._serialized_end = 2558 + _MODELOPTIMIZATIONPOLICY_CUDA._serialized_start = 2561 + _MODELOPTIMIZATIONPOLICY_CUDA._serialized_end = 3259 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_start = 2711 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_end = 3259 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_start = 2910 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_end = 2930 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_start = 2933 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_end = 3156 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_start = 3055 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_end = 3156 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_start = 3055 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_end = 3156 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_start = 3262 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_end = 3682 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_start = 3498 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_end = 3682 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_start = 3633 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_end = 3682 + _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_start = 3684 + _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_end = 3720 + _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_start = 3722 + _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_end = 3795 + _MODELQUEUEPOLICY._serialized_start = 3798 + _MODELQUEUEPOLICY._serialized_end = 4017 + _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_start = 3979 + _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_end = 4017 + _MODELDYNAMICBATCHING._serialized_start = 4020 + _MODELDYNAMICBATCHING._serialized_end = 4431 + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_start = 4344 + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_end = 4431 + _MODELSEQUENCEBATCHING._serialized_start = 4434 + _MODELSEQUENCEBATCHING._serialized_end = 5697 + _MODELSEQUENCEBATCHING_CONTROL._serialized_start = 4759 + _MODELSEQUENCEBATCHING_CONTROL._serialized_end = 5064 + _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_start = 4947 + _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_end = 5064 + _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_start = 5066 + _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_end = 5153 + _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_start = 5156 + _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_end = 5294 + _MODELSEQUENCEBATCHING_STATE._serialized_start = 5297 + _MODELSEQUENCEBATCHING_STATE._serialized_end = 5469 + _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_start = 5471 + _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_end = 5559 + _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_start = 5561 + _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_end = 5678 + _MODELENSEMBLING._serialized_start = 5700 + _MODELENSEMBLING._serialized_end = 6049 + _MODELENSEMBLING_STEP._serialized_start = 5767 + _MODELENSEMBLING_STEP._serialized_end = 6049 + _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_start = 5952 + _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_end = 5999 + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_start = 6001 + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_end = 6049 + 
_MODELPARAMETER._serialized_start = 6051
+    _MODELPARAMETER._serialized_end = 6089
+    _MODELWARMUP._serialized_start = 6092
+    _MODELWARMUP._serialized_end = 6437
+    _MODELWARMUP_INPUT._serialized_start = 6209
+    _MODELWARMUP_INPUT._serialized_end = 6360
+    _MODELWARMUP_INPUTSENTRY._serialized_start = 6362
+    _MODELWARMUP_INPUTSENTRY._serialized_end = 6437
+    _MODELOPERATIONS._serialized_start = 6439
+    _MODELOPERATIONS._serialized_end = 6485
+    _MODELTRANSACTIONPOLICY._serialized_start = 6487
+    _MODELTRANSACTIONPOLICY._serialized_end = 6530
+    _MODELREPOSITORYAGENTS._serialized_start = 6533
+    _MODELREPOSITORYAGENTS._serialized_end = 6763
+    _MODELREPOSITORYAGENTS_AGENT._serialized_start = 6615
+    _MODELREPOSITORYAGENTS_AGENT._serialized_end = 6763
+    _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_start = 3633
+    _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_end = 3682
+    _MODELRESPONSECACHE._serialized_start = 6765
+    _MODELRESPONSECACHE._serialized_end = 6801
+    _MODELCONFIG._serialized_start = 6804
+    _MODELCONFIG._serialized_end = 8134
+    _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_start = 7929
+    _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_end = 7984
+    _MODELCONFIG_METRICTAGSENTRY._serialized_start = 7986
+    _MODELCONFIG_METRICTAGSENTRY._serialized_end = 8035
+    _MODELCONFIG_PARAMETERSENTRY._serialized_start = 8037
+    _MODELCONFIG_PARAMETERSENTRY._serialized_end = 8113
+# @@protoc_insertion_point(module_scope)
diff --git a/visualdl/server/app.py b/visualdl/server/app.py
index 06dff8163..2ce5d4a0a 100644
--- a/visualdl/server/app.py
+++ b/visualdl/server/app.py
@@ -32,6 +32,7 @@
 import visualdl.server
 from visualdl import __version__
+from visualdl.component.inference.fastdeploy_server import create_fastdeploy_api_call
 from visualdl.component.inference.model_convert_server import create_model_convert_api_call
 from visualdl.component.profiler.profiler_server import create_profiler_api_call
 from visualdl.server.api import create_api_call
@@ -70,6 +71,7 @@ def create_app(args):  # noqa: C901
     api_call = create_api_call(args.logdir, args.model, args.cache_timeout)
     profiler_api_call = create_profiler_api_call(args.logdir)
     inference_api_call = create_model_convert_api_call()
+    fastdeploy_api_call = create_fastdeploy_api_call()
     if args.telemetry:
         update_util.PbUpdater(args.product).start()
@@ -152,6 +154,15 @@ def serve_inference_api(method):
         return make_response(
             Response(data, mimetype=mimetype, headers=headers))

+    @app.route(api_path + '/fastdeploy/<method>', methods=["GET", "POST"])
+    def serve_fastdeploy_api(method):
+        if request.method == 'POST':
+            data, mimetype, headers = fastdeploy_api_call(method, request.form)
+        else:
+            data, mimetype, headers = fastdeploy_api_call(method, request.args)
+        return make_response(
+            Response(data, mimetype=mimetype, headers=headers))
+
     @app.route(check_live_path)
     def check_live():
         return '', 204

From a66fa1be4a44a6783789208e74e8792d70893799 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Tue, 22 Nov 2022 03:03:03 +0000
Subject: [PATCH 02/48] fix

---
 .../component/inference/fastdeploy_lib.py     | 60 ++++++++++++-------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py
index 0f56b70d0..cd54ee0d0 100644
--- a/visualdl/component/inference/fastdeploy_lib.py
+++ b/visualdl/component/inference/fastdeploy_lib.py
@@ -1,4 +1,5 @@
-import multiprocessing
+import os
+import json
 from subprocess import CalledProcessError
 from subprocess import PIPE
 from subprocess import Popen
@@ -10,45 +11,62 @@ def pbtxt2json(content: str):
-    '''
+    '''
     Convert protocol messages in text format to json format string.
     '''
-    message = text_format.Parse(content, ModelConfig())
-    json_string = json_format.MessageToJson(message)
-    return json_string
+    message = text_format.Parse(content, ModelConfig())
+    json_string = json_format.MessageToJson(message)
+    return json_string
 
 
 def json2pbtxt(content: str):
-    '''
+    '''
     Convert json format string to protocol messages in text format.
     '''
-    message = json_format.Parse(content, ModelConfig())
-    text_proto = text_format.MessageToString(message)
-    return text_proto
+    message = json_format.Parse(content, ModelConfig())
+    text_proto = text_format.MessageToString(message)
+    return text_proto
+
+
+def analyse_config(cur_dir:str):
+    '''
+    Analyse the model config in specified directory.
+    Return a json object to describe configuration.
+    '''
+    all_model_configs = {}
+    all_model_paths = {}
+    for parent_dir, sub_dirs, filenames in os.walk(cur_dir):
+        for filename in filenames:
+            if '.pbtxt' in filename:
+                model_name = os.path.basename(parent_dir)
+                all_model_paths[model_name] = parent_dir
+                json_config = json.loads(pbtxt2json(open(os.path.join(parent_dir, filename)).read()))
+                all_model_configs[model_name] = json_config
+                print(model_name)
+                print(json.dumps(json_config, indent=2))
 
 
 def launch_process(kwargs: dict):
-    '''
+    '''
     Launch a fastdeploy server according to specified arguments.
     '''
-    cmd = ['fastdeployserver']
-    for key, value in kwargs.items():
-        cmd.append('--{}'.format(key))
-        cmd.append('{}'.foramt(value))
-    p = Popen(cmd, stdout=PIPE, bufsize=1, universal_newlines=True)
-    return p
+    cmd = ['fastdeployserver']
+    for key, value in kwargs.items():
+        cmd.append('--{}'.format(key))
+        cmd.append('{}'.foramt(value))
+    p = Popen(cmd, stdout=PIPE, bufsize=1, universal_newlines=True)
+    return p
 
 
 def get_process_output(process):
-    '''
+    '''
     Get the standard output of a opened subprocess.
     '''
-    for line in process.stdout:
-        yield line
+    for line in process.stdout:
+        yield line
 
 
 def kill_process(process):
-    '''
+    '''
     Stop a opened subprocess.
     '''
-    process.kill()
+    process.kill()

From 9a929753c790a6253eb032ae7c13668e0b33c188 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Wed, 23 Nov 2022 08:24:51 +0000
Subject: [PATCH 03/48] add code

---
 .../component/inference/fastdeploy_lib.py     | 200 +++++++++++++++++-
 .../component/inference/fastdeploy_server.py  |   7 +-
 2 files changed, 197 insertions(+), 10 deletions(-)

diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py
index cd54ee0d0..0de435b1a 100644
--- a/visualdl/component/inference/fastdeploy_lib.py
+++ b/visualdl/component/inference/fastdeploy_lib.py
@@ -1,5 +1,7 @@
 import os
 import json
+import re
+import copy
 from subprocess import CalledProcessError
 from subprocess import PIPE
 from subprocess import Popen
@@ -34,15 +36,199 @@ def analyse_config(cur_dir:str):
     '''
     all_model_configs = {}
     all_model_paths = {}
-    for parent_dir, sub_dirs, filenames in os.walk(cur_dir):
+    all_model_versions = {}
+    parent_dir, sub_dirs, filenames = os.walk(cur_dir).send(None)  # models can only put directory in model repository,
+    # so we should only search depth 1 directories.
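+    # Illustrative repository layout this walk assumes (model, version and
+    # file names are hypothetical):
+    #     cur_dir/
+    #         yolov5/
+    #             config.pbtxt
+    #             1/
+    #                 model.onnx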
+ for model_dir_name in sub_dirs: + model_dir, model_sub_dirs, filenames = os.walk(os.path.join(parent_dir, model_dir_name)).send(None) + model_name = os.path.basename(model_dir) for filename in filenames: if '.pbtxt' in filename: - model_name = os.path.basename(parent_dir) - all_model_paths[model_name] = parent_dir - json_config = json.loads(pbtxt2json(open(os.path.join(parent_dir, filename)).read())) - all_model_configs[model_name] = json_config - print(model_name) - print(json.dumps(json_config, indent=2)) + all_model_paths[model_name] = model_dir # store model path + json_config = json.loads(pbtxt2json(open(os.path.join(model_dir, filename)).read())) + all_model_configs[model_name] = json_config # store original config file content in json format + for model_sub_dir in model_sub_dirs: + if re.match('\d+', model_sub_dir): # version directory consists of numbers + for version_resource_file in os.listdir(os.path.join(model_dir, model_sub_dir)): + if model_name not in all_model_versions: + all_model_versions[model_name] = {} + if model_sub_dir not in all_model_versions[model_name]: + all_model_versions[model_name][model_sub_dir] = [] + all_model_versions[model_name][model_sub_dir].append(version_resource_file) + +def exchange_format_to_original_format(exchange_format): + ''' + Change config exchange format to original format. + ''' + ensembles = [] + models = [] + all_models = {} + if 'ensembles' in exchange_format: + emsembles = exchange_format['ensembles'] + if 'models' in exchange_format: + models = exchange_format['models'] + alls = ensembles + models + for model_config in alls: + # 1. add 'execution_accelerators' keyword + if 'optimization' in model_config: + optimization_config = model_config['optimization'] + del model_config['optimization'] + model_config['optimization'] = {} + model_config['optimization']['execution_accelerators'] = optimization_config + # 2. put parameters in 'cpu_execution_accelerator' and 'gpu_execution_accelerator' inside 'parameters' keyword + for accelerator_name, accelerator_items in optimization_config.items(): + reversed_accelerator_items = [] + for accelerator_item in accelerator_items: + transformed_accelerator_item = {} + for key, value in accelerator_item.items(): + if key == 'name': + transformed_accelerator_item[key] = value + else: + if 'parameters' not in transformed_accelerator_item: + transformed_accelerator_item['parameters'] = {} + transformed_accelerator_item['parameters'][key] = value + reversed_accelerator_items.append(transformed_accelerator_item) + del optimization_config[accelerator_name] + optimization_config[accelerator_name] = reversed_accelerator_items + + # 3. delete versions information + if 'versions' in model_config: + del model_config['versions'] + if 'platform' in model_config and model_config['platform'] == 'ensemble': # emsemble model + # 4. add 'ensembleScheduling' keyword + if 'step' in model_config: + step_configs = model_config['step'] + if 'ensembleScheduling' not in model_config: + model_config['ensembleScheduling'] = {} + model_config['ensembleScheduling']['step'] = step_configs + del model_config['step'] + # 5. 
remove two virtual models(feed, fetch), and "modelType", "inputModels", "outputModels", "inputVars", "outputVars" + remove_list = [] + for model_config_in_step in step_configs: + if model_config_in_step['modelName'] == 'feed' or model_config_in_step['modelName'] == 'fetch': + remove_list.append(model_config_in_step) + continue + del model_config_in_step['modelType'] + del model_config_in_step['inputModels'] + del model_config_in_step['outputModels'] + del model_config_in_step['inputVars'] + del model_config_in_step['outputVars'] + all_models['name'] = model_config + return all_models + + +def original_format_to_exchange_format(original_format, version_info): + ''' + Change config original format to exchange format. + ''' + exchange_format = {} + exchange_format['ensembles'] = [] + exchange_format['models'] = [] + for model_name, model_config in original_format.items(): + # 1. remove 'execution_accelerators' keyword + # 2. put parameters in 'cpu_execution_accelerator' and 'gpu_execution_accelerator' outside + transformed_config = copy.deepcopy(model_config) + if 'optimization' in model_config: + if 'execution_accelerators' in model_config['optimization']: + transformed_optimization_config = {} + for accelerator_name, accelerator_items in model_config['optimization']['execution_accelerators'].items(): + transformed_optimization_config[accelerator_name] = [] + for accelerator_item in accelerator_items: + transformed_accelerator_item = {} + for key, value in accelerator_item.items(): + if key == 'parameters': + for parameter_name, parameter_value in value.items(): + transformed_accelerator_item[parameter_name] = parameter_value + else: + transformed_accelerator_item[key] = value + transformed_optimization_config[accelerator_name].append(transformed_accelerator_item) + del transformed_config['optimization'] + transformed_config['optimization'] = transformed_optimization_config + # 3. add versions information + if model_name in version_info: + transformed_config[model_name]['versions'] = version_info[model_name] + if 'platform' in model_config and model_config['platform'] == 'ensemble': # emsemble model + # 4. remove ensembleScheduling + if 'ensembleScheduling' in model_config: + if 'step' in model_config['ensembleScheduling']: + del transformed_config['ensembleScheduling'] + transformed_config['step'] = model_config['ensembleScheduling']['step'] + # 5. add two virtual models(feed, fetch), and "modelType", "inputModels", "outputModels", "inputVars", "outputVars" + for model_config_in_step in transformed_config['step']: + model_config_in_step['modelType'] = 'normal' + model_config_in_step['inputModels'] = [] + model_config_in_step['outputModels'] = [] + model_config_in_step['inputVars'] = [] + model_config_in_step['outputVars'] = [] + + transformed_config['step'].append({ + "modelName": "feed", + "modelType": "virtual", + "inputModels": [], + "outputModels": [], + "inputVars": [], + "outputVars": [] + }) + transformed_config['step'].append({ + "modelName": "fetch", + "modelType": "virtual", + "inputModels": [], + "outputModels": [], + "inputVars": [], + "outputVars": [] + }) + analyse_step_relationships(transformed_config['step'], transformed_config['input'], transformed_config['output']) + exchange_format['ensembles'].append(transformed_config) + elif 'backend' in model_config: # single model + exchange_format['models'].append(transformed_config) + return exchange_format + +def analyse_step_relationships(step_config, inputs, outputs): + ''' + Analyse model relationships in ensemble step. 
And fill "inputModels", "outputModels", "inputVars", "outputVars" in step_config. + step_config: step data in ensemble model config. + inputs: inputs in ensemble model config. + outputs: outputs in ensemble model config. + ''' + models_dict = {} + vars_dict = {} + for model_config_in_step in step_config: + models_dict[model_config_in_step['modelName']] = model_config_in_step + if model_config_in_step['modelType'] == 'virtual': + for var in inputs: + if var['name'] not in vars_dict: + vars_dict[var['name']] = {} + vars_dict[var['name']]['from_models'] = [] + vars_dict[var['name']]['to_models'] = [] + vars_dict[var['name']]['from_models'].append('feed') + for var in outputs: + if var['name'] not in vars_dict: + vars_dict[var['name']] = {} + vars_dict[var['name']]['from_models'] = [] + vars_dict[var['name']]['to_models'] = [] + vars_dict[var['name']]['to_models'].append('fetch') + else: + for var_placehold_name, var_name in model_config_in_step['inputMap'].items(): + if var_name not in vars_dict: + vars_dict[var_name] = {} + vars_dict[var_name]['from_models'] = [] + vars_dict[var_name]['to_models'] = [] + vars_dict[var_name]['to_models'].append(model_config_in_step['modelName']) + + for var_placehold_name, var_name in model_config_in_step['outputMap'].items(): + if var_name not in vars_dict: + vars_dict[var_name] = {} + vars_dict[var_name]['from_models'] = [] + vars_dict[var_name]['to_models'] = [] + vars_dict[var_name]['from_models'].append(model_config_in_step['modelName']) + for var_name, relationships in vars_dict.items(): + for from_model in relationships['from_models']: + models_dict[from_model]['outputVars'].append(var_name) + models_dict[from_model]['outputModels'].extend(relationships['to_models']) + for to_model in relationships['to_models']: + models_dict[to_model]['inputVars'].append(var_name) + models_dict[to_model]['inputModels'].extend(relationships['from_models']) + def launch_process(kwargs: dict): diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index d54a3a7a4..48705de47 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -57,7 +57,7 @@ def get_config(self, cur_dir): pass @result() - def config_update(self, cur_dir, model_name): + def config_update(self, cur_dir, model_name, config): pass @result() @@ -83,11 +83,12 @@ def create_fastdeploy_api_call(): api = FastDeployServerApi() routes = { 'get_directory': (api.get_directory, ['dir']), - 'config_update': (api.config_update, ['dir', 'name']), + 'config_update': (api.config_update, ['dir', 'name', 'config']), 'get_config': (api.get_config, ['dir']), 'start_server': (api.start_server, ['dir', 'args']), 'stop_server': (api.stop_server, ['server_id']), - 'get_server_output': (api.get_server_output, ['server_id']) + 'get_server_output': (api.get_server_output, ['server_id']), + 'test_server': (api.test_server_with_gradio, ['server_id']) } def call(path: str, args): From 65e7a4c6cd2438120348a72752b414c1823b9627 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 30 Nov 2022 02:20:24 +0000 Subject: [PATCH 04/48] fix --- .../component/inference/fastdeploy_lib.py | 50 +++++++++--------- .../component/inference/fastdeploy_server.py | 52 +++++++++++++++++-- visualdl/server/app.py | 44 ++++++++++++++++ 3 files changed, 118 insertions(+), 28 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 0de435b1a..d734c31e0 100644 --- 
a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -55,6 +55,7 @@ def analyse_config(cur_dir:str): if model_sub_dir not in all_model_versions[model_name]: all_model_versions[model_name][model_sub_dir] = [] all_model_versions[model_name][model_sub_dir].append(version_resource_file) + return all_model_configs, all_model_versions, all_model_paths def exchange_format_to_original_format(exchange_format): ''' @@ -64,7 +65,7 @@ def exchange_format_to_original_format(exchange_format): models = [] all_models = {} if 'ensembles' in exchange_format: - emsembles = exchange_format['ensembles'] + ensembles = exchange_format['ensembles'] if 'models' in exchange_format: models = exchange_format['models'] alls = ensembles + models @@ -74,23 +75,22 @@ def exchange_format_to_original_format(exchange_format): optimization_config = model_config['optimization'] del model_config['optimization'] model_config['optimization'] = {} - model_config['optimization']['execution_accelerators'] = optimization_config - # 2. put parameters in 'cpu_execution_accelerator' and 'gpu_execution_accelerator' inside 'parameters' keyword - for accelerator_name, accelerator_items in optimization_config.items(): - reversed_accelerator_items = [] - for accelerator_item in accelerator_items: - transformed_accelerator_item = {} - for key, value in accelerator_item.items(): - if key == 'name': - transformed_accelerator_item[key] = value - else: - if 'parameters' not in transformed_accelerator_item: - transformed_accelerator_item['parameters'] = {} - transformed_accelerator_item['parameters'][key] = value - reversed_accelerator_items.append(transformed_accelerator_item) - del optimization_config[accelerator_name] - optimization_config[accelerator_name] = reversed_accelerator_items - + model_config['optimization']['executionAccelerators'] = optimization_config + # 2. put parameters in 'cpu_execution_accelerator' and 'gpu_execution_accelerator' inside 'parameters' keyword + for accelerator_name, accelerator_items in optimization_config.items(): + reversed_accelerator_items = [] + for accelerator_item in accelerator_items: + transformed_accelerator_item = {} + for key, value in accelerator_item.items(): + if key == 'name': + transformed_accelerator_item[key] = value + else: + if 'parameters' not in transformed_accelerator_item: + transformed_accelerator_item['parameters'] = {} + transformed_accelerator_item['parameters'][key] = value + reversed_accelerator_items.append(transformed_accelerator_item) + del optimization_config[accelerator_name] + optimization_config[accelerator_name] = reversed_accelerator_items # 3. delete versions information if 'versions' in model_config: del model_config['versions'] @@ -113,7 +113,9 @@ def exchange_format_to_original_format(exchange_format): del model_config_in_step['outputModels'] del model_config_in_step['inputVars'] del model_config_in_step['outputVars'] - all_models['name'] = model_config + for remove_item in remove_list: + step_configs.remove(remove_item) + all_models[model_config['name']] = model_config return all_models @@ -125,13 +127,13 @@ def original_format_to_exchange_format(original_format, version_info): exchange_format['ensembles'] = [] exchange_format['models'] = [] for model_name, model_config in original_format.items(): - # 1. remove 'execution_accelerators' keyword - # 2. put parameters in 'cpu_execution_accelerator' and 'gpu_execution_accelerator' outside + # 1. remove 'executionAccelerators' keyword + # 2. 
put parameters in 'cpuExecutionAccelerator' and 'gpuExecutionAccelerator' outside transformed_config = copy.deepcopy(model_config) if 'optimization' in model_config: - if 'execution_accelerators' in model_config['optimization']: + if 'executionAccelerators' in model_config['optimization']: transformed_optimization_config = {} - for accelerator_name, accelerator_items in model_config['optimization']['execution_accelerators'].items(): + for accelerator_name, accelerator_items in model_config['optimization']['executionAccelerators'].items(): transformed_optimization_config[accelerator_name] = [] for accelerator_item in accelerator_items: transformed_accelerator_item = {} @@ -146,7 +148,7 @@ def original_format_to_exchange_format(original_format, version_info): transformed_config['optimization'] = transformed_optimization_config # 3. add versions information if model_name in version_info: - transformed_config[model_name]['versions'] = version_info[model_name] + transformed_config['versions'] = version_info[model_name] if 'platform' in model_config and model_config['platform'] == 'ensemble': # emsemble model # 4. remove ensembleScheduling if 'ensembleScheduling' in model_config: diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 48705de47..ec9f99e1f 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -19,16 +19,23 @@ from collections import deque from pathlib import Path from threading import Lock +import threading +from multiprocessing import Process +import socket +import time +import requests from flask import request from .fastdeploy_lib import get_process_output -from .fastdeploy_lib import json2pbtxt +from .fastdeploy_lib import json2pbtxt,analyse_config +from .fastdeploy_lib import exchange_format_to_original_format, original_format_to_exchange_format from .fastdeploy_lib import kill_process from .fastdeploy_lib import launch_process from .fastdeploy_lib import pbtxt2json from visualdl.server.api import gen_result from visualdl.server.api import result +from .fastdeploy_client.client_app import create_gradio_client_app class FastDeployServerApi(object): @@ -36,6 +43,8 @@ def __init__(self): self.root_dir = Path(os.getcwd()) self.opened_servers = { } # Use to store the opened server process pid and process itself + self.client_port = None + self.model_paths = {} @result() def get_directory(self, cur_dir): @@ -54,11 +63,20 @@ def get_directory(self, cur_dir): @result() def get_config(self, cur_dir): - pass + all_model_configs, all_model_versions, all_model_paths = analyse_config(cur_dir) + for name, value in all_model_paths.items(): + self.model_paths[(Path(os.path.abspath(cur_dir)),name)] = value + return original_format_to_exchange_format(all_model_configs, all_model_versions) @result() def config_update(self, cur_dir, model_name, config): - pass + config = json.loads(config) + all_models = exchange_format_to_original_format(config) + model_dir = self.model_paths[(Path(os.path.abspath(cur_dir)), model_name)] + text_proto = json2pbtxt(json.dumps(all_models[model_name])) + with open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f: + f.write(text_proto) + return @result() def start_server(self, configs): @@ -77,6 +95,32 @@ def stop_server(self, server_id): def get_server_output(self, server_id): stdout_generator = get_process_output(server_id) return stdout_generator + + def create_fastdeploy_client(self): + if self.client_port is None: + def 
get_free_tcp_port():
+            tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
+            tcp.bind(('localhost', 0))
+            addr, port = tcp.getsockname()
+            tcp.close()
+            return port
+
+        self.client_port = get_free_tcp_port()
+        app = create_gradio_client_app()
+        thread = Process(target=app.launch, kwargs={'server_port': self.client_port})
+        thread.start()
+
+        def check_alive():
+            while True:
+                try:
+                    requests.get('http://localhost:{}/'.format(self.client_port))
+                    break
+                except Exception:
+                    time.sleep(1)
+
+        check_alive()
+        return self.client_port
 
 
 def create_fastdeploy_api_call():
@@ -88,7 +132,7 @@ def create_fastdeploy_api_call():
         'start_server': (api.start_server, ['dir', 'args']),
         'stop_server': (api.stop_server, ['server_id']),
         'get_server_output': (api.get_server_output, ['server_id']),
-        'test_server': (api.test_server_with_gradio, ['server_id'])
+        'create_fastdeploy_client': (api.create_fastdeploy_client, [])
     }
 
     def call(path: str, args):
diff --git a/visualdl/server/app.py b/visualdl/server/app.py
index 2ce5d4a0a..2be0c91bd 100644
--- a/visualdl/server/app.py
+++ b/visualdl/server/app.py
@@ -162,6 +162,50 @@ def serve_fastdeploy_api(method):
             data, mimetype, headers = fastdeploy_api_call(method, request.args)
         return make_response(
             Response(data, mimetype=mimetype, headers=headers))
+
+    @app.route(api_path + '/fastdeploy/fastdeploy_client', methods=["GET", "POST"])
+    def serve_fastdeploy_create_fastdeploy_client():
+        try:
+            if request.method == 'POST':
+                fastdeploy_api_call('create_fastdeploy_client', request.form)
+            else:
+                fastdeploy_api_call('create_fastdeploy_client', request.args)
+        except Exception as e:
+            error_msg = '{}'.format(e)
+            return make_response(error_msg)
+        return redirect(api_path + "/fastdeploy/fastdeploy_client/app", code=302)
+
+    @app.route(api_path + "/fastdeploy/fastdeploy_client/<path:path>", methods=["GET", "POST"])
+    def request_fastdeploy_create_fastdeploy_client_app(path: str):
+        '''
+        Gradio app server url interface. We route urls for gradio app to gradio server.
+
+        Args:
+            path(str): All resource path from gradio server.
+
+        Returns:
+            Anything from gradio server.
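+
+        Example (illustrative): a GET for
+        {api_path}/fastdeploy/fastdeploy_client/static/app.js is forwarded to
+        http://localhost:{port}/static/app.js, where port is the port of the
+        gradio client app, and the gradio response is passed back unchanged.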
+ ''' + if request.method == 'POST': + port = fastdeploy_api_call('create_fastdeploy_client', request.form) + else: + port = fastdeploy_api_call('create_fastdeploy_client', request.args) + if path == 'app': + proxy_url = request.url.replace(request.host_url.rstrip('/') + api_path + '/fastdeploy/fastdeploy_client/app', + 'http://localhost:{}/'.format(port)) + else: + proxy_url = request.url.replace(request.host_url.rstrip('/') + api_path + '/fastdeploy/fastdeploy_client/', + 'http://localhost:{}/'.format(port)) + resp = requests.request(method=request.method, + url=proxy_url, + headers={key: value + for (key, value) in request.headers if key != 'Host'}, + data=request.get_data(), + cookies=request.cookies, + allow_redirects=False) + headers = [(name, value) for (name, value) in resp.raw.headers.items()] + response = Response(resp.content, resp.status_code, headers) + return response @app.route(check_live_path) def check_live(): From 5f00917f7b5e45bc34614017db70c7f78297e013 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 1 Dec 2022 10:47:42 +0000 Subject: [PATCH 05/48] fix --- visualdl/component/inference/fastdeploy_lib.py | 14 ++++++++++---- visualdl/component/inference/fastdeploy_server.py | 14 +++++++++----- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index d734c31e0..a7a63a981 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -5,6 +5,7 @@ from subprocess import CalledProcessError from subprocess import PIPE from subprocess import Popen +import select import google.protobuf.json_format as json_format import google.protobuf.text_format as text_format @@ -240,8 +241,8 @@ def launch_process(kwargs: dict): cmd = ['fastdeployserver'] for key, value in kwargs.items(): cmd.append('--{}'.format(key)) - cmd.append('{}'.foramt(value)) - p = Popen(cmd, stdout=PIPE, bufsize=1, universal_newlines=True) + cmd.append('{}'.format(value)) + p = Popen(cmd, stdout=PIPE, stderr=PIPE, bufsize=1, universal_newlines=True) return p @@ -249,8 +250,13 @@ def get_process_output(process): ''' Get the standard output of a opened subprocess. 
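+    Lines are yielded as they appear on the child's stdout or stderr, using
+    select to avoid blocking on an empty pipe; the generator stops once the
+    process has exited.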
     '''
-    for line in process.stdout:
-        yield line
+    while process.poll() is None:
+        readlist, _, _ = select.select([process.stdout, process.stderr], [], [])
+        for item in readlist:
+            data = item.readline()
+            if not data:
+                return
+            yield data
 
 
 def kill_process(process):
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
index ec9f99e1f..b4493e4ea 100644
--- a/visualdl/component/inference/fastdeploy_server.py
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -80,27 +80,32 @@ def config_update(self, cur_dir, model_name, config):
 
     @result()
     def start_server(self, configs):
+        configs = json.loads(configs)
         process = launch_process(configs)
         self.opened_servers[process.pid] = process
         return process.pid
 
     @result()
     def stop_server(self, server_id):
+        server_id = int(server_id)
         if server_id not in self.opened_servers:
             return
         kill_process(self.opened_servers[server_id])
         del self.opened_servers[server_id]
 
-    @result('text/plain')
+    @result('application/octet-stream')
     def get_server_output(self, server_id):
-        stdout_generator = get_process_output(server_id)
+        server_id = int(server_id)
+        if server_id not in self.opened_servers:
+            return
+        stdout_generator = get_process_output(self.opened_servers[server_id])
         return stdout_generator
 
     def create_fastdeploy_client(self):
         if self.client_port is None:
             def get_free_tcp_port():
                 tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-                tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
+                # tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
                 tcp.bind(('localhost', 0))
                 addr, port = tcp.getsockname()
                 tcp.close()
@@ -110,7 +115,6 @@ def get_free_tcp_port():
             self.client_port = get_free_tcp_port()
             app = create_gradio_client_app()
-            thread = Process(target=app.launch, kwargs={'server_port': self.client_port})
+            thread = Process(
+                target=app.launch, kwargs={'server_port': self.client_port})
             thread.start()
-
             def check_alive():
                 while True:
                     try:
-                        requests.get('http://localhost:{}/'.format(self.client_port))
+                        requests.get('http://localhost:{}/'.format(
+                            self.client_port))
                         break
                     except Exception:
                         time.sleep(1)
@@ -129,7 +133,7 @@ def create_fastdeploy_api_call():
         'get_directory': (api.get_directory, ['dir']),
         'config_update': (api.config_update, ['dir', 'name', 'config']),
         'get_config': (api.get_config, ['dir']),
-        'start_server': (api.start_server, ['dir', 'args']),
+        'start_server': (api.start_server, ['config']),
         'stop_server': (api.stop_server, ['server_id']),
         'get_server_output': (api.get_server_output, ['server_id']),
         'create_fastdeploy_client': (api.create_fastdeploy_client, [])
     }

From 8af5ab94f54c5f02081c89a0f7adc2f08fc234e7 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Fri, 2 Dec 2022 14:11:17 +0800
Subject: [PATCH 06/48] add fastdeploy server component

---
 .../component/inference/fastdeploy_lib.py     | 502 ++++++++++--------
 .../component/inference/fastdeploy_server.py  |  75 +--
 visualdl/utils/dir.py                         |  11 +-
 3 files changed, 338 insertions(+), 250 deletions(-)

diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py
index a7a63a981..096976b05 100644
--- a/visualdl/component/inference/fastdeploy_lib.py
+++ b/visualdl/component/inference/fastdeploy_lib.py
@@ -1,266 +1,340 @@
-import os
+import copy
 import json
+import os
+import random
 import re
-import copy
-from subprocess import CalledProcessError
-from subprocess import PIPE
+import signal
+import string
 from subprocess import Popen
-import select
+from subprocess import STDOUT
 
 import google.protobuf.json_format as json_format
 import google.protobuf.text_format as text_format
 
 from .proto.model_config.protxt_pb2 import ModelConfig
+from visualdl.utils.dir import FASTDEPLOYSERVER_PATH
 
 
 def pbtxt2json(content: str):
-    '''
+    '''
     Convert protocol messages in text format to json format string.
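+    For example (illustrative), the pbtxt snippet
+        name: "yolov5"
+        max_batch_size: 16
+    is rendered as JSON with lowerCamelCase keys:
+        '{"name": "yolov5", "maxBatchSize": 16}'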
     '''
-    message = text_format.Parse(content, ModelConfig())
-    json_string = json_format.MessageToJson(message)
-    return json_string
+    message = text_format.Parse(content, ModelConfig())
+    json_string = json_format.MessageToJson(message)
+    return json_string
 
 
 def json2pbtxt(content: str):
-    '''
+    '''
     Convert json format string to protocol messages in text format.
     '''
-    message = json_format.Parse(content, ModelConfig())
-    text_proto = text_format.MessageToString(message)
-    return text_proto
+    message = json_format.Parse(content, ModelConfig())
+    text_proto = text_format.MessageToString(message)
+    return text_proto
 
 
-def analyse_config(cur_dir:str):
-    '''
+def analyse_config(cur_dir: str):
+    '''
     Analyse the model config in specified directory.
     Return a json object to describe configuration.
     '''
+    all_model_configs = {}
+    all_model_paths = {}
+    all_model_versions = {}
+    parent_dir, sub_dirs, filenames = os.walk(cur_dir).send(
+        None)  # models can only put directory in model repository,
+    # so we should only search depth 1 directories.
+    for model_dir_name in sub_dirs:
+        model_dir, model_sub_dirs, filenames = os.walk(
+            os.path.join(parent_dir, model_dir_name)).send(None)
+        model_name = os.path.basename(model_dir)
+        for filename in filenames:
+            if 'config.pbtxt' in filename:
+                all_model_paths[model_name] = model_dir  # store model path
+                json_config = json.loads(
+                    pbtxt2json(open(os.path.join(model_dir, filename)).read()))
+                all_model_configs[
+                    model_name] = json_config  # store original config file content in json format
+        for model_sub_dir in model_sub_dirs:
+            if re.match(
+                    r'\d+',
+                    model_sub_dir):  # version directory consists of numbers
+                for version_resource_file in os.listdir(
+                        os.path.join(model_dir, model_sub_dir)):
+                    if model_name not in all_model_versions:
+                        all_model_versions[model_name] = {}
+                    if model_sub_dir not in all_model_versions[model_name]:
+                        all_model_versions[model_name][model_sub_dir] = []
+                    all_model_versions[model_name][model_sub_dir].append(
+                        version_resource_file)
+    return all_model_configs, all_model_versions, all_model_paths
 
 
 def exchange_format_to_original_format(exchange_format):
-    '''
+    '''
     Change config exchange format to original format.
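+    For example (illustrative), {'models': [{'name': 'det', 'backend': 'onnxruntime'}],
+    'ensembles': []} comes back as {'det': {'name': 'det', 'backend': 'onnxruntime'}}.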
''' - ensembles = [] - models = [] - all_models = {} - if 'ensembles' in exchange_format: - ensembles = exchange_format['ensembles'] - if 'models' in exchange_format: - models = exchange_format['models'] - alls = ensembles + models - for model_config in alls: - # 1. add 'execution_accelerators' keyword - if 'optimization' in model_config: - optimization_config = model_config['optimization'] - del model_config['optimization'] - model_config['optimization'] = {} - model_config['optimization']['executionAccelerators'] = optimization_config - # 2. put parameters in 'cpu_execution_accelerator' and 'gpu_execution_accelerator' inside 'parameters' keyword - for accelerator_name, accelerator_items in optimization_config.items(): - reversed_accelerator_items = [] - for accelerator_item in accelerator_items: - transformed_accelerator_item = {} - for key, value in accelerator_item.items(): - if key == 'name': - transformed_accelerator_item[key] = value - else: - if 'parameters' not in transformed_accelerator_item: - transformed_accelerator_item['parameters'] = {} - transformed_accelerator_item['parameters'][key] = value - reversed_accelerator_items.append(transformed_accelerator_item) - del optimization_config[accelerator_name] - optimization_config[accelerator_name] = reversed_accelerator_items - # 3. delete versions information - if 'versions' in model_config: - del model_config['versions'] - if 'platform' in model_config and model_config['platform'] == 'ensemble': # emsemble model - # 4. add 'ensembleScheduling' keyword - if 'step' in model_config: - step_configs = model_config['step'] - if 'ensembleScheduling' not in model_config: - model_config['ensembleScheduling'] = {} - model_config['ensembleScheduling']['step'] = step_configs - del model_config['step'] - # 5. remove two virtual models(feed, fetch), and "modelType", "inputModels", "outputModels", "inputVars", "outputVars" - remove_list = [] - for model_config_in_step in step_configs: - if model_config_in_step['modelName'] == 'feed' or model_config_in_step['modelName'] == 'fetch': - remove_list.append(model_config_in_step) - continue - del model_config_in_step['modelType'] - del model_config_in_step['inputModels'] - del model_config_in_step['outputModels'] - del model_config_in_step['inputVars'] - del model_config_in_step['outputVars'] - for remove_item in remove_list: - step_configs.remove(remove_item) - all_models[model_config['name']] = model_config - return all_models - + ensembles = [] + models = [] + all_models = {} + if 'ensembles' in exchange_format: + ensembles = exchange_format['ensembles'] + if 'models' in exchange_format: + models = exchange_format['models'] + alls = ensembles + models + for model_config in alls: + # 1. add 'executionAccelerators' keyword + if 'optimization' in model_config: + optimization_config = model_config['optimization'] + del model_config['optimization'] + model_config['optimization'] = {} + model_config['optimization'][ + 'executionAccelerators'] = optimization_config + # 2. delete versions information + if 'versions' in model_config: + del model_config['versions'] + if 'platform' in model_config and model_config[ + 'platform'] == 'ensemble': # emsemble model + # 3. add 'ensembleScheduling' keyword + if 'step' in model_config: + step_configs = model_config['step'] + if 'ensembleScheduling' not in model_config: + model_config['ensembleScheduling'] = {} + model_config['ensembleScheduling']['step'] = step_configs + del model_config['step'] + # 4. 
remove two virtual models(feed, fetch), and + # "modelType", "inputModels", "outputModels", "inputVars", "outputVars" + remove_list = [] + for model_config_in_step in step_configs: + if model_config_in_step[ + 'modelName'] == 'feed' or model_config_in_step[ + 'modelName'] == 'fetch': + remove_list.append(model_config_in_step) + continue + del model_config_in_step['modelType'] + del model_config_in_step['inputModels'] + del model_config_in_step['outputModels'] + del model_config_in_step['inputVars'] + del model_config_in_step['outputVars'] + for remove_item in remove_list: + step_configs.remove(remove_item) + all_models[model_config['name']] = model_config + return all_models + def original_format_to_exchange_format(original_format, version_info): - ''' + ''' Change config original format to exchange format. ''' - exchange_format = {} - exchange_format['ensembles'] = [] - exchange_format['models'] = [] - for model_name, model_config in original_format.items(): - # 1. remove 'executionAccelerators' keyword - # 2. put parameters in 'cpuExecutionAccelerator' and 'gpuExecutionAccelerator' outside - transformed_config = copy.deepcopy(model_config) - if 'optimization' in model_config: - if 'executionAccelerators' in model_config['optimization']: - transformed_optimization_config = {} - for accelerator_name, accelerator_items in model_config['optimization']['executionAccelerators'].items(): - transformed_optimization_config[accelerator_name] = [] - for accelerator_item in accelerator_items: - transformed_accelerator_item = {} - for key, value in accelerator_item.items(): - if key == 'parameters': - for parameter_name, parameter_value in value.items(): - transformed_accelerator_item[parameter_name] = parameter_value - else: - transformed_accelerator_item[key] = value - transformed_optimization_config[accelerator_name].append(transformed_accelerator_item) - del transformed_config['optimization'] - transformed_config['optimization'] = transformed_optimization_config - # 3. add versions information - if model_name in version_info: - transformed_config['versions'] = version_info[model_name] - if 'platform' in model_config and model_config['platform'] == 'ensemble': # emsemble model - # 4. remove ensembleScheduling - if 'ensembleScheduling' in model_config: - if 'step' in model_config['ensembleScheduling']: - del transformed_config['ensembleScheduling'] - transformed_config['step'] = model_config['ensembleScheduling']['step'] - # 5. add two virtual models(feed, fetch), and "modelType", "inputModels", "outputModels", "inputVars", "outputVars" - for model_config_in_step in transformed_config['step']: - model_config_in_step['modelType'] = 'normal' - model_config_in_step['inputModels'] = [] - model_config_in_step['outputModels'] = [] - model_config_in_step['inputVars'] = [] - model_config_in_step['outputVars'] = [] + exchange_format = {} + exchange_format['ensembles'] = [] + exchange_format['models'] = [] + # 0. 
transform version info into component format in frontend + for model_name, version_filenames_dict in version_info.items(): + version_info_for_frontend = [] + for version_name, filenames in version_filenames_dict.items(): + version_filenames_dict_for_frontend = {} + version_filenames_dict_for_frontend['title'] = version_name + version_filenames_dict_for_frontend['key'] = version_name + version_filenames_dict_for_frontend['children'] = [] + for filename in filenames: + version_filenames_dict_for_frontend['children'].append({ + 'title': + filename, + 'key': + filename + }) + version_info_for_frontend.append( + version_filenames_dict_for_frontend) + version_info[model_name] = version_info_for_frontend + + for model_name, model_config in original_format.items(): + # 1. remove 'executionAccelerators' keyword + transformed_config = copy.deepcopy(model_config) + if 'optimization' in model_config: + if 'executionAccelerators' in model_config['optimization']: + transformed_optimization_config = model_config['optimization'][ + 'executionAccelerators'] + del transformed_config['optimization'] + transformed_config[ + 'optimization'] = transformed_optimization_config + # 2. add versions information + if model_name in version_info: + transformed_config['versions'] = version_info[model_name] + if 'platform' in model_config and model_config[ + 'platform'] == 'ensemble': # emsemble model + # 3. remove ensembleScheduling + if 'ensembleScheduling' in model_config: + if 'step' in model_config['ensembleScheduling']: + del transformed_config['ensembleScheduling'] + transformed_config['step'] = model_config[ + 'ensembleScheduling']['step'] + # 4. add two virtual models(feed, fetch), and + # "modelType", "inputModels", "outputModels", "inputVars", "outputVars" + for model_config_in_step in transformed_config['step']: + model_config_in_step['modelType'] = 'normal' + model_config_in_step['inputModels'] = [] + model_config_in_step['outputModels'] = [] + model_config_in_step['inputVars'] = [] + model_config_in_step['outputVars'] = [] + + transformed_config['step'].append({ + "modelName": "feed", + "modelType": "virtual", + "inputModels": [], + "outputModels": [], + "inputVars": [], + "outputVars": [] + }) + transformed_config['step'].append({ + "modelName": "fetch", + "modelType": "virtual", + "inputModels": [], + "outputModels": [], + "inputVars": [], + "outputVars": [] + }) + analyse_step_relationships(transformed_config['step'], + transformed_config['input'], + transformed_config['output']) + exchange_format['ensembles'].append(transformed_config) + elif 'backend' in model_config: # single model + exchange_format['models'].append(transformed_config) + return exchange_format - transformed_config['step'].append({ - "modelName": "feed", - "modelType": "virtual", - "inputModels": [], - "outputModels": [], - "inputVars": [], - "outputVars": [] - }) - transformed_config['step'].append({ - "modelName": "fetch", - "modelType": "virtual", - "inputModels": [], - "outputModels": [], - "inputVars": [], - "outputVars": [] - }) - analyse_step_relationships(transformed_config['step'], transformed_config['input'], transformed_config['output']) - exchange_format['ensembles'].append(transformed_config) - elif 'backend' in model_config: # single model - exchange_format['models'].append(transformed_config) - return exchange_format def analyse_step_relationships(step_config, inputs, outputs): - ''' - Analyse model relationships in ensemble step. And fill "inputModels", "outputModels", "inputVars", "outputVars" in step_config. 
+ ''' + Analyse model relationships in ensemble step. And fill \ + "inputModels", "outputModels", "inputVars", "outputVars" in step_config. step_config: step data in ensemble model config. inputs: inputs in ensemble model config. outputs: outputs in ensemble model config. ''' - models_dict = {} - vars_dict = {} - for model_config_in_step in step_config: - models_dict[model_config_in_step['modelName']] = model_config_in_step - if model_config_in_step['modelType'] == 'virtual': - for var in inputs: - if var['name'] not in vars_dict: - vars_dict[var['name']] = {} - vars_dict[var['name']]['from_models'] = [] - vars_dict[var['name']]['to_models'] = [] - vars_dict[var['name']]['from_models'].append('feed') - for var in outputs: - if var['name'] not in vars_dict: - vars_dict[var['name']] = {} - vars_dict[var['name']]['from_models'] = [] - vars_dict[var['name']]['to_models'] = [] - vars_dict[var['name']]['to_models'].append('fetch') - else: - for var_placehold_name, var_name in model_config_in_step['inputMap'].items(): - if var_name not in vars_dict: - vars_dict[var_name] = {} - vars_dict[var_name]['from_models'] = [] - vars_dict[var_name]['to_models'] = [] - vars_dict[var_name]['to_models'].append(model_config_in_step['modelName']) - - for var_placehold_name, var_name in model_config_in_step['outputMap'].items(): - if var_name not in vars_dict: - vars_dict[var_name] = {} - vars_dict[var_name]['from_models'] = [] - vars_dict[var_name]['to_models'] = [] - vars_dict[var_name]['from_models'].append(model_config_in_step['modelName']) - for var_name, relationships in vars_dict.items(): - for from_model in relationships['from_models']: - models_dict[from_model]['outputVars'].append(var_name) - models_dict[from_model]['outputModels'].extend(relationships['to_models']) - for to_model in relationships['to_models']: - models_dict[to_model]['inputVars'].append(var_name) - models_dict[to_model]['inputModels'].extend(relationships['from_models']) + models_dict = {} + vars_dict = {} + for model_config_in_step in step_config: + models_dict[model_config_in_step['modelName']] = model_config_in_step + if model_config_in_step['modelType'] == 'virtual': + for var in inputs: + if var['name'] not in vars_dict: + vars_dict[var['name']] = {} + vars_dict[var['name']]['from_models'] = [] + vars_dict[var['name']]['to_models'] = [] + vars_dict[var['name']]['from_models'].append('feed') + for var in outputs: + if var['name'] not in vars_dict: + vars_dict[var['name']] = {} + vars_dict[var['name']]['from_models'] = [] + vars_dict[var['name']]['to_models'] = [] + vars_dict[var['name']]['to_models'].append('fetch') + else: + for var_placehold_name, var_name in model_config_in_step[ + 'inputMap'].items(): + if var_name not in vars_dict: + vars_dict[var_name] = {} + vars_dict[var_name]['from_models'] = [] + vars_dict[var_name]['to_models'] = [] + vars_dict[var_name]['to_models'].append( + model_config_in_step['modelName']) + for var_placehold_name, var_name in model_config_in_step[ + 'outputMap'].items(): + if var_name not in vars_dict: + vars_dict[var_name] = {} + vars_dict[var_name]['from_models'] = [] + vars_dict[var_name]['to_models'] = [] + vars_dict[var_name]['from_models'].append( + model_config_in_step['modelName']) + for var_name, relationships in vars_dict.items(): + for from_model in relationships['from_models']: + models_dict[from_model]['outputVars'].append(var_name) + models_dict[from_model]['outputModels'].extend( + relationships['to_models']) + for to_model in relationships['to_models']: + 
models_dict[to_model]['inputVars'].append(var_name)
+            models_dict[to_model]['inputModels'].extend(
+                relationships['from_models'])
 
 
 def launch_process(kwargs: dict):
-    '''
+    '''
     Launch a fastdeploy server according to specified arguments.
     '''
-    cmd = ['fastdeployserver']
-    for key, value in kwargs.items():
-        cmd.append('--{}'.format(key))
-        cmd.append('{}'.format(value))
-    p = Popen(cmd, stdout=PIPE, stderr=PIPE, bufsize=1, universal_newlines=True)
-    return p
+    cmd = ['fastdeployserver']
+    for key, value in kwargs.items():
+        cmd.append('--{}'.format(key))
+        cmd.append('{}'.format(value))
+    logfilename = 'logfile-{}'.format(get_random_string(8))
+    while os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, logfilename)):
+        logfilename = 'logfile-{}'.format(get_random_string(8))
+    p = Popen(
+        cmd,
+        stdout=open(
+            os.path.join(FASTDEPLOYSERVER_PATH, logfilename), 'w',
+            buffering=1),
+        stderr=STDOUT,
+        universal_newlines=True)
+    with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(p.pid)),
+              'w') as f:
+        f.write(
+            logfilename
+        )  # file ${p.pid} contains the real log filename ${logfilename}
+    return p
 
 
+def get_random_string(length):
+    # choose from all lowercase letters
+    letters = string.ascii_lowercase
+    result_str = ''.join([random.choice(letters) for i in range(length)])
+    return result_str
+
+
-def get_process_output(process):
-    '''
+def get_process_output(pid, length):
+    '''
     Get the standard output of a opened subprocess.
     '''
-    while process.poll() is None:
-        readlist, _, _ = select.select([process.stdout, process.stderr], [], [])
-        for item in readlist:
-            data = item.readline()
-            if not data:
-                return
-            yield data
+    if os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))):
+        with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)),
+                  'r') as f:
+            logfilename = f.read()
+        # read the log file ${logfilename} if it exists
+        if os.path.exists(
+                os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))):
+            with open(
+                    os.path.join(FASTDEPLOYSERVER_PATH,
+                                 '{}'.format(logfilename)), 'r') as f:
+                f.seek(length)
+                data = f.read()
+                return data
 
 
 def kill_process(process):
-    '''
+    '''
     Stop a opened subprocess.
     '''
-    process.kill()
+    if isinstance(process, int):  # pid, use os.kill to terminate
+        pid = process
+        try:
+            os.kill(process, signal.SIGKILL)
+        except Exception:
+            pass
+    else:
+        pid = process.pid
+        process.kill()
+        try:
+            process.wait(10)
+        except Exception:
+            pass
+    # delete the pid file ${pid} and its log file if they exist
+    if os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))):
+        with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)),
+                  'r') as f:
+            logfilename = f.read()
+        if os.path.exists(
+                os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))):
+            os.remove(
+                os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename)))
+        os.remove(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)))
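A minimal sketch of the pid-to-logfile indirection implemented above: launch_process
builds the command 'fastdeployserver --<key> <value> ...' and points its stdout at the
log file, so output can later be read back by pid. The pid value and filenames below
are hypothetical; FASTDEPLOYSERVER_PATH is the directory introduced in
visualdl/utils/dir.py further down in this patch:

    import os
    from visualdl.utils.dir import FASTDEPLOYSERVER_PATH

    pid = 4242  # pid returned by launch_process for a fastdeployserver run
    # the file named after the pid stores the name of the real log file
    with open(os.path.join(FASTDEPLOYSERVER_PATH, str(pid))) as f:
        logfilename = f.read()  # e.g. 'logfile-abcdefgh'
    # read new output starting from the last offset handed back to the caller
    with open(os.path.join(FASTDEPLOYSERVER_PATH, logfilename)) as f:
        f.seek(0)  # 0 on the first poll; pass the previous end offset later
        print(f.read())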
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
index b4493e4ea..450b36757 100644
--- a/visualdl/component/inference/fastdeploy_server.py
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -12,30 +12,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =======================================================================
-import base64
 import json
 import os
-import tempfile
-from collections import deque
-from pathlib import Path
-from threading import Lock
-import threading
-from multiprocessing import Process
 import socket
 import time
+from multiprocessing import Process
+from pathlib import Path
 
 import requests
-from flask import request
 
+from .fastdeploy_client.client_app import create_gradio_client_app
+from .fastdeploy_lib import analyse_config
+from .fastdeploy_lib import exchange_format_to_original_format
 from .fastdeploy_lib import get_process_output
-from .fastdeploy_lib import json2pbtxt,analyse_config
-from .fastdeploy_lib import exchange_format_to_original_format, original_format_to_exchange_format
+from .fastdeploy_lib import json2pbtxt
 from .fastdeploy_lib import kill_process
 from .fastdeploy_lib import launch_process
-from .fastdeploy_lib import pbtxt2json
+from .fastdeploy_lib import original_format_to_exchange_format
 from visualdl.server.api import gen_result
 from visualdl.server.api import result
-from .fastdeploy_client.client_app import create_gradio_client_app
+from visualdl.utils.dir import FASTDEPLOYSERVER_PATH
 
 
 class FastDeployServerApi(object):
@@ -63,16 +59,19 @@ def get_directory(self, cur_dir):
     @result()
     def get_config(self, cur_dir):
-        all_model_configs, all_model_versions, all_model_paths = analyse_config(cur_dir)
+        all_model_configs, all_model_versions, all_model_paths = analyse_config(
+            cur_dir)
         for name, value in all_model_paths.items():
-            self.model_paths[(Path(os.path.abspath(cur_dir)),name)] = value
-        return original_format_to_exchange_format(all_model_configs, all_model_versions)
+            self.model_paths[(Path(os.path.abspath(cur_dir)), name)] = value
+        return original_format_to_exchange_format(all_model_configs,
+                                                  all_model_versions)
 
     @result()
     def config_update(self, cur_dir, model_name, config):
         config = json.loads(config)
         all_models = exchange_format_to_original_format(config)
-        model_dir = self.model_paths[(Path(os.path.abspath(cur_dir)), model_name)]
+        model_dir = self.model_paths[(Path(os.path.abspath(cur_dir)),
+                                      model_name)]
         text_proto = json2pbtxt(json.dumps(all_models[model_name]))
         with open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f:
             f.write(text_proto)
@@ -88,21 +87,34 @@ def start_server(self, configs):
 
     @result()
     def stop_server(self, server_id):
         server_id = int(server_id)
-        if server_id not in self.opened_servers:
-            return
-        kill_process(self.opened_servers[server_id])
-        del self.opened_servers[server_id]
-
-    @result('application/octet-stream')
-    def get_server_output(self, server_id):
+        if server_id in self.opened_servers:  # check if server_id in self.opened_servers
+            kill_process(self.opened_servers[server_id])
+            del self.opened_servers[server_id]
+        elif str(server_id) in set(
+                os.listdir(FASTDEPLOYSERVER_PATH)):  # check if server_id in
+            # FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn)
+            kill_process(server_id)
+        # reap servers that were killed by another vdl app instance and became zombies
+        for server_id, process in list(self.opened_servers.items()):
+            if process.poll() is not None:
+                del self.opened_servers[server_id]
+
+    @result('text/plain')
+    def get_server_output(self, server_id, length):
         server_id = int(server_id)
-        if server_id not in self.opened_servers:
+        length = int(length)
+        if server_id in self.opened_servers:  # check if server_id in self.opened_servers
+            return get_process_output(server_id, length)
+        elif str(server_id) in set(
os.listdir(FASTDEPLOYSERVER_PATH)): # check if server_id in + # FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn) + return get_process_output(server_id, length) + else: return - stdout_generator = get_process_output(self.opened_servers[server_id]) - return stdout_generator - + def create_fastdeploy_client(self): if self.client_port is None: + def get_free_tcp_port(): tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) @@ -113,12 +125,15 @@ def get_free_tcp_port(): self.client_port = get_free_tcp_port() app = create_gradio_client_app() - thread = Process(target=app.launch, kwargs={'server_port': self.client_port}) + thread = Process( + target=app.launch, kwargs={'server_port': self.client_port}) thread.start() + def check_alive(): while True: try: - requests.get('http://localhost:{}/'.format(self.client_port)) + requests.get('http://localhost:{}/'.format( + self.client_port)) break except Exception: time.sleep(1) @@ -135,7 +150,7 @@ def create_fastdeploy_api_call(): 'get_config': (api.get_config, ['dir']), 'start_server': (api.start_server, ['config']), 'stop_server': (api.stop_server, ['server_id']), - 'get_server_output': (api.get_server_output, ['server_id']), + 'get_server_output': (api.get_server_output, ['server_id', 'length']), 'create_fastdeploy_client': (api.create_fastdeploy_client, []) } diff --git a/visualdl/utils/dir.py b/visualdl/utils/dir.py index 4e9ccd63f..13aada1c4 100644 --- a/visualdl/utils/dir.py +++ b/visualdl/utils/dir.py @@ -12,21 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # ======================================================================= - -import os import json - +import os VDL_SERVER = "https://www.paddlepaddle.org.cn/paddle/visualdl/service/server" -default_vdl_config = { - 'server_url': VDL_SERVER -} +default_vdl_config = {'server_url': VDL_SERVER} USER_HOME = os.path.expanduser('~') VDL_HOME = os.path.join(USER_HOME, '.visualdl') CONF_HOME = os.path.join(VDL_HOME, 'conf') CONFIG_PATH = os.path.join(CONF_HOME, 'config.json') +FASTDEPLOYSERVER_PATH = os.path.join(VDL_HOME, 'fastdeployserver') def init_vdl_config(): @@ -35,3 +32,5 @@ def init_vdl_config(): if not os.path.exists(CONFIG_PATH) or 0 == os.path.getsize(CONFIG_PATH): with open(CONFIG_PATH, 'w') as fp: fp.write(json.dumps(default_vdl_config)) + if not os.path.exists(FASTDEPLOYSERVER_PATH): + os.makedirs(FASTDEPLOYSERVER_PATH, exist_ok=True) From 380b950d97001fde35e695d889c6065ade8542fc Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 2 Dec 2022 14:31:33 +0800 Subject: [PATCH 07/48] add fastdeploy server and client --- requirements.txt | 4 + .../inference/fastdeploy_client/client_app.py | 381 ++++++++++++++++++ .../fastdeploy_client/http_client_manager.py | 92 +++++ .../inference/fastdeploy_client/visualizer.py | 102 +++++ 4 files changed, 579 insertions(+) create mode 100644 visualdl/component/inference/fastdeploy_client/client_app.py create mode 100644 visualdl/component/inference/fastdeploy_client/http_client_manager.py create mode 100644 visualdl/component/inference/fastdeploy_client/visualizer.py diff --git a/requirements.txt b/requirements.txt index 12588e37c..b3a3bd220 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,7 @@ multiprocess packaging x2paddle rarfile +gradio +fastdeploy-python +tritonclient[all] +attrdict diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py 
b/visualdl/component/inference/fastdeploy_client/client_app.py new file mode 100644 index 000000000..c2428e5ac --- /dev/null +++ b/visualdl/component/inference/fastdeploy_client/client_app.py @@ -0,0 +1,381 @@ +import gradio as gr +import numpy as np + +from .http_client_manager import HttpClientManager +from .visualizer import visualize_detection +from .visualizer import visualize_face_detection +from .visualizer import visualize_keypoint_detection +from .visualizer import visualize_matting +from .visualizer import visualize_ocr +from .visualizer import visualize_segmentation + +_http_manager = HttpClientManager() + +supported_tasks = { + 'detection': visualize_detection, + 'facedet': visualize_face_detection, + 'keypointdetection': visualize_keypoint_detection, + 'segmentation': visualize_segmentation, + 'matting': visualize_matting, + 'ocr': visualize_ocr, + 'others(raw data)': lambda x: str(x) +} + + +def create_gradio_client_app(): # noqa:C901 + css = """ + .gradio-container { + font-family: 'IBM Plex Sans', sans-serif; + } + .gr-button { + color: white; + border-color: black; + background: black; + } + input[type='range'] { + accent-color: black; + } + .dark input[type='range'] { + accent-color: #dfdfdf; + } + .container { + max-width: 1200px; + margin: auto; + padding-top: 1.5rem; + } + #gallery { + min-height: 22rem; + margin-bottom: 15px; + margin-left: auto; + margin-right: auto; + border-bottom-right-radius: .5rem !important; + border-bottom-left-radius: .5rem !important; + } + #gallery>div>.h-full { + min-height: 20rem; + } + .details:hover { + text-decoration: underline; + } + .gr-button { + white-space: nowrap; + } + .gr-button:focus { + border-color: rgb(147 197 253 / var(--tw-border-opacity)); + outline: none; + box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000); + --tw-border-opacity: 1; + --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) \ + var(--tw-ring-offset-color); + --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px var(--tw-ring-offset-width)) var(--tw-ring-color); + --tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity)); + --tw-ring-opacity: .5; + } + .footer { + margin-bottom: 45px; + margin-top: 35px; + text-align: center; + border-bottom: 1px solid #e5e5e5; + } + .footer>p { + font-size: .8rem; + display: inline-block; + padding: 0 10px; + transform: translateY(10px); + background: white; + } + .dark .footer { + border-color: #303030; + } + .dark .footer>p { + background: #0b0f19; + } + .prompt h4{ + margin: 1.25em 0 .25em 0; + font-weight: bold; + font-size: 115%; + } + """ + + block = gr.Blocks(css=css) + + with block: + gr.HTML(""" +
+            <div style="text-align: center; max-width: 650px; margin: 0 auto;">
+              <div>
+                <h1 style="font-weight: 900; margin-bottom: 7px;">
+                  FastDeploy Client
+                </h1>
+              </div>
+              <p style="margin-bottom: 10px; font-size: 94%">
+                The client is used for creating requests to fastdeploy server.
+              </p>
+            </div>
+ """) + with gr.Group(): + with gr.Box(): + with gr.Column(): + with gr.Row(): + server_addr_text = gr.Textbox( + label="Server address", + show_label=True, + max_lines=1, + placeholder="localhost:8000", + ) + + model_name_text = gr.Textbox( + label="model name", + show_label=True, + max_lines=1, + placeholder="yolov5", + ) + + model_version_text = gr.Textbox( + label="model version", + show_label=True, + max_lines=1, + placeholder="1", + ) + + check_button = gr.Button("GetInputOutputName") + + with gr.Box(): + gr.Markdown("Inputs") + with gr.Tab("component format"): + gr.Markdown( + "Fill inputs according to your need, choose either image or text for each input." + ) + with gr.Column(): + with gr.Accordion("input 1"): + input_name_1_text = gr.Textbox( + label="input name", interactive=False) + input_1_image = gr.Image(type='numpy') + input_1_text = gr.Textbox( + label="contents", max_lines=1000) + with gr.Accordion("input 2", open=False): + input_name_2_text = gr.Textbox( + label="input name", interactive=False) + input_2_image = gr.Image(type='numpy') + input_2_text = gr.Textbox( + label="contents", max_lines=1000) + + with gr.Accordion("input 3", open=False): + input_name_3_text = gr.Textbox( + label="input name", interactive=False) + input_3_image = gr.Image(type='numpy') + input_3_text = gr.Textbox( + label="contents", max_lines=1000) + with gr.Box(): + gr.Markdown("Outputs") + with gr.Column(): + with gr.Accordion("output 1"): + output_name_1_text = gr.Textbox( + label="output name", interactive=False) + task_select_items1 = gr.Dropdown( + choices=list(supported_tasks.keys()), + value='others(raw data)', + label='task type') + output_1_text = gr.Textbox( + label="raw data", + interactive=False, + show_label=True) + output_1_image = gr.Image(interactive=False) + with gr.Accordion("output 2", open=False): + output_name_2_text = gr.Textbox( + label="output name", interactive=False) + task_select_items2 = gr.Dropdown( + choices=list(supported_tasks.keys()), + value='others(raw data)', + label='task type') + output_2_text = gr.Textbox( + label="raw data", + interactive=False, + show_label=True, + ) + output_2_image = gr.Image(interactive=False) + + with gr.Accordion("output 3", open=False): + output_name_3_text = gr.Textbox( + label="output name", interactive=False) + task_select_items3 = gr.Dropdown( + choices=list(supported_tasks.keys()), + value='others(raw data)', + label='task type') + output_3_text = gr.Textbox( + label="raw data", + interactive=False, + show_label=True) + output_3_image = gr.Image(interactive=False) + component_submit_button = gr.Button("submit") + with gr.Tab("raw format"): + raw_payload_text = gr.Textbox( + label="request payload", max_lines=10000) + with gr.Box(): + gr.Markdown("Outputs") + with gr.Column(): + output_raw_text = gr.Textbox( + label="raw data", interactive=False) + raw_submit_button = gr.Button("submit") + + status_text = gr.Textbox( + label="status", + show_label=True, + max_lines=1, + interactive=False) + all_input_output_components = [ + input_name_1_text, input_name_2_text, input_name_3_text, + input_1_image, input_2_image, input_3_image, input_1_text, + input_2_text, input_3_text, output_name_1_text, output_name_2_text, + output_name_3_text, output_1_text, output_2_text, output_3_text, + output_1_image, output_2_image, output_3_image, task_select_items1, + task_select_items2, task_select_items3 + ] + + def get_input_output_name(server_addr, model_name, model_version): + try: + input_metas, output_metas = _http_manager.get_model_meta( + 
server_addr, model_name, model_version) + except Exception as e: + return {status_text: str(e)} + input_name_texts = [ + input_name_1_text, input_name_2_text, input_name_3_text + ] + output_name_texts = [ + output_name_1_text, output_name_2_text, output_name_3_text + ] + results = { + component: None + for component in all_input_output_components + } + results[task_select_items1] = 'others(raw data)' + results[task_select_items2] = 'others(raw data)' + results[task_select_items3] = 'others(raw data)' + results[status_text] = 'GetInputOutputName Successful' + for i, input_meta in enumerate(input_metas): + results[input_name_texts[i]] = input_meta['name'] + for i, output_meta in enumerate(output_metas): + results[output_name_texts[i]] = output_meta['name'] + return results + + def component_inference(*args): + server_addr = args[0] + model_name = args[1] + model_version = args[2] + input_name_1 = args[3] + input_1_image_data = args[4] + input_1_text_data = args[5] + input_name_2 = args[6] + input_2_image_data = args[7] + input_2_text_data = args[8] + input_name_3 = args[9] + input_3_image_data = args[10] + input_3_text_data = args[11] + task_select_items1_data = args[12] + task_select_items2_data = args[13] + task_select_items3_data = args[14] + if server_addr and model_name and model_version: + inputs = {} + if input_name_1: + if input_1_image_data is not None: + inputs[input_name_1] = np.array([input_1_image_data]) + if input_1_text_data: + inputs[input_name_1] = np.array( + [[input_1_text_data.encode('utf-8')]], + dtype=np.object_) + if input_name_2: + if input_2_image_data is not None: + inputs[input_name_2] = np.array([input_2_image_data]) + if input_2_text_data: + inputs[input_name_2] = np.array( + [[input_2_text_data.encode('utf-8')]], + dtype=np.object_) + if input_name_3: + if input_3_image_data is not None: + inputs[input_name_3] = np.array([input_3_image_data]) + if input_3_text_data: + inputs[input_name_3] = np.array( + [[input_3_text_data.encode('utf-8')]], + dtype=np.object_) + try: + infer_results = _http_manager.infer( + server_addr, model_name, model_version, inputs) + results = {status_text: 'Inference Successful'} + output_name_texts = [ + output_name_1_text, output_name_2_text, + output_name_3_text + ] + output_texts = [ + output_1_text, output_2_text, output_3_text + ] + output_images = [ + output_1_image, output_2_image, output_3_image + ] + output_task_types = [ + task_select_items1_data, task_select_items2_data, + task_select_items3_data + ] + for i, (output_name, + data) in enumerate(infer_results.items()): + results[output_name_texts[i]] = output_name + results[output_texts[i]] = str(data) + if output_task_types[i] != 'others(raw data)': + results[output_images[i]] = supported_tasks[ + output_task_types[i]](input_1_image_data, data) + return results + except Exception as e: + return {status_text: 'Error: {}'.format(e)} + else: + return { + status_text: + 'Please input server addr, model name and model version.' 
+ } + + def raw_inference(*args): + server_addr = args[0] + model_name = args[1] + model_version = args[2] + payload_text = args[3] + try: + result = _http_manager.raw_infer(server_addr, model_name, + model_version, payload_text) + results = { + status_text: 'Get response from server', + output_raw_text: result + } + return results + except Exception as e: + return {status_text: 'Error: {}'.format(e)} + + check_button.click( + fn=get_input_output_name, + inputs=[server_addr_text, model_name_text, model_version_text], + outputs=[*all_input_output_components, status_text]) + component_submit_button.click( + fn=component_inference, + inputs=[ + server_addr_text, model_name_text, model_version_text, + input_name_1_text, input_1_image, input_1_text, + input_name_2_text, input_2_image, input_2_text, + input_name_3_text, input_3_image, input_3_text, + task_select_items1, task_select_items2, task_select_items3 + ], + outputs=[ + output_name_1_text, output_name_2_text, output_name_3_text, + output_1_text, output_2_text, output_3_text, output_1_image, + output_2_image, output_3_image, status_text + ]) + raw_submit_button.click( + fn=raw_inference, + inputs=[ + server_addr_text, model_name_text, model_version_text, + raw_payload_text + ], + outputs=[output_raw_text, status_text]) + return block diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py new file mode 100644 index 000000000..e9a6ef4d0 --- /dev/null +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -0,0 +1,92 @@ +import json + +import requests +import tritonclient.http as httpclient +from attrdict import AttrDict +from tritonclient.utils import InferenceServerException + + +def convert_http_metadata_config(metadata): + metadata = AttrDict(metadata) + + return metadata + + +def prepare_request(inputs_meta, inputs_data, outputs_meta): + ''' + inputs_meta: inputs meta information from model. name: info + inputs_data: users input data. 
name: data + ''' + # Set the input data + inputs = [] + for input_dict in inputs_meta: + input_name = input_dict['name'] + if input_name not in inputs_data: + raise RuntimeError( + 'Error: input name {} required for model not existed.'.format( + input_name)) + infer_input = httpclient.InferInput( + input_name, inputs_data[input_name].shape, input_dict['datatype']) + infer_input.set_data_from_numpy(inputs_data[input_name]) + inputs.append(infer_input) + outputs = [] + for output_dict in outputs_meta: + infer_output = httpclient.InferRequestedOutput(output_dict.name) + outputs.append(infer_output) + return inputs, outputs + + +class HttpClientManager: + def __init__(self): + self.clients = {} # server url: httpclient + + def _create_client(self, server_url): + if server_url in self.clients: + return self.clients[server_url] + try: + fastdeploy_client = httpclient.InferenceServerClient(server_url) + self.clients[server_url] = fastdeploy_client + return fastdeploy_client + except Exception: + raise RuntimeError( + 'Can not connect to server {}, please check your \ + server address'.format(server_url)) + + def infer(self, server_url, model_name, model_version, inputs): + fastdeploy_client = self._create_client(server_url) + input_metadata, output_metadata = self.get_model_meta( + server_url, model_name, model_version) + inputs, outputs = prepare_request(input_metadata, inputs, + output_metadata) + response = fastdeploy_client.infer( + model_name, inputs, model_version=model_version, outputs=outputs) + results = {} + for output in output_metadata: + result = response.as_numpy(output.name) # datatype: numpy + if output.datatype == 'BYTES': + result = result[0][0] # datatype: bytes + result = json.loads(result) # datatype: json + else: + result = result[0] + results[output.name] = result + return results + + def raw_infer(self, server_url, model_name, model_version, raw_input): + url = 'http://{}/v2/models/{}/versions/{}/infer'.format( + server_url, model_name, model_version) + res = requests.post(url, data=json.dumps(json.loads(raw_input))) + return json.dumps(res.json()) + + def get_model_meta(self, server_url, model_name, model_version): + fastdeploy_client = self._create_client(server_url) + try: + model_metadata = fastdeploy_client.get_model_metadata( + model_name=model_name, model_version=model_version) + except InferenceServerException as e: + raise RuntimeError("Failed to retrieve the metadata: " + str(e)) + + model_metadata = convert_http_metadata_config(model_metadata) + + input_metadata = model_metadata.inputs + output_metadata = model_metadata.outputs + return input_metadata, output_metadata diff --git a/visualdl/component/inference/fastdeploy_client/visualizer.py b/visualdl/component/inference/fastdeploy_client/visualizer.py new file mode 100644 index 000000000..5a255f6ab --- /dev/null +++ b/visualdl/component/inference/fastdeploy_client/visualizer.py @@ -0,0 +1,102 @@ +import fastdeploy as fd +import numpy as np + +__all__ = [ + 'visualize_detection', 'visualize_keypoint_detection', + 'visualize_face_detection', 'visualize_segmentation', 'visualize_matting', + 'visualize_ocr' +] + + +def visualize_detection(image, data): + boxes = np.array(data['boxes']) + scores = np.array(data['scores']) + label_ids = np.array(data['label_ids']) + masks = np.array(data['masks']) + contain_masks = data['contain_masks'] + detection_result = fd.C.vision.DetectionResult() + detection_result.boxes = boxes + detection_result.scores = scores + detection_result.label_ids = label_ids + 
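+    # contain_masks flags whether masks actually carries instance-segmentation
+    # data; both fields arrive in the same JSON payload decoded by the client.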
detection_result.masks = masks + detection_result.contain_masks = contain_masks + result = fd.vision.vis_detection(image, detection_result) + return result + + +def visualize_keypoint_detection(image, data): + keypoints = np.array(data['keypoints']) + scores = np.array(data['scores']) + num_joints = np.array(data['num_joints']) + + detection_result = fd.C.vision.KeyPointDetectionResult() + detection_result.keypoints = keypoints + detection_result.scores = scores + detection_result.num_joints = num_joints + + result = fd.vision.vis_keypoint_detection(image, detection_result) + return result + + +def visualize_face_detection(image, data): + data = np.array(data['data']) + scores = np.array(data['scores']) + landmarks = np.array(data['landmarks']) + landmarks_per_face = data['landmarks_per_face'] + + detection_result = fd.C.vision.FaceDetectionResult() + detection_result.data = data + detection_result.scores = scores + detection_result.landmarks = landmarks + detection_result.landmarks_per_face = landmarks_per_face + + result = fd.vision.vis_face_detection(image, detection_result) + return result + + +def visualize_segmentation(image, data): + label_ids = np.array(data['label_ids']) + score_map = np.array(data['score_map']) + shape = np.array(data['shape']) + + segmentation_result = fd.C.vision.SegmentationResult() + segmentation_result.shape = shape + segmentation_result.score_map = score_map + segmentation_result.label_ids = label_ids + + result = fd.vision.vis_segmentation(image, segmentation_result) + return result + + +def visualize_matting(image, data): + alpha = np.array(data['alpha']) + foreground = np.array(data['foreground']) + contain_foreground = data['contain_foreground'] + shape = np.array(data['shape']) + + matting_result = fd.C.vision.MattingResult() + matting_result.alpha = alpha + matting_result.foreground = foreground + matting_result.contain_foreground = contain_foreground + matting_result.shape = shape + + result = fd.vision.vis_matting(image, matting_result) + return result + + +def visualize_ocr(image, data): + boxes = np.array(data['boxes']) + text = np.array(data['text']) + rec_scores = np.array(data['rec_scores']) + cls_scores = np.array(data['cls_scores']) + cls_labels = data['cls_labels'] + + ocr_result = fd.C.vision.OCRResult() + ocr_result.boxes = boxes + ocr_result.text = text + ocr_result.rec_scores = rec_scores + ocr_result.cls_scores = cls_scores + ocr_result.cls_labels = cls_labels + + result = fd.vision.vis_ppocr(image, ocr_result) + return result From 16e55fd1f35213471a0fb91f9b63362fde95129f Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 7 Dec 2022 11:17:37 +0800 Subject: [PATCH 08/48] add exception description --- visualdl/component/inference/fastdeploy_server.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 450b36757..7801bf281 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -81,6 +81,10 @@ def config_update(self, cur_dir, model_name, config): def start_server(self, configs): configs = json.loads(configs) process = launch_process(configs) + if process.poll() is not None: + raise RuntimeError( + "Launch fastdeploy server failed, please check your launching arguments" + ) self.opened_servers[process.pid] = process return process.pid From ef08cf23466902e9299fa99c3701a93ce9f74f74 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 7 Dec 2022 14:50:24 +0800 Subject: 
[PATCH 09/48] fix --- .../component/inference/fastdeploy_lib.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 096976b05..a5b5ccd47 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -219,32 +219,32 @@ def analyse_step_relationships(step_config, inputs, outputs): for var in inputs: if var['name'] not in vars_dict: vars_dict[var['name']] = {} - vars_dict[var['name']]['from_models'] = [] - vars_dict[var['name']]['to_models'] = [] - vars_dict[var['name']]['from_models'].append('feed') + vars_dict[var['name']]['from_models'] = set() + vars_dict[var['name']]['to_models'] = set() + vars_dict[var['name']]['from_models'].add('feed') for var in outputs: if var['name'] not in vars_dict: vars_dict[var['name']] = {} - vars_dict[var['name']]['from_models'] = [] - vars_dict[var['name']]['to_models'] = [] - vars_dict[var['name']]['to_models'].append('fetch') + vars_dict[var['name']]['from_models'] = set() + vars_dict[var['name']]['to_models'] = set() + vars_dict[var['name']]['to_models'].add('fetch') else: for var_placehold_name, var_name in model_config_in_step[ 'inputMap'].items(): if var_name not in vars_dict: vars_dict[var_name] = {} - vars_dict[var_name]['from_models'] = [] - vars_dict[var_name]['to_models'] = [] - vars_dict[var_name]['to_models'].append( + vars_dict[var_name]['from_models'] = set() + vars_dict[var_name]['to_models'] = set() + vars_dict[var_name]['to_models'].add( model_config_in_step['modelName']) for var_placehold_name, var_name in model_config_in_step[ 'outputMap'].items(): if var_name not in vars_dict: vars_dict[var_name] = {} - vars_dict[var_name]['from_models'] = [] - vars_dict[var_name]['to_models'] = [] - vars_dict[var_name]['from_models'].append( + vars_dict[var_name]['from_models'] = set() + vars_dict[var_name]['to_models'] = set() + vars_dict[var_name]['from_models'].add( model_config_in_step['modelName']) for var_name, relationships in vars_dict.items(): for from_model in relationships['from_models']: From 50f080270953d56ed19864e63871a34b103a2b1c Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 7 Dec 2022 17:15:00 +0800 Subject: [PATCH 10/48] add model repository judgement --- visualdl/component/inference/fastdeploy_lib.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index a5b5ccd47..89df30e3c 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -67,6 +67,9 @@ def analyse_config(cur_dir: str): all_model_versions[model_name][model_sub_dir] = [] all_model_versions[model_name][model_sub_dir].append( version_resource_file) + if not all_model_configs: + raise Exception( + 'Not a valid model repository, please choose the right path') return all_model_configs, all_model_versions, all_model_paths From 66064d26ad5c231fa3f6f2251e57f5c149863e82 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 8 Dec 2022 10:56:16 +0800 Subject: [PATCH 11/48] add component tab for fastdeploy client --- visualdl/server/api.py | 5 ++++- visualdl/server/args.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/visualdl/server/api.py b/visualdl/server/api.py index 502bf48f0..0ef7b6dc1 100644 --- a/visualdl/server/api.py +++ b/visualdl/server/api.py @@ -417,7 +417,10 @@ def get_component_tabs(*apis, vdl_args, 
request_args): all_tabs.update(api('component_tabs', request_args)) all_tabs.add('static_graph') else: - return ['static_graph', 'x2paddle', 'fastdeploy_server'] + return [ + 'static_graph', 'x2paddle', 'fastdeploy_server', + 'fastdeploy_client' + ] return list(all_tabs) diff --git a/visualdl/server/args.py b/visualdl/server/args.py index cb42422c7..71f97afb1 100644 --- a/visualdl/server/args.py +++ b/visualdl/server/args.py @@ -78,7 +78,8 @@ def validate_args(args): supported_tabs = [ 'scalar', 'image', 'text', 'embeddings', 'audio', 'histogram', 'hyper_parameters', 'static_graph', 'dynamic_graph', 'pr_curve', - 'roc_curve', 'profiler', 'x2paddle', 'fastdeploy_server' + 'roc_curve', 'profiler', 'x2paddle', 'fastdeploy_server', + 'fastdeploy_client' ] if args.component_tabs is not None: for component_tab in args.component_tabs: From 690f55d65546f8907016bb7ea9dceda0708e317f Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 8 Dec 2022 14:36:50 +0800 Subject: [PATCH 12/48] update more tasks in fastdeploy client --- .../inference/fastdeploy_client/client_app.py | 18 +++++++++ .../fastdeploy_client/http_client_manager.py | 14 +++++++ .../inference/fastdeploy_client/visualizer.py | 39 ++++++++++++++++++- .../component/inference/fastdeploy_lib.py | 14 +++++++ 4 files changed, 83 insertions(+), 2 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py index c2428e5ac..cc1925bb9 100644 --- a/visualdl/component/inference/fastdeploy_client/client_app.py +++ b/visualdl/component/inference/fastdeploy_client/client_app.py @@ -1,9 +1,25 @@ +# Copyright (c) 2022 VisualDL Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ======================================================================= import gradio as gr import numpy as np from .http_client_manager import HttpClientManager from .visualizer import visualize_detection +from .visualizer import visualize_face_alignment from .visualizer import visualize_face_detection +from .visualizer import visualize_headpose from .visualizer import visualize_keypoint_detection from .visualizer import visualize_matting from .visualizer import visualize_ocr @@ -18,6 +34,8 @@ 'segmentation': visualize_segmentation, 'matting': visualize_matting, 'ocr': visualize_ocr, + 'facealignment': visualize_face_alignment, + 'headpose': visualize_headpose, 'others(raw data)': lambda x: str(x) } diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index e9a6ef4d0..0fb300023 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -1,3 +1,17 @@ +# Copyright (c) 2022 VisualDL Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ======================================================================= import json import requests diff --git a/visualdl/component/inference/fastdeploy_client/visualizer.py b/visualdl/component/inference/fastdeploy_client/visualizer.py index 5a255f6ab..5abe570e0 100644 --- a/visualdl/component/inference/fastdeploy_client/visualizer.py +++ b/visualdl/component/inference/fastdeploy_client/visualizer.py @@ -1,10 +1,25 @@ +# Copyright (c) 2022 VisualDL Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ======================================================================= import fastdeploy as fd import numpy as np __all__ = [ 'visualize_detection', 'visualize_keypoint_detection', - 'visualize_face_detection', 'visualize_segmentation', 'visualize_matting', - 'visualize_ocr' + 'visualize_face_detection', 'visualize_face_alignment', + 'visualize_segmentation', 'visualize_matting', 'visualize_ocr', + 'visualize_headpose' ] @@ -54,6 +69,16 @@ def visualize_face_detection(image, data): return result +def visualize_face_alignment(image, data): + landmarks = np.array(data['landmarks']) + + facealignment_result = fd.C.vision.FaceAlignmentResult() + facealignment_result.landmarks = landmarks + + result = fd.vision.vis_face_alignment(image, facealignment_result) + return result + + def visualize_segmentation(image, data): label_ids = np.array(data['label_ids']) score_map = np.array(data['score_map']) @@ -100,3 +125,13 @@ def visualize_ocr(image, data): result = fd.vision.vis_ppocr(image, ocr_result) return result + + +def visualize_headpose(image, data): + euler_angles = np.array(data['euler_angles']) + + headpose_result = fd.C.vision.HeadPoseResult() + headpose_result.euler_angles = euler_angles + + result = fd.vision.vis_headpose(image, headpose_result) + return result diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 89df30e3c..a9faec3e3 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -1,3 +1,17 @@ +# Copyright (c) 2022 VisualDL Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ======================================================================= import copy import json import os From 78933331f290834b17862db08c26781171a0687e Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 8 Dec 2022 14:52:34 +0800 Subject: [PATCH 13/48] sort filenames --- visualdl/component/inference/fastdeploy_server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 7801bf281..deaf2a69c 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -49,6 +49,7 @@ def get_directory(self, cur_dir): cur_dir, sub_dirs, filenames = os.walk(cur_dir).send(None) if Path(self.root_dir) != Path(os.path.abspath(cur_dir)): sub_dirs.append('..') + sub_dirs = sorted(sub_dirs) directorys = { 'parent_dir': os.path.relpath(Path(os.path.abspath(cur_dir)), self.root_dir), From e5e41b084a12be484dc1726322a879f00a2d47ee Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 8 Dec 2022 15:10:20 +0800 Subject: [PATCH 14/48] backup config --- visualdl/component/inference/fastdeploy_server.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index deaf2a69c..545357e7d 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ======================================================================= +import datetime import json import os +import shutil import socket import time from multiprocessing import Process @@ -74,6 +76,12 @@ def config_update(self, cur_dir, model_name, config): model_dir = self.model_paths[(Path(os.path.abspath(cur_dir)), model_name)] text_proto = json2pbtxt(json.dumps(all_models[model_name])) + # backup user's config.pbtxt first, when data corrupted by front-end, we still can recovery data + shutil.copy( + os.path.join(model_dir, 'config.pbtxt'), + os.path.join( + model_dir, 'config_vdlbackup_{}.pbtxt'.format( + datetime.datetime.now().isoformat()))) with open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f: f.write(text_proto) return From 2d857ae63b05c3a50b48797ecb3cea872f299464 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 8 Dec 2022 15:16:24 +0800 Subject: [PATCH 15/48] noqa for autogenerated file --- visualdl/component/inference/proto/model_config/protxt_pb2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/visualdl/component/inference/proto/model_config/protxt_pb2.py b/visualdl/component/inference/proto/model_config/protxt_pb2.py index 4d5b645d4..70bf7b906 100644 --- a/visualdl/component/inference/proto/model_config/protxt_pb2.py +++ b/visualdl/component/inference/proto/model_config/protxt_pb2.py @@ -1,3 +1,4 @@ +# flake8: noqa # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: model_config.protxt From 72052095c2898c19e01d68f18518e0fe78a93e98 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 9 Dec 2022 11:37:31 +0800 Subject: [PATCH 16/48] add data validation --- .../component/inference/fastdeploy_lib.py | 31 ++++++++++++++++--- .../component/inference/fastdeploy_server.py | 4 ++- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index a9faec3e3..37a5158c2 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -47,6 +47,22 @@ def json2pbtxt(content: str): return text_proto +def validate_data(model_config): + ''' + Validate data in model config, we should check empty value recieved from front end. + The easiest way to handle it is to drop empty value. + Args: + model_config: model config to be saved in config file + Return: + model config after filtering. + ''' + model_config_filtered = {} + for key, value in model_config.items(): + if value: + model_config_filtered[key] = value + return model_config_filtered + + def analyse_config(cur_dir: str): ''' Analyse the model config in specified directory. @@ -69,6 +85,8 @@ def analyse_config(cur_dir: str): pbtxt2json(open(os.path.join(model_dir, filename)).read())) all_model_configs[ model_name] = json_config # store original config file content in json format + if 'name' not in json_config: + json_config['name'] = model_name for model_sub_dir in model_sub_dirs: if re.match( r'\d+', @@ -220,7 +238,7 @@ def original_format_to_exchange_format(original_format, version_info): return exchange_format -def analyse_step_relationships(step_config, inputs, outputs): +def analyse_step_relationships(step_config, inputs, outputs): # noqa: C901 ''' Analyse model relationships in ensemble step. And fill \ "inputModels", "outputModels", "inputVars", "outputVars" in step_config. 
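Context for the hunk below: `from_models`/`to_models` are sets (so duplicate
producer/consumer edges collapse), while `inputModels`/`outputModels` stay plain
lists, hence each append is guarded by a membership test instead of
`list.extend()` — a set would not survive JSON serialization, and a list keeps
first-seen order. A minimal sketch of the pattern (the helper name is
illustrative, not part of the patch):

    def append_unique(seq, items):
        # keep first-seen order; list(set(items)) would not guarantee it
        for item in items:
            if item not in seq:
                seq.append(item)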
@@ -266,12 +284,15 @@ def analyse_step_relationships(step_config, inputs, outputs): for var_name, relationships in vars_dict.items(): for from_model in relationships['from_models']: models_dict[from_model]['outputVars'].append(var_name) - models_dict[from_model]['outputModels'].extend( - relationships['to_models']) + for var_to_model in relationships['to_models']: + if var_to_model not in models_dict[from_model]['outputModels']: + models_dict[from_model]['outputModels'].append( + var_to_model) for to_model in relationships['to_models']: models_dict[to_model]['inputVars'].append(var_name) - models_dict[to_model]['inputModels'].extend( - relationships['from_models']) + for var_from_model in relationships['from_models']: + if var_from_model not in models_dict[to_model]['inputModels']: + models_dict[to_model]['inputModels'].append(var_from_model) def launch_process(kwargs: dict): diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 545357e7d..ccb34d715 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -31,6 +31,7 @@ from .fastdeploy_lib import kill_process from .fastdeploy_lib import launch_process from .fastdeploy_lib import original_format_to_exchange_format +from .fastdeploy_lib import validate_data from visualdl.server.api import gen_result from visualdl.server.api import result from visualdl.utils.dir import FASTDEPLOYSERVER_PATH @@ -75,7 +76,8 @@ def config_update(self, cur_dir, model_name, config): all_models = exchange_format_to_original_format(config) model_dir = self.model_paths[(Path(os.path.abspath(cur_dir)), model_name)] - text_proto = json2pbtxt(json.dumps(all_models[model_name])) + filtered_config = validate_data(all_models[model_name]) + text_proto = json2pbtxt(json.dumps(filtered_config)) # backup user's config.pbtxt first, when data corrupted by front-end, we still can recovery data shutil.copy( os.path.join(model_dir, 'config.pbtxt'), From 6ac4e5e91cf608e4cde684a6dbf4addc8bb5e6b1 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 9 Dec 2022 17:24:40 +0800 Subject: [PATCH 17/48] add __init__ for package --- visualdl/component/inference/fastdeploy_client/__init__.py | 0 visualdl/component/inference/proto/model_config/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 visualdl/component/inference/fastdeploy_client/__init__.py create mode 100644 visualdl/component/inference/proto/model_config/__init__.py diff --git a/visualdl/component/inference/fastdeploy_client/__init__.py b/visualdl/component/inference/fastdeploy_client/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/visualdl/component/inference/proto/model_config/__init__.py b/visualdl/component/inference/proto/model_config/__init__.py new file mode 100644 index 000000000..e69de29bb From a4407b8fa3edc8fa492ffb0096c12adf213bc438 Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 12 Dec 2022 11:40:51 +0800 Subject: [PATCH 18/48] add calculating layout for frontend --- .../component/inference/fastdeploy_lib.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 37a5158c2..78ffd5dbb 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -19,6 +19,7 @@ import re import signal import string +from collections import defaultdict from subprocess import Popen 
from subprocess import STDOUT @@ -293,6 +294,52 @@ def analyse_step_relationships(step_config, inputs, outputs): # noqa: C901 for var_from_model in relationships['from_models']: if var_from_model not in models_dict[to_model]['inputModels']: models_dict[to_model]['inputModels'].append(var_from_model) + calculate_layout_for_frontend(models_dict) + + +def calculate_layout_for_frontend(model_config_in_step): + ''' + Analyse model topology connections and prepare the positions for each model in layout. + Dynamic program algorithm: + depth(cur_node) = max([depth(prev_node) for prev_node in cur_node['inputModels']]) + Args: + model_config_in_step(dict): model config in ensemble models' step, indexed by model name. + Returns: + None. Results calculated will be saved in place. + ''' + path_depth = defaultdict(int) + + def depth_recursive(model): + if model['modelName'] == 'feed': + path_depth[model['modelName']] = 0 + return 0 + if path_depth[model['modelName']] != 0: + return path_depth[model['modelName']] + path_depth[model['modelName']] = max([ + depth_recursive(model_config_in_step[model_name]) for model_name in + model_config_in_step[model['modelName']]['inputModels'] + ]) + 1 + return path_depth[model['modelName']] + + depth_recursive(model_config_in_step['fetch']) + path_depth_tuple = [ + (k, v) + for k, v in sorted(path_depth.items(), key=lambda item: item[1]) + ] + cur_x = 0 + last_depth = -1 + for model_name, depth in path_depth_tuple: + if depth == last_depth: + model_config_in_step[model_name]['pos_y'] = depth + model_config_in_step[model_name]['pos_x'] = cur_x + cur_x += 1 + else: + cur_x = 0 + model_config_in_step[model_name]['pos_y'] = depth + model_config_in_step[model_name]['pos_x'] = cur_x + cur_x += 1 + last_depth = depth + return def launch_process(kwargs: dict): From cf65c71e7072fae4bf94344bb7443f257d18a765 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 14 Dec 2022 16:41:48 +0800 Subject: [PATCH 19/48] add alive server detection and optimize client --- .../inference/fastdeploy_client/client_app.py | 252 +++++++----------- .../component/inference/fastdeploy_lib.py | 137 +++++++++- .../component/inference/fastdeploy_server.py | 18 +- visualdl/server/app.py | 52 +++- 4 files changed, 303 insertions(+), 156 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py index cc1925bb9..fcc064b62 100644 --- a/visualdl/component/inference/fastdeploy_client/client_app.py +++ b/visualdl/component/inference/fastdeploy_client/client_app.py @@ -36,7 +36,7 @@ 'ocr': visualize_ocr, 'facealignment': visualize_face_alignment, 'headpose': visualize_headpose, - 'others(raw data)': lambda x: str(x) + 'unspecified': lambda x: str(x) } @@ -56,11 +56,6 @@ def create_gradio_client_app(): # noqa:C901 .dark input[type='range'] { accent-color: #dfdfdf; } - .container { - max-width: 1200px; - margin: auto; - padding-top: 1.5rem; - } #gallery { min-height: 22rem; margin-bottom: 15px; @@ -162,84 +157,76 @@ def create_gradio_client_app(): # noqa:C901 placeholder="1", ) - check_button = gr.Button("GetInputOutputName") - with gr.Box(): - gr.Markdown("Inputs") with gr.Tab("component format"): - gr.Markdown( - "Fill inputs according to your need, choose either image or text for each input." 
- ) - with gr.Column(): - with gr.Accordion("input 1"): - input_name_1_text = gr.Textbox( - label="input name", interactive=False) - input_1_image = gr.Image(type='numpy') - input_1_text = gr.Textbox( - label="contents", max_lines=1000) - with gr.Accordion("input 2", open=False): - input_name_2_text = gr.Textbox( - label="input name", interactive=False) - input_2_image = gr.Image(type='numpy') - input_2_text = gr.Textbox( - label="contents", max_lines=1000) - - with gr.Accordion("input 3", open=False): - input_name_3_text = gr.Textbox( - label="input name", interactive=False) - input_3_image = gr.Image(type='numpy') - input_3_text = gr.Textbox( - label="contents", max_lines=1000) - with gr.Box(): - gr.Markdown("Outputs") - with gr.Column(): - with gr.Accordion("output 1"): - output_name_1_text = gr.Textbox( - label="output name", interactive=False) - task_select_items1 = gr.Dropdown( - choices=list(supported_tasks.keys()), - value='others(raw data)', - label='task type') - output_1_text = gr.Textbox( - label="raw data", - interactive=False, - show_label=True) - output_1_image = gr.Image(interactive=False) - with gr.Accordion("output 2", open=False): - output_name_2_text = gr.Textbox( - label="output name", interactive=False) - task_select_items2 = gr.Dropdown( - choices=list(supported_tasks.keys()), - value='others(raw data)', - label='task type') - output_2_text = gr.Textbox( - label="raw data", - interactive=False, - show_label=True, - ) - output_2_image = gr.Image(interactive=False) + check_button = gr.Button("GetInputOutputName") + component_format_column = gr.Column(visible=False) + with component_format_column: + task_radio = gr.Radio( + choices=list(supported_tasks.keys()), + value='unspecified', + label='task type', + visible=True) + gr.Markdown( + "Fill inputs according to your need, choose either image or text for each input." 
+ ) + with gr.Row(): + with gr.Column(): + gr.Markdown("Inputs") + input_accordions = [] + input_name_texts = [] + input_images = [] + input_texts = [] + for i in range(6): + accordion = gr.Accordion( + "input {}".format(i), + open=True, + visible=False) + with accordion: + input_name_text = gr.Textbox( + label="input name", + interactive=False) + input_image = gr.Image(type='numpy') + input_text = gr.Textbox( + label="contents", max_lines=1000) + input_accordions.append(accordion) + input_name_texts.append(input_name_text) + input_images.append(input_image) + input_texts.append(input_text) - with gr.Accordion("output 3", open=False): - output_name_3_text = gr.Textbox( - label="output name", interactive=False) - task_select_items3 = gr.Dropdown( - choices=list(supported_tasks.keys()), - value='others(raw data)', - label='task type') - output_3_text = gr.Textbox( - label="raw data", - interactive=False, - show_label=True) - output_3_image = gr.Image(interactive=False) - component_submit_button = gr.Button("submit") + with gr.Column(): + gr.Markdown("Outputs") + output_accordions = [] + output_name_texts = [] + output_images = [] + output_texts = [] + for i in range(6): + accordion = gr.Accordion( + "output {}".format(i), + open=True, + visible=False) + with accordion: + output_name_text = gr.Textbox( + label="output name", + interactive=False) + output_text = gr.Textbox( + label="raw data", + interactive=False, + show_label=True) + output_image = gr.Image( + interactive=False) + output_accordions.append(accordion) + output_name_texts.append(output_name_text) + output_images.append(output_image) + output_texts.append(output_text) + component_submit_button = gr.Button("submit") with gr.Tab("raw format"): raw_payload_text = gr.Textbox( label="request payload", max_lines=10000) - with gr.Box(): + with gr.Column(): gr.Markdown("Outputs") - with gr.Column(): - output_raw_text = gr.Textbox( - label="raw data", interactive=False) + output_raw_text = gr.Textbox( + label="raw data", interactive=False) raw_submit_button = gr.Button("submit") status_text = gr.Textbox( @@ -247,14 +234,8 @@ def create_gradio_client_app(): # noqa:C901 show_label=True, max_lines=1, interactive=False) - all_input_output_components = [ - input_name_1_text, input_name_2_text, input_name_3_text, - input_1_image, input_2_image, input_3_image, input_1_text, - input_2_text, input_3_text, output_name_1_text, output_name_2_text, - output_name_3_text, output_1_text, output_2_text, output_3_text, - output_1_image, output_2_image, output_3_image, task_select_items1, - task_select_items2, task_select_items3 - ] + all_input_output_components = input_accordions + input_name_texts + input_images + \ + input_texts + output_accordions + output_name_texts + output_images + output_texts def get_input_output_name(server_addr, model_name, model_version): try: @@ -262,23 +243,22 @@ def get_input_output_name(server_addr, model_name, model_version): server_addr, model_name, model_version) except Exception as e: return {status_text: str(e)} - input_name_texts = [ - input_name_1_text, input_name_2_text, input_name_3_text - ] - output_name_texts = [ - output_name_1_text, output_name_2_text, output_name_3_text - ] results = { component: None for component in all_input_output_components } - results[task_select_items1] = 'others(raw data)' - results[task_select_items2] = 'others(raw data)' - results[task_select_items3] = 'others(raw data)' + results[component_format_column] = gr.update(visible=True) + results[check_button] = gr.update(visible=False) + 
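+            # hide every input/output accordion first; the loops below re-enable
+            # one accordion per input/output reported by the model metadata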
for input_accordio in input_accordions: + results[input_accordio] = gr.update(visible=False) + for output_accordio in output_accordions: + results[output_accordio] = gr.update(visible=False) results[status_text] = 'GetInputOutputName Successful' for i, input_meta in enumerate(input_metas): + results[input_accordions[i]] = gr.update(visible=True) results[input_name_texts[i]] = input_meta['name'] for i, output_meta in enumerate(output_metas): + results[output_accordions[i]] = gr.update(visible=True) results[output_name_texts[i]] = output_meta['name'] return results @@ -286,66 +266,36 @@ def component_inference(*args): server_addr = args[0] model_name = args[1] model_version = args[2] - input_name_1 = args[3] - input_1_image_data = args[4] - input_1_text_data = args[5] - input_name_2 = args[6] - input_2_image_data = args[7] - input_2_text_data = args[8] - input_name_3 = args[9] - input_3_image_data = args[10] - input_3_text_data = args[11] - task_select_items1_data = args[12] - task_select_items2_data = args[13] - task_select_items3_data = args[14] + names = args[3:3 + len(input_name_texts)] + images = args[3 + len(input_name_texts):3 + len(input_name_texts) + + len(input_images)] + texts = args[3 + len(input_name_texts) + len(input_images):3 + + len(input_name_texts) + len(input_images) + + len(input_texts)] + task_type = args[-1] if server_addr and model_name and model_version: inputs = {} - if input_name_1: - if input_1_image_data is not None: - inputs[input_name_1] = np.array([input_1_image_data]) - if input_1_text_data: - inputs[input_name_1] = np.array( - [[input_1_text_data.encode('utf-8')]], - dtype=np.object_) - if input_name_2: - if input_2_image_data is not None: - inputs[input_name_2] = np.array([input_2_image_data]) - if input_2_text_data: - inputs[input_name_2] = np.array( - [[input_2_text_data.encode('utf-8')]], - dtype=np.object_) - if input_name_3: - if input_3_image_data is not None: - inputs[input_name_3] = np.array([input_3_image_data]) - if input_3_text_data: - inputs[input_name_3] = np.array( - [[input_3_text_data.encode('utf-8')]], - dtype=np.object_) + for i, input_name in enumerate(names): + if input_name: + if images[i] is not None: + inputs[input_name] = np.array([images[i]]) + if texts[i]: + inputs[input_name] = np.array( + [[texts[i].encode('utf-8')]], dtype=np.object_) try: infer_results = _http_manager.infer( server_addr, model_name, model_version, inputs) results = {status_text: 'Inference Successful'} - output_name_texts = [ - output_name_1_text, output_name_2_text, - output_name_3_text - ] - output_texts = [ - output_1_text, output_2_text, output_3_text - ] - output_images = [ - output_1_image, output_2_image, output_3_image - ] - output_task_types = [ - task_select_items1_data, task_select_items2_data, - task_select_items3_data - ] for i, (output_name, data) in enumerate(infer_results.items()): results[output_name_texts[i]] = output_name results[output_texts[i]] = str(data) - if output_task_types[i] != 'others(raw data)': - results[output_images[i]] = supported_tasks[ - output_task_types[i]](input_1_image_data, data) + if task_type != 'unspecified': + try: + results[output_images[i]] = supported_tasks[ + task_type](images[0], data) + except Exception: + results[output_images[i]] = None return results except Exception as e: return {status_text: 'Error: {}'.format(e)} @@ -374,20 +324,18 @@ def raw_inference(*args): check_button.click( fn=get_input_output_name, inputs=[server_addr_text, model_name_text, model_version_text], - 
outputs=[*all_input_output_components, status_text]) + outputs=[ + *all_input_output_components, check_button, + component_format_column, status_text + ]) component_submit_button.click( fn=component_inference, inputs=[ server_addr_text, model_name_text, model_version_text, - input_name_1_text, input_1_image, input_1_text, - input_name_2_text, input_2_image, input_2_text, - input_name_3_text, input_3_image, input_3_text, - task_select_items1, task_select_items2, task_select_items3 + *input_name_texts, *input_images, *input_texts, task_radio ], outputs=[ - output_name_1_text, output_name_2_text, output_name_3_text, - output_1_text, output_2_text, output_3_text, output_1_image, - output_2_image, output_3_image, status_text + *output_name_texts, *output_images, *output_texts, status_text ]) raw_submit_button.click( fn=raw_inference, diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 78ffd5dbb..80bc170ba 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -25,6 +25,7 @@ import google.protobuf.json_format as json_format import google.protobuf.text_format as text_format +import requests from .proto.model_config.protxt_pb2 import ModelConfig from visualdl.utils.dir import FASTDEPLOYSERVER_PATH @@ -347,9 +348,14 @@ def launch_process(kwargs: dict): Launch a fastdeploy server according to specified arguments. ''' cmd = ['fastdeployserver'] + start_args = {} for key, value in kwargs.items(): + if key == 'default_model_name': # Used to fill client model_name automatically + start_args[key] = value + pass cmd.append('--{}'.format(key)) cmd.append('{}'.format(value)) + start_args[key] = value logfilename = 'logfile-{}'.format(get_random_string(8)) while os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, logfilename)): @@ -364,8 +370,8 @@ def launch_process(kwargs: dict): with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(p.pid)), 'w') as f: f.write( - logfilename - ) # filename ${p.pid} contain the real log filename ${logfilename} + logfilename + '\n' + json.dumps(start_args) + ) # filename ${p.pid} contain the real log filename ${logfilename}, and start arguments return p @@ -376,6 +382,25 @@ def get_random_string(length): return result_str +def get_start_arguments(server_id): + ''' + Get the start arguments for fastdeployserver process. + Args: + server_id(int): fastdeployserver process id + Returns: + args(dict): launch arguments when start fastdeployserver process. + ''' + args = {} + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))): + with open( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)), + 'r') as f: + f.readline() + args = json.loads(f.read()) + return args + + def get_process_output(pid, length): ''' Get the standard output of a opened subprocess. @@ -423,3 +448,111 @@ def kill_process(process): os.remove( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))) os.remove(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))) + + +def get_alive_fastdeploy_servers(): + ''' + Search pids in `FASTDEPLOYSERVER_PATH`, if process is dead and log still exists due to \ + some unexpectable reasons, delete log file. 
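+    Returns:
+        pids(list): directory entries under FASTDEPLOYSERVER_PATH whose
+        fastdeployserver process is still alive.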
+ ''' + pids = [ + name for name in os.listdir(FASTDEPLOYSERVER_PATH) + if 'logfile' not in name + ] + should_delete_pids = [] + for pid in pids: + if check_process_alive(pid) is False: + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))): + with open( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), + 'r') as f: + logfilename = f.read() + # delete file ${logfilename} if exists + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, + '{}'.format(logfilename))): + os.remove( + os.path.join(FASTDEPLOYSERVER_PATH, + '{}'.format(logfilename))) + os.remove( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))) + should_delete_pids.append(pid) + for pid in should_delete_pids: + pids.remove(pid) + return pids + + +def check_process_alive(pid): + ''' + Given a pid, check whether the process is alive or not. + Args: + pid(int): process id + Return: + status(bool): True if process is still alive. + ''' + try: + os.kill(pid, 0) + except OSError: + return False + else: + return True + + +_metric_column_name = { + "Model": { + "nv_inference_request_success", "nv_inference_request_failure", + "nv_inference_count", "nv_inference_exec_count", + "nv_inference_request_duration_us", "nv_inference_queue_duration_us", + "nv_inference_compute_input_duration_us", + "nv_inference_compute_infer_duration_us", + "nv_inference_compute_output_duration_us" + }, + "GPU": { + "nv_gpu_power_usage", "nv_gpu_power_limit", "nv_energy_consumption", + "nv_gpu_utilization", "nv_gpu_memory_total_bytes", + "nv_gpu_memory_used_bytes" + }, + "CPU": { + "nv_cpu_utilization", "nv_cpu_memory_total_bytes", + "nv_cpu_memory_used_bytes" + } +} + + +def generate_metric_table(server_addr, server_port): + model_table = {} + gpu_table = {} + + res = requests.get("http://{}:{}/metrics") + metric_content = res.text + for content in metric_content.split('\n'): + if content.startwith('#'): + continue + else: + res = re.match(r'(\w+)({.*}) (\w+)', + content) # match output by server metrics interface + metric_name = res.group(1) + model = res.group(2) + value = res.group(3) + infos = {} + for info in model.split(','): + k, v = info.split('=') + v = v.strip('"') + infos[k] = v + for key, metric_names in _metric_column_name.items(): + if metric_name in metric_names: + if key == 'Model': + model_name = infos['model'] + if model_name not in model_table: + model_table[model_name] = {} + model_table[model_name][metric_name] = value + elif key == 'GPU': + gpu_name = infos['gpu_uuid'] + gpu_table[gpu_name][metric_name] = value + elif key == 'CPU': + pass + results = {} + results['Model'] = model_table + results['GPU'] = gpu_table + return results diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index ccb34d715..4b02f829a 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -26,7 +26,10 @@ from .fastdeploy_client.client_app import create_gradio_client_app from .fastdeploy_lib import analyse_config from .fastdeploy_lib import exchange_format_to_original_format +from .fastdeploy_lib import generate_metric_table +from .fastdeploy_lib import get_alive_fastdeploy_servers from .fastdeploy_lib import get_process_output +from .fastdeploy_lib import get_start_arguments from .fastdeploy_lib import json2pbtxt from .fastdeploy_lib import kill_process from .fastdeploy_lib import launch_process @@ -127,6 +130,17 @@ def get_server_output(self, server_id, length): else: return + @result() + def 
get_server_metric(self, server_id): + args = get_start_arguments(server_id) + host = 'localhost' + port = args.get('metrics-port', 8002) + return generate_metric_table(host, port) + + @result() + def get_server_list(self): + return get_alive_fastdeploy_servers() + def create_fastdeploy_client(self): if self.client_port is None: @@ -166,7 +180,9 @@ def create_fastdeploy_api_call(): 'start_server': (api.start_server, ['config']), 'stop_server': (api.stop_server, ['server_id']), 'get_server_output': (api.get_server_output, ['server_id', 'length']), - 'create_fastdeploy_client': (api.create_fastdeploy_client, []) + 'create_fastdeploy_client': (api.create_fastdeploy_client, []), + 'get_server_list': (api.get_server_list, []), + 'get_server_metric': (api.get_server_metric, ['server_id']) } def call(path: str, args): diff --git a/visualdl/server/app.py b/visualdl/server/app.py index fc67738e6..8cf1dea42 100644 --- a/visualdl/server/app.py +++ b/visualdl/server/app.py @@ -19,6 +19,7 @@ import sys import threading import time +import urllib import webbrowser import requests @@ -32,6 +33,7 @@ import visualdl.server from visualdl import __version__ +from visualdl.component.inference.fastdeploy_lib import get_start_arguments from visualdl.component.inference.fastdeploy_server import create_fastdeploy_api_call from visualdl.component.inference.model_convert_server import create_model_convert_api_call from visualdl.component.profiler.profiler_server import create_profiler_api_call @@ -170,11 +172,18 @@ def serve_fastdeploy_create_fastdeploy_client(): try: if request.method == 'POST': fastdeploy_api_call('create_fastdeploy_client', request.form) + request_args = request.form else: fastdeploy_api_call('create_fastdeploy_client', request.args) + request_args = request.args except Exception as e: error_msg = '{}'.format(e) return make_response(error_msg) + args = urllib.parse.urlencode(request_args) + if args: + return redirect( + api_path + "/fastdeploy/fastdeploy_client/app?{}".format(args), + code=302) return redirect( api_path + "/fastdeploy/fastdeploy_client/app", code=302) @@ -194,9 +203,11 @@ def request_fastdeploy_create_fastdeploy_client_app(path: str): if request.method == 'POST': port = fastdeploy_api_call('create_fastdeploy_client', request.form) + request_args = request.form else: port = fastdeploy_api_call('create_fastdeploy_client', request.args) + request_args = request.args if path == 'app': proxy_url = request.url.replace( request.host_url.rstrip('/') + api_path + @@ -217,8 +228,47 @@ def request_fastdeploy_create_fastdeploy_client_app(path: str): data=request.get_data(), cookies=request.cookies, allow_redirects=False) + if path == 'app': + content = resp.content + if request_args and 'server_id' in request_args: + server_id = request_args.get('server_id') + start_args = get_start_arguments(server_id) + http_port = start_args.get('http-port', '') + model_name = start_args.get('default_model_name', '') + content = content.decode() + try: + default_server_addr = re.search( + r'"label": "Server address".*?"value": "".*?}', + content).group(0) + cur_server_addr = default_server_addr.replace( + '"value": ""', + '"value": "localhost:{}"'.format(http_port)) + default_model_name = re.search( + r'"label": "model name".*?"value": "".*?}', + content).group(0) + cur_model_name = default_model_name.replace( + '"value": ""', '"value": "{}"'.format(model_name)) + default_model_version = re.search( + r'"label": "model version".*?"value": "".*?}', + content).group(0) + cur_model_version = 
default_model_version.replace( + '"value": ""', '"value": "{}"'.format('1')) + if http_port: + content = content.replace(default_server_addr, + cur_server_addr) + if model_name: + content = content.replace(default_model_name, + cur_model_name) + content = content.replace(default_model_version, + cur_model_version) + except Exception: + pass + finally: + content = content.encode() + else: + content = resp.content headers = [(name, value) for (name, value) in resp.raw.headers.items()] - response = Response(resp.content, resp.status_code, headers) + response = Response(content, resp.status_code, headers) return response @app.route(api_path + '/component_tabs') From fed60faf2036122233d7a883d20c53780bd0442f Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 14 Dec 2022 16:51:04 +0800 Subject: [PATCH 20/48] add alive server detection and optimize client --- visualdl/component/inference/fastdeploy_lib.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 80bc170ba..dcf36fe59 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -408,7 +408,7 @@ def get_process_output(pid, length): if os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))): with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), 'r') as f: - logfilename = f.read() + logfilename = f.readline().strip('\n') # delete file ${logfilename} if exists if os.path.exists( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))): @@ -441,7 +441,7 @@ def kill_process(process): if os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))): with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), 'r') as f: - logfilename = f.read() + logfilename = f.readline().strip('\n') # delete file ${logfilename} if exists if os.path.exists( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))): @@ -467,7 +467,7 @@ def get_alive_fastdeploy_servers(): with open( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), 'r') as f: - logfilename = f.read() + logfilename = f.readline().strip('\n') # delete file ${logfilename} if exists if os.path.exists( os.path.join(FASTDEPLOYSERVER_PATH, From b9f0d078676249c65f454a0df6e7229fa761a410 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 14 Dec 2022 17:12:35 +0800 Subject: [PATCH 21/48] add alive server detection and optimize client --- visualdl/component/inference/fastdeploy_lib.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index dcf36fe59..80ff1800a 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -491,6 +491,7 @@ def check_process_alive(pid): Return: status(bool): True if process is still alive. 
''' + pid = int(pid) try: os.kill(pid, 0) except OSError: @@ -524,14 +525,16 @@ def generate_metric_table(server_addr, server_port): model_table = {} gpu_table = {} - res = requests.get("http://{}:{}/metrics") + res = requests.get("http://{}:{}/metrics".format(server_addr, server_port)) metric_content = res.text for content in metric_content.split('\n'): - if content.startwith('#'): + if content.startswith('#'): continue else: - res = re.match(r'(\w+)({.*}) (\w+)', + res = re.match(r'(\w+){(.*)} (\w+)', content) # match output by server metrics interface + if not res: + continue metric_name = res.group(1) model = res.group(2) value = res.group(3) @@ -549,6 +552,8 @@ def generate_metric_table(server_addr, server_port): model_table[model_name][metric_name] = value elif key == 'GPU': gpu_name = infos['gpu_uuid'] + if gpu_name not in gpu_table: + gpu_table[gpu_name] = {} gpu_table[gpu_name][metric_name] = value elif key == 'CPU': pass From 99af968776feee1995b021097d89a032595302ac Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 15 Dec 2022 18:03:37 +0800 Subject: [PATCH 22/48] add metrics in gradio client --- .../inference/fastdeploy_client/client_app.py | 150 ++++++++++++----- .../fastdeploy_client/http_client_manager.py | 159 ++++++++++++++++++ .../component/inference/fastdeploy_server.py | 5 +- visualdl/server/app.py | 34 ++-- 4 files changed, 293 insertions(+), 55 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py index fcc064b62..a6af465ff 100644 --- a/visualdl/component/inference/fastdeploy_client/client_app.py +++ b/visualdl/component/inference/fastdeploy_client/client_app.py @@ -15,7 +15,9 @@ import gradio as gr import numpy as np +from .http_client_manager import get_metric_data from .http_client_manager import HttpClientManager +from .http_client_manager import metrics_table_head from .visualizer import visualize_detection from .visualizer import visualize_face_alignment from .visualizer import visualize_face_detection @@ -137,80 +139,89 @@ def create_gradio_client_app(): # noqa:C901 with gr.Column(): with gr.Row(): server_addr_text = gr.Textbox( - label="Server address", + label="服务ip", show_label=True, max_lines=1, - placeholder="localhost:8000", + placeholder="localhost", ) + server_http_port_text = gr.Textbox( + label="推理服务端口", + show_label=True, + max_lines=1, + placeholder="8000", + ) + + server_metric_port_text = gr.Textbox( + label="性能服务端口", + show_label=True, + max_lines=1, + placeholder="8002", + ) + with gr.Row(): model_name_text = gr.Textbox( - label="model name", + label="模型名称", show_label=True, max_lines=1, placeholder="yolov5", ) - model_version_text = gr.Textbox( - label="model version", + label="模型版本", show_label=True, max_lines=1, placeholder="1", ) with gr.Box(): - with gr.Tab("component format"): - check_button = gr.Button("GetInputOutputName") + with gr.Tab("组件形式"): + check_button = gr.Button("获取模型输入输出") component_format_column = gr.Column(visible=False) with component_format_column: task_radio = gr.Radio( choices=list(supported_tasks.keys()), value='unspecified', - label='task type', + label='任务类型', visible=True) - gr.Markdown( - "Fill inputs according to your need, choose either image or text for each input." 
- ) + gr.Markdown("根据模型需要,挑选文本框或者图像框进行输入") with gr.Row(): with gr.Column(): - gr.Markdown("Inputs") + gr.Markdown("模型输入") input_accordions = [] input_name_texts = [] input_images = [] input_texts = [] for i in range(6): accordion = gr.Accordion( - "input {}".format(i), + "输入变量 {}".format(i), open=True, visible=False) with accordion: input_name_text = gr.Textbox( - label="input name", - interactive=False) + label="变量名", interactive=False) input_image = gr.Image(type='numpy') input_text = gr.Textbox( - label="contents", max_lines=1000) + label="文本框", max_lines=1000) input_accordions.append(accordion) input_name_texts.append(input_name_text) input_images.append(input_image) input_texts.append(input_text) with gr.Column(): - gr.Markdown("Outputs") + gr.Markdown("模型输出") output_accordions = [] output_name_texts = [] output_images = [] output_texts = [] for i in range(6): accordion = gr.Accordion( - "output {}".format(i), + "输出变量 {}".format(i), open=True, visible=False) with accordion: output_name_text = gr.Textbox( - label="output name", - interactive=False) + label="变量名", interactive=False) output_text = gr.Textbox( - label="raw data", + label="服务返回的原数据", interactive=False, show_label=True) output_image = gr.Image( @@ -219,26 +230,40 @@ def create_gradio_client_app(): # noqa:C901 output_name_texts.append(output_name_text) output_images.append(output_image) output_texts.append(output_text) - component_submit_button = gr.Button("submit") - with gr.Tab("raw format"): + component_submit_button = gr.Button("提交请求") + with gr.Tab("源格式"): + gr.Markdown("模型输入") raw_payload_text = gr.Textbox( - label="request payload", max_lines=10000) + label="负载数据", max_lines=10000) with gr.Column(): - gr.Markdown("Outputs") + gr.Markdown("输出") output_raw_text = gr.Textbox( - label="raw data", interactive=False) - raw_submit_button = gr.Button("submit") + label="服务返回的原数据", interactive=False) + raw_submit_button = gr.Button("提交请求") + + with gr.Box(): + with gr.Column(): + gr.Markdown("服务性能统计(每次提交请求会自动更新数据,您也可以手动点击更新)") + update_metric_button = gr.Button("更新数据") + output_html_table = gr.Textbox( + label="metrics", + interactive=False, + show_label=False, + value=metrics_table_head.format('', '')) status_text = gr.Textbox( label="status", show_label=True, max_lines=1, interactive=False) + all_input_output_components = input_accordions + input_name_texts + input_images + \ input_texts + output_accordions + output_name_texts + output_images + output_texts - def get_input_output_name(server_addr, model_name, model_version): + def get_input_output_name(server_ip, server_port, model_name, + model_version): try: + server_addr = server_ip + ':' + server_port input_metas, output_metas = _http_manager.get_model_meta( server_addr, model_name, model_version) except Exception as e: @@ -263,17 +288,20 @@ def get_input_output_name(server_addr, model_name, model_version): return results def component_inference(*args): - server_addr = args[0] - model_name = args[1] - model_version = args[2] - names = args[3:3 + len(input_name_texts)] - images = args[3 + len(input_name_texts):3 + len(input_name_texts) + + server_ip = args[0] + http_port = args[1] + metric_port = args[2] + model_name = args[3] + model_version = args[4] + names = args[5:5 + len(input_name_texts)] + images = args[5 + len(input_name_texts):5 + len(input_name_texts) + len(input_images)] - texts = args[3 + len(input_name_texts) + len(input_images):3 + + texts = args[5 + len(input_name_texts) + len(input_images):5 + len(input_name_texts) + len(input_images) + len(input_texts)] 
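# Layout of the flattened positional arguments gradio passes to this
# callback, following the inputs wired up in component_submit_button.click():
#   args[0] server_ip, args[1] http_port, args[2] metric_port,
#   args[3] model_name, args[4] model_version, then the input-name
#   textboxes, the image components and the text components (sliced above),
#   and finally the task_radio value in args[-1].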
task_type = args[-1] - if server_addr and model_name and model_version: + server_addr = server_ip + ':' + http_port + if server_ip and http_port and model_name and model_version: inputs = {} for i, input_name in enumerate(names): if input_name: @@ -296,6 +324,9 @@ def component_inference(*args): task_type](images[0], data) except Exception: results[output_images[i]] = None + if metric_port: + html_table = get_metric_data(server_ip, metric_port) + results[output_html_table] = html_table return results except Exception as e: return {status_text: 'Error: {}'.format(e)} @@ -306,10 +337,13 @@ def component_inference(*args): } def raw_inference(*args): - server_addr = args[0] - model_name = args[1] - model_version = args[2] - payload_text = args[3] + server_ip = args[0] + http_port = args[1] + metric_port = args[2] + model_name = args[3] + model_version = args[4] + payload_text = args[5] + server_addr = server_ip + ':' + http_port try: result = _http_manager.raw_infer(server_addr, model_name, model_version, payload_text) @@ -317,13 +351,34 @@ def raw_inference(*args): status_text: 'Get response from server', output_raw_text: result } + if server_ip and metric_port: + html_table = get_metric_data(server_ip, metric_port) + results[output_html_table] = html_table return results except Exception as e: return {status_text: 'Error: {}'.format(e)} + def update_metric(server_ip, metrics_port): + if server_ip and metrics_port: + try: + html_table = get_metric_data(server_ip, metrics_port) + return { + output_html_table: html_table, + status_text: "Successfully update metrics." + } + except Exception as e: + return {status_text: 'Error: {}'.format(e)} + else: + return { + status_text: 'Please input server ip and metrics_port.' + } + check_button.click( fn=get_input_output_name, - inputs=[server_addr_text, model_name_text, model_version_text], + inputs=[ + server_addr_text, server_http_port_text, model_name_text, + model_version_text + ], outputs=[ *all_input_output_components, check_button, component_format_column, status_text @@ -331,17 +386,24 @@ def raw_inference(*args): component_submit_button.click( fn=component_inference, inputs=[ - server_addr_text, model_name_text, model_version_text, + server_addr_text, server_http_port_text, + server_metric_port_text, model_name_text, model_version_text, *input_name_texts, *input_images, *input_texts, task_radio ], outputs=[ - *output_name_texts, *output_images, *output_texts, status_text + *output_name_texts, *output_images, *output_texts, status_text, + output_html_table ]) raw_submit_button.click( fn=raw_inference, inputs=[ - server_addr_text, model_name_text, model_version_text, + server_addr_text, server_http_port_text, + server_metric_port_text, model_name_text, model_version_text, raw_payload_text ], - outputs=[output_raw_text, status_text]) + outputs=[output_raw_text, status_text, output_html_table]) + update_metric_button.click( + fn=update_metric, + inputs=[server_addr_text, server_metric_port_text], + outputs=[output_html_table, status_text]) return block diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index 0fb300023..cf40a1194 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -13,6 +13,7 @@ # limitations under the License. 
# ======================================================================= import json +import re import requests import tritonclient.http as httpclient @@ -50,6 +51,164 @@ def prepare_request(inputs_meta, inputs_data, outputs_meta): return inputs, outputs +metrics_table_head = """ + + +
+<table border="1" style="width:100%;text-align:center">
+  <tr>
+    <th rowspan="2">模型名称</th>
+    <th colspan="4">执行统计</th>
+    <th colspan="5">延迟统计</th>
+  </tr>
+  <tr>
+    <th>请求处理成功数</th>
+    <th>请求处理失败数</th>
+    <th>推理batch数</th>
+    <th>推理样本数</th>
+    <th>请求处理时间</th>
+    <th>任务队列等待时间</th>
+    <th>输入处理时间</th>
+    <th>模型推理时间</th>
+    <th>输出处理时间</th>
+  </tr>
+  {}
+</table>
+<br>
+<table border="1" style="width:100%;text-align:center">
+  <tr>
+    <th rowspan="2">GPU</th>
+    <th colspan="4">性能指标</th>
+    <th colspan="2">显存</th>
+  </tr>
+  <tr>
+    <th>利用率</th>
+    <th>功率</th>
+    <th>功率限制</th>
+    <th>耗电量</th>
+    <th>总量</th>
+    <th>已使用</th>
+  </tr>
+  {}
+</table>
+""" + + +def get_metric_data(server_addr, metric_port): # noqa:C901 + ''' + Get metrics data from fastdeploy server, and transform it into html table. + Args: + server_addr(str): fastdeployserver ip address + metric_port(int): fastdeployserver metrics port + Returns: + htmltable(str): html table to show metrics data + ''' + model_table = {} + gpu_table = {} + metric_column_name = { + "Model": { + "nv_inference_request_success", "nv_inference_request_failure", + "nv_inference_count", "nv_inference_exec_count", + "nv_inference_request_duration_us", + "nv_inference_queue_duration_us", + "nv_inference_compute_input_duration_us", + "nv_inference_compute_infer_duration_us", + "nv_inference_compute_output_duration_us" + }, + "GPU": { + "nv_gpu_power_usage", "nv_gpu_power_limit", + "nv_energy_consumption", "nv_gpu_utilization", + "nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes" + }, + "CPU": { + "nv_cpu_utilization", "nv_cpu_memory_total_bytes", + "nv_cpu_memory_used_bytes" + } + } + res = requests.get("http://{}:{}/metrics".format(server_addr, metric_port)) + metric_content = res.text + for content in metric_content.split('\n'): + if content.startswith('#'): + continue + else: + res = re.match(r'(\w+){(.*)} (\w+)', + content) # match output by server metrics interface + if not res: + continue + metric_name = res.group(1) + model = res.group(2) + value = res.group(3) + infos = {} + for info in model.split(','): + k, v = info.split('=') + v = v.strip('"') + infos[k] = v + for key, metric_names in metric_column_name.items(): + if metric_name in metric_names: + if key == 'Model': + model_name = infos['model'] + if model_name not in model_table: + model_table[model_name] = {} + model_table[model_name][metric_name] = value + elif key == 'GPU': + gpu_name = infos['gpu_uuid'] + if gpu_name not in gpu_table: + gpu_table[gpu_name] = {} + gpu_table[gpu_name][metric_name] = value + elif key == 'CPU': + pass + model_data_list = [] + gpu_data_list = [] + model_data_metric_names = [ + "nv_inference_request_success", "nv_inference_request_failure", + "nv_inference_exec_count", "nv_inference_count", + "nv_inference_request_duration_us", "nv_inference_queue_duration_us", + "nv_inference_compute_input_duration_us", + "nv_inference_compute_infer_duration_us", + "nv_inference_compute_output_duration_us" + ] + gpu_data_metric_names = [ + "nv_gpu_utilization", "nv_gpu_power_usage", "nv_gpu_power_limit", + "nv_energy_consumption", "nv_gpu_memory_total_bytes", + "nv_gpu_memory_used_bytes" + ] + for k, v in model_table.items(): + data = [] + data.append(k) + for data_metric in model_data_metric_names: + data.append(v[data_metric]) + model_data_list.append(data) + for k, v in gpu_table.items(): + data = [] + data.append(k) + for data_metric in gpu_data_metric_names: + data.append(v[data_metric]) + gpu_data_list.append(data) + model_data = '\n'.join([ + "" + '\n'.join(["" + item + "" + for item in data]) + "" + for data in model_data_list + ]) + gpu_data = '\n'.join([ + "" + '\n'.join(["" + item + "" + for item in data]) + "" + for data in gpu_data_list + ]) + return metrics_table_head.format(model_data, gpu_data) + + class HttpClientManager: def __init__(self): self.clients = {} # server url: httpclient diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 4b02f829a..47981dac4 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -113,9 +113,12 @@ def stop_server(self, server_id): # 
FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn) kill_process(server_id) # check if there are servers killed by other vdl app instance and become zoombie + should_delete = [] for server_id, process in self.opened_servers.items(): if process.poll() is not None: - del self.opened_servers[server_id] + should_delete.append(server_id) + for server_id in should_delete: + del self.opened_servers[server_id] @result('text/plain') def get_server_output(self, server_id, length): diff --git a/visualdl/server/app.py b/visualdl/server/app.py index 8cf1dea42..94492fd78 100644 --- a/visualdl/server/app.py +++ b/visualdl/server/app.py @@ -234,31 +234,45 @@ def request_fastdeploy_create_fastdeploy_client_app(path: str): server_id = request_args.get('server_id') start_args = get_start_arguments(server_id) http_port = start_args.get('http-port', '') + metrics_port = start_args.get('metrics-port', '') model_name = start_args.get('default_model_name', '') content = content.decode() try: default_server_addr = re.search( - r'"label": "Server address".*?"value": "".*?}', - content).group(0) + r'"label": "服务ip".*?"value": "".*?}', content).group(0) cur_server_addr = default_server_addr.replace( - '"value": ""', - '"value": "localhost:{}"'.format(http_port)) - default_model_name = re.search( - r'"label": "model name".*?"value": "".*?}', + '"value": ""', '"value": "localhost"') + default_http_port = re.search( + r'"label": "推理服务端口".*?"value": "".*?}', content).group(0) + cur_http_port = default_http_port.replace( + '"value": ""', '"value": "{}"'.format(http_port)) + default_metrics_port = re.search( + r'"label": "性能服务端口".*?"value": "".*?}', + content).group(0) + cur_metrics_port = default_metrics_port.replace( + '"value": ""', '"value": "{}"'.format(metrics_port)) + default_model_name = re.search( + r'"label": "模型名称".*?"value": "".*?}', content).group(0) cur_model_name = default_model_name.replace( '"value": ""', '"value": "{}"'.format(model_name)) default_model_version = re.search( - r'"label": "model version".*?"value": "".*?}', - content).group(0) + r'"label": "模型版本".*?"value": "".*?}', content).group(0) cur_model_version = default_model_version.replace( '"value": ""', '"value": "{}"'.format('1')) + + content = content.replace(default_server_addr, + cur_server_addr) if http_port: - content = content.replace(default_server_addr, - cur_server_addr) + content = content.replace(default_http_port, + cur_http_port) + if metrics_port: + content = content.replace(default_metrics_port, + cur_metrics_port) if model_name: content = content.replace(default_model_name, cur_model_name) + content = content.replace(default_model_version, cur_model_version) except Exception: From 1eb15fc7233a9e08f9c61c176b74d03ceede048f Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 16 Dec 2022 10:14:11 +0800 Subject: [PATCH 23/48] update presentation --- .../inference/fastdeploy_client/client_app.py | 8 +++---- .../fastdeploy_client/http_client_manager.py | 10 +++++--- .../component/inference/fastdeploy_lib.py | 9 ++++--- visualdl/server/app.py | 24 ++++++++++++------- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py index a6af465ff..e918775fe 100644 --- a/visualdl/component/inference/fastdeploy_client/client_app.py +++ b/visualdl/component/inference/fastdeploy_client/client_app.py @@ -231,25 +231,25 @@ def create_gradio_client_app(): # noqa:C901 
output_images.append(output_image) output_texts.append(output_text) component_submit_button = gr.Button("提交请求") - with gr.Tab("源格式"): + with gr.Tab("原始形式"): gr.Markdown("模型输入") raw_payload_text = gr.Textbox( label="负载数据", max_lines=10000) with gr.Column(): gr.Markdown("输出") output_raw_text = gr.Textbox( - label="服务返回的原数据", interactive=False) + label="服务返回的原始数据", interactive=False) raw_submit_button = gr.Button("提交请求") with gr.Box(): with gr.Column(): gr.Markdown("服务性能统计(每次提交请求会自动更新数据,您也可以手动点击更新)") - update_metric_button = gr.Button("更新数据") - output_html_table = gr.Textbox( + output_html_table = gr.HTML( label="metrics", interactive=False, show_label=False, value=metrics_table_head.format('', '')) + update_metric_button = gr.Button("更新统计数据") status_text = gr.Textbox( label="status", diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index cf40a1194..4c29bd102 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -53,9 +53,9 @@ def prepare_request(inputs_meta, inputs_data, outputs_meta): metrics_table_head = """
@@ -137,7 +137,11 @@ def get_metric_data(server_addr, metric_port): # noqa:C901 "nv_cpu_memory_used_bytes" } } - res = requests.get("http://{}:{}/metrics".format(server_addr, metric_port)) + try: + res = requests.get("http://{}:{}/metrics".format( + server_addr, metric_port)) + except Exception: + return metrics_table_head.format('', '') metric_content = res.text for content in metric_content.split('\n'): if content.startswith('#'): diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 80ff1800a..69d7be5d2 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -352,7 +352,7 @@ def launch_process(kwargs: dict): for key, value in kwargs.items(): if key == 'default_model_name': # Used to fill client model_name automatically start_args[key] = value - pass + continue cmd.append('--{}'.format(key)) cmd.append('{}'.format(value)) start_args[key] = value @@ -524,8 +524,11 @@ def check_process_alive(pid): def generate_metric_table(server_addr, server_port): model_table = {} gpu_table = {} - - res = requests.get("http://{}:{}/metrics".format(server_addr, server_port)) + try: + res = requests.get("http://{}:{}/metrics".format( + server_addr, server_port)) + except Exception: + return {} metric_content = res.text for content in metric_content.split('\n'): if content.startswith('#'): diff --git a/visualdl/server/app.py b/visualdl/server/app.py index 94492fd78..e451c4e21 100644 --- a/visualdl/server/app.py +++ b/visualdl/server/app.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ======================================================================= +import json import multiprocessing import os import re @@ -239,28 +240,35 @@ def request_fastdeploy_create_fastdeploy_client_app(path: str): content = content.decode() try: default_server_addr = re.search( - r'"label": "服务ip".*?"value": "".*?}', content).group(0) + '"label": {}.*?"value": "".*?}}'.format( + json.dumps("服务ip", ensure_ascii=True).replace( + '\\', '\\\\')), content).group(0) cur_server_addr = default_server_addr.replace( '"value": ""', '"value": "localhost"') default_http_port = re.search( - r'"label": "推理服务端口".*?"value": "".*?}', - content).group(0) + '"label": {}.*?"value": "".*?}}'.format( + json.dumps("推理服务端口", ensure_ascii=True).replace( + '\\', '\\\\')), content).group(0) cur_http_port = default_http_port.replace( '"value": ""', '"value": "{}"'.format(http_port)) default_metrics_port = re.search( - r'"label": "性能服务端口".*?"value": "".*?}', - content).group(0) + '"label": {}.*?"value": "".*?}}'.format( + json.dumps("性能服务端口", ensure_ascii=True).replace( + '\\', '\\\\')), content).group(0) cur_metrics_port = default_metrics_port.replace( '"value": ""', '"value": "{}"'.format(metrics_port)) default_model_name = re.search( - r'"label": "模型名称".*?"value": "".*?}', content).group(0) + '"label": {}.*?"value": "".*?}}'.format( + json.dumps("模型名称", ensure_ascii=True).replace( + '\\', '\\\\')), content).group(0) cur_model_name = default_model_name.replace( '"value": ""', '"value": "{}"'.format(model_name)) default_model_version = re.search( - r'"label": "模型版本".*?"value": "".*?}', content).group(0) + '"label": {}.*?"value": "".*?}}'.format( + json.dumps("模型版本", ensure_ascii=True).replace( + '\\', '\\\\')), content).group(0) cur_model_version = default_model_version.replace( '"value": ""', '"value": "{}"'.format('1')) - content = 
content.replace(default_server_addr, cur_server_addr) if http_port: From d6abc5ab9d5cdf576a845669f03a34126c15ec1a Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 16 Dec 2022 11:00:10 +0800 Subject: [PATCH 24/48] Change return value to None for frontend performance data when server not ready --- visualdl/component/inference/fastdeploy_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 69d7be5d2..6816be342 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -528,7 +528,7 @@ def generate_metric_table(server_addr, server_port): res = requests.get("http://{}:{}/metrics".format( server_addr, server_port)) except Exception: - return {} + return None metric_content = res.text for content in metric_content.split('\n'): if content.startswith('#'): From 41f5dfd5ea6890316e22b97bf60854174c60fd93 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 23 Dec 2022 16:42:33 +0800 Subject: [PATCH 25/48] add get_server_config and download_pretrain_model api --- .../inference/fastdeploy_client/client_app.py | 3 +- .../fastdeploy_client/http_client_manager.py | 7 +- .../component/inference/fastdeploy_lib.py | 194 ++++++++++++------ .../component/inference/fastdeploy_server.py | 43 ++-- 4 files changed, 171 insertions(+), 76 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py index e918775fe..7b28bdbe3 100644 --- a/visualdl/component/inference/fastdeploy_client/client_app.py +++ b/visualdl/component/inference/fastdeploy_client/client_app.py @@ -273,7 +273,7 @@ def get_input_output_name(server_ip, server_port, model_name, for component in all_input_output_components } results[component_format_column] = gr.update(visible=True) - results[check_button] = gr.update(visible=False) + # results[check_button] = gr.update(visible=False) for input_accordio in input_accordions: results[input_accordio] = gr.update(visible=False) for output_accordio in output_accordions: @@ -313,6 +313,7 @@ def component_inference(*args): try: infer_results = _http_manager.infer( server_addr, model_name, model_version, inputs) + print('infer_results', infer_results) results = {status_text: 'Inference Successful'} for i, (output_name, data) in enumerate(infer_results.items()): diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index 4c29bd102..0e796c8a1 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -241,8 +241,11 @@ def infer(self, server_url, model_name, model_version, inputs): for output in output_metadata: result = response.as_numpy(output.name) # datatype: numpy if output.datatype == 'BYTES': - result = result[0][0] # datatype: bytes - result = json.loads(result) # datatype: json + try: # maybe not vison tasks, normal text + value = result[0][0] # datatype: bytes + result = json.loads(value) # datatype: json + except Exception: + pass else: result = result[0] results[output.name] = result diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 6816be342..b7f971994 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -71,7 
+71,6 @@ def analyse_config(cur_dir: str): Return a json object to describe configuration. ''' all_model_configs = {} - all_model_paths = {} all_model_versions = {} parent_dir, sub_dirs, filenames = os.walk(cur_dir).send( None) # models can only put directory in model repository, @@ -82,7 +81,6 @@ def analyse_config(cur_dir: str): model_name = os.path.basename(model_dir) for filename in filenames: if 'config.pbtxt' in filename: - all_model_paths[model_name] = model_dir # store model path json_config = json.loads( pbtxt2json(open(os.path.join(model_dir, filename)).read())) all_model_configs[ @@ -104,7 +102,7 @@ def analyse_config(cur_dir: str): if not all_model_configs: raise Exception( 'Not a valid model repository, please choose the right path') - return all_model_configs, all_model_versions, all_model_paths + return all_model_configs, all_model_versions def exchange_format_to_original_format(exchange_format): @@ -353,10 +351,18 @@ def launch_process(kwargs: dict): if key == 'default_model_name': # Used to fill client model_name automatically start_args[key] = value continue + if key == 'server-name' or key == 'ensemble-img': # extra information + start_args[key] = value + continue cmd.append('--{}'.format(key)) cmd.append('{}'.format(value)) start_args[key] = value + all_model_configs, all_model_versions, _ = analyse_config( + start_args['model-repository']) + model_repo_config = original_format_to_exchange_format( + all_model_configs, all_model_versions) + model_repo_config['ensemble-img'] = start_args['ensemble-img'] logfilename = 'logfile-{}'.format(get_random_string(8)) while os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, logfilename)): logfilename = 'logfile-{}'.format(get_random_string(8)) @@ -367,11 +373,18 @@ def launch_process(kwargs: dict): buffering=1), stderr=STDOUT, universal_newlines=True) - with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(p.pid)), - 'w') as f: - f.write( - logfilename + '\n' + json.dumps(start_args) - ) # filename ${p.pid} contain the real log filename ${logfilename}, and start arguments + server_name = start_args['server-name'] if start_args[ + 'server-name'] else p.pid + with open( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_name)), + 'w') as f: + # filename ${server_name} contain 4 lines: + # line1 : the real log filename ${logfilename} + # line2 : pid + # line3 : launch arguments + # line4 : model-repository configuration + f.write(logfilename + '\n' + str(p.pid) + '\n' + + json.dumps(start_args) + '\n' + json.dumps(model_repo_config)) return p @@ -386,7 +399,7 @@ def get_start_arguments(server_id): ''' Get the start arguments for fastdeployserver process. Args: - server_id(int): fastdeployserver process id + server_id(str): fastdeployserver process name Returns: args(dict): launch arguments when start fastdeployserver process. ''' @@ -396,19 +409,73 @@ def get_start_arguments(server_id): with open( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)), 'r') as f: - f.readline() - args = json.loads(f.read()) + arguments_json = f.read().split('\n')[2] + args = json.loads(arguments_json) return args -def get_process_output(pid, length): +def get_process_pid(server_id): + ''' + Get the process id for fastdeployserver process. + Args: + server_id(str): fastdeployserver process name + Returns: + pid(int): process id. 
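    Note: the tracking file parsed here is the one written by launch_process,
    i.e. four lines holding the log filename, the pid, the launch arguments
    as JSON and the model-repository configuration as JSON. An illustrative
    file body, with made-up values:

        logfile-a1b2c3d4
        23817
        {"model-repository": "./my_repo", "http-port": "8000"}
        {"models": [], "ensembles": []}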
+ ''' + pid = None + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))): + with open( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)), + 'r') as f: + pid = int(f.read().split('\n')[1]) + return pid + + +def get_process_logfile_name(server_id): + ''' + Get the process logfile name for fastdeployserver process. + Args: + server_id(str): fastdeployserver process name + Returns: + logfile(str): logfile name. + ''' + filename = None + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))): + with open( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)), + 'r') as f: + filename = int(f.read().split('\n')[0]) + return filename + + +def get_process_model_configuration(server_id): + ''' + Get the model repository configuration for fastdeployserver process. + Args: + server_id(str): fastdeployserver process name + Returns: + configuration(dict): model repository configuration + ''' + conf = {} + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))): + with open( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)), + 'r') as f: + conf_json = f.read().split('\n')[3] + conf = json.loads(conf_json) + return conf + + +def get_process_output(server_id, length): ''' Get the standard output of a opened subprocess. ''' - if os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))): - with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), - 'r') as f: - logfilename = f.readline().strip('\n') + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))): + logfilename = get_process_logfile_name(server_id) # delete file ${logfilename} if exists if os.path.exists( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))): @@ -420,14 +487,31 @@ def get_process_output(pid, length): return data +def delete_files_for_process(server_id): + ''' + Delete logfile for fastdeployserver process. + Args: + server_id(str): fastdeployserver process name + ''' + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))): + logfilename = get_process_logfile_name(server_id) + # delete file ${logfilename} if exists + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))): + os.remove( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))) + os.remove(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))) + + def kill_process(process): ''' Stop a opened subprocess. 
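    Note: process is either a server name (a str, resolved to a pid through
    get_process_pid and sent SIGKILL) or the Popen handle returned by
    launch_process (killed directly and reaped with wait()), so both
    kill_process(p) and kill_process('my_server') are valid calls, the
    server name 'my_server' being a made-up example.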
''' - if type(process) == int: # pid, use os.kill to terminate - pid = process + if type(process) == str: # server_id, use os.kill to terminate + pid = get_process_pid(process) try: - os.kill(process, signal.SIGKILL) + os.kill(pid, signal.SIGKILL) # delete file ${pid} if exists except Exception: pass @@ -438,60 +522,38 @@ def kill_process(process): process.wait(10) except Exception: pass - if os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))): - with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), - 'r') as f: - logfilename = f.readline().strip('\n') - # delete file ${logfilename} if exists - if os.path.exists( - os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))): - os.remove( - os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))) - os.remove(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))) def get_alive_fastdeploy_servers(): ''' - Search pids in `FASTDEPLOYSERVER_PATH`, if process is dead and log still exists due to \ + Search server names in `FASTDEPLOYSERVER_PATH`, if process is dead and log still exists due to \ some unexpectable reasons, delete log file. ''' - pids = [ + server_names = [ name for name in os.listdir(FASTDEPLOYSERVER_PATH) if 'logfile' not in name ] - should_delete_pids = [] - for pid in pids: - if check_process_alive(pid) is False: - if os.path.exists( - os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))): - with open( - os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), - 'r') as f: - logfilename = f.readline().strip('\n') - # delete file ${logfilename} if exists - if os.path.exists( - os.path.join(FASTDEPLOYSERVER_PATH, - '{}'.format(logfilename))): - os.remove( - os.path.join(FASTDEPLOYSERVER_PATH, - '{}'.format(logfilename))) - os.remove( - os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))) - should_delete_pids.append(pid) - for pid in should_delete_pids: - pids.remove(pid) - return pids - - -def check_process_alive(pid): + should_delete_servers = [] + for server_name in server_names: + if check_process_alive(server_name) is False: + delete_files_for_process(server_name) + should_delete_servers.append(server_name) + for server_name in should_delete_servers: + server_names.remove(server_name) + return server_names + + +def check_process_alive(server_id): ''' - Given a pid, check whether the process is alive or not. + Given a server id, check whether the process is alive or not. Args: - pid(int): process id + server_id(str): fastdeployserver process name Return: status(bool): True if process is still alive. 
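    Note: the check relies on the conventional signal-0 probe, i.e.
    os.kill(pid, 0) delivers no signal but still performs the pid lookup,
    raising OSError when no such process exists.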
''' - pid = int(pid) + pid = get_process_pid(server_id) + if pid is None: + return False try: os.kill(pid, 0) except OSError: @@ -521,7 +583,7 @@ def check_process_alive(pid): } -def generate_metric_table(server_addr, server_port): +def generate_metric_table(server_addr, server_port): # noqa:C901 model_table = {} gpu_table = {} try: @@ -546,6 +608,18 @@ def generate_metric_table(server_addr, server_port): k, v = info.split('=') v = v.strip('"') infos[k] = v + if metric_name in [ + "nv_inference_request_duration_us", + "nv_inference_queue_duration_us", + "nv_inference_compute_input_duration_us", + "nv_inference_compute_infer_duration_us", + "nv_inference_compute_output_duration_us" + ]: + value = float(value) / 1000 + elif metric_name in [ + "nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes" + ]: + value = float(value) / 1024 / 1024 / 1024 for key, metric_names in _metric_column_name.items(): if metric_name in metric_names: if key == 'Model': diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 47981dac4..e52eb6ec0 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -21,13 +21,16 @@ from multiprocessing import Process from pathlib import Path +import fastdeploy as fd import requests from .fastdeploy_client.client_app import create_gradio_client_app from .fastdeploy_lib import analyse_config +from .fastdeploy_lib import delete_files_for_process from .fastdeploy_lib import exchange_format_to_original_format from .fastdeploy_lib import generate_metric_table from .fastdeploy_lib import get_alive_fastdeploy_servers +from .fastdeploy_lib import get_process_model_configuration from .fastdeploy_lib import get_process_output from .fastdeploy_lib import get_start_arguments from .fastdeploy_lib import json2pbtxt @@ -46,7 +49,6 @@ def __init__(self): self.opened_servers = { } # Use to store the opened server process pid and process itself self.client_port = None - self.model_paths = {} @result() def get_directory(self, cur_dir): @@ -66,10 +68,7 @@ def get_directory(self, cur_dir): @result() def get_config(self, cur_dir): - all_model_configs, all_model_versions, all_model_paths = analyse_config( - cur_dir) - for name, value in all_model_paths.items(): - self.model_paths[(Path(os.path.abspath(cur_dir)), name)] = value + all_model_configs, all_model_versions = analyse_config(cur_dir) return original_format_to_exchange_format(all_model_configs, all_model_versions) @@ -77,8 +76,7 @@ def get_config(self, cur_dir): def config_update(self, cur_dir, model_name, config): config = json.loads(config) all_models = exchange_format_to_original_format(config) - model_dir = self.model_paths[(Path(os.path.abspath(cur_dir)), - model_name)] + model_dir = os.path.join(os.path.abspath(cur_dir), model_name) filtered_config = validate_data(all_models[model_name]) text_proto = json2pbtxt(json.dumps(filtered_config)) # backup user's config.pbtxt first, when data corrupted by front-end, we still can recovery data @@ -99,19 +97,21 @@ def start_server(self, configs): raise RuntimeError( "Launch fastdeploy server failed, please check your launching arguments" ) - self.opened_servers[process.pid] = process - return process.pid + server_name = configs['server-name'] if configs[ + 'server-name'] else process.pid + self.opened_servers[server_name] = process + return server_name @result() def stop_server(self, server_id): - server_id = int(server_id) if server_id in self.opened_servers: # check 
if server_id in self.opened_servers kill_process(self.opened_servers[server_id]) del self.opened_servers[server_id] - elif str(server_id) in set( + elif server_id in set( os.listdir(FASTDEPLOYSERVER_PATH)): # check if server_id in # FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn) kill_process(server_id) + delete_files_for_process(server_id) # check if there are servers killed by other vdl app instance and become zoombie should_delete = [] for server_id, process in self.opened_servers.items(): @@ -122,7 +122,6 @@ def stop_server(self, server_id): @result('text/plain') def get_server_output(self, server_id, length): - server_id = int(server_id) length = int(length) if server_id in self.opened_servers: # check if server_id in self.opened_servers return get_process_output(server_id, length) @@ -144,6 +143,20 @@ def get_server_metric(self, server_id): def get_server_list(self): return get_alive_fastdeploy_servers() + @result() + def get_server_config(self, server_id): + return get_process_model_configuration(server_id) + + @result() + def download_pretrain_model(self, cur_dir, model_name, version, + pretrain_model_name): + version_resource_dir = os.path.join( + os.path.abspath(cur_dir), model_name, version) + fd.download_model(name=pretrain_model_name, path=version_resource_dir) + os.system('mv {}/{}/* {} && rm -r {}/{}'.format( + version_resource_dir, pretrain_model_name, version_resource_dir, + version_resource_dir, pretrain_model_name)) + def create_fastdeploy_client(self): if self.client_port is None: @@ -185,7 +198,11 @@ def create_fastdeploy_api_call(): 'get_server_output': (api.get_server_output, ['server_id', 'length']), 'create_fastdeploy_client': (api.create_fastdeploy_client, []), 'get_server_list': (api.get_server_list, []), - 'get_server_metric': (api.get_server_metric, ['server_id']) + 'get_server_metric': (api.get_server_metric, ['server_id']), + 'get_server_config': (api.get_server_config, ['server_id']), + 'download_pretrain_model': + (api.download_pretrain_model, + ['dir', 'name', 'version', 'pretrain_model_name']), } def call(path: str, args): From 042189248915e63e52977088b659148b0650137e Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 23 Dec 2022 17:07:34 +0800 Subject: [PATCH 26/48] add get_server_config and download_pretrain_model api --- visualdl/component/inference/fastdeploy_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index b7f971994..617850ce9 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -358,7 +358,7 @@ def launch_process(kwargs: dict): cmd.append('{}'.format(value)) start_args[key] = value - all_model_configs, all_model_versions, _ = analyse_config( + all_model_configs, all_model_versions = analyse_config( start_args['model-repository']) model_repo_config = original_format_to_exchange_format( all_model_configs, all_model_versions) From e7c9e5323aa82b07c8a7642106eed237ec542249 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 27 Dec 2022 17:46:39 +0800 Subject: [PATCH 27/48] add unit for metric table --- .../fastdeploy_client/http_client_manager.py | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index 0e796c8a1..cf0b8a680 100644 --- 
a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -71,11 +71,11 @@ def prepare_request(inputs_meta, inputs_data, outputs_meta): 请求处理失败数 推理batch数 推理样本数 - 请求处理时间 - 任务队列等待时间 - 输入处理时间 - 模型推理时间 - 输出处理时间 + 请求处理时间(ms) + 任务队列等待时间(ms) + 输入处理时间(ms) + 模型推理时间(ms) + 输出处理时间(ms) {} @@ -93,12 +93,12 @@ def prepare_request(inputs_meta, inputs_data, outputs_meta): 显存 - 利用率 - 功率 - 功率限制 - 耗电量 - 总量 - 已使用 + 利用率(%) + 功率(W) + 功率限制(W) + 耗电量(W) + 总量(GB) + 已使用(GB) {} @@ -159,6 +159,18 @@ def get_metric_data(server_addr, metric_port): # noqa:C901 k, v = info.split('=') v = v.strip('"') infos[k] = v + if metric_name in [ + "nv_inference_request_duration_us", + "nv_inference_queue_duration_us", + "nv_inference_compute_input_duration_us", + "nv_inference_compute_infer_duration_us", + "nv_inference_compute_output_duration_us" + ]: + value = float(value) / 1000 + elif metric_name in [ + "nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes" + ]: + value = float(value) / 1024 / 1024 / 1024 for key, metric_names in metric_column_name.items(): if metric_name in metric_names: if key == 'Model': From 4c67a0229897030aa30cee0b16476188dc0f4f96 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 27 Dec 2022 18:07:34 +0800 Subject: [PATCH 28/48] add unit for metric table --- .../inference/fastdeploy_client/http_client_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index cf0b8a680..47872e9b1 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -166,11 +166,11 @@ def get_metric_data(server_addr, metric_port): # noqa:C901 "nv_inference_compute_infer_duration_us", "nv_inference_compute_output_duration_us" ]: - value = float(value) / 1000 + value = str(float(value) / 1000) elif metric_name in [ "nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes" ]: - value = float(value) / 1024 / 1024 / 1024 + value = str(float(value) / 1024 / 1024 / 1024) for key, metric_names in metric_column_name.items(): if metric_name in metric_names: if key == 'Model': From 15c23405e017ab5d3b5f80af8d772a9e0c8bf56a Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 28 Dec 2022 12:05:16 +0800 Subject: [PATCH 29/48] fix a bug --- visualdl/component/inference/fastdeploy_lib.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 617850ce9..3163c9a66 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -346,6 +346,7 @@ def launch_process(kwargs: dict): Launch a fastdeploy server according to specified arguments. 
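    Note: the keys 'default_model_name', 'server-name' and 'ensemble-img' are
    recorded for bookkeeping only, 'gpus' (when non-empty) is exported as
    CUDA_VISIBLE_DEVICES for the child process, and every other entry is
    passed through as a --key value flag. With made-up arguments,

        launch_process({'model-repository': './my_repo', 'http-port': '8000',
                        'gpus': '0', 'server-name': 'my_server',
                        'ensemble-img': '', 'default_model_name': ''})

    spawns fastdeployserver --model-repository ./my_repo --http-port 8000
    with CUDA_VISIBLE_DEVICES=0 in its environment.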
''' cmd = ['fastdeployserver'] + launch_env = os.environ.copy() start_args = {} for key, value in kwargs.items(): if key == 'default_model_name': # Used to fill client model_name automatically @@ -354,6 +355,10 @@ def launch_process(kwargs: dict): if key == 'server-name' or key == 'ensemble-img': # extra information start_args[key] = value continue + if key == 'gpus': + launch_env['CUDA_VISIBLE_DEVICES'] = value + start_args[key] = value + continue cmd.append('--{}'.format(key)) cmd.append('{}'.format(value)) start_args[key] = value @@ -372,7 +377,8 @@ def launch_process(kwargs: dict): os.path.join(FASTDEPLOYSERVER_PATH, logfilename), 'w', buffering=1), stderr=STDOUT, - universal_newlines=True) + universal_newlines=True, + env=launch_env) server_name = start_args['server-name'] if start_args[ 'server-name'] else p.pid with open( @@ -446,7 +452,7 @@ def get_process_logfile_name(server_id): with open( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)), 'r') as f: - filename = int(f.read().split('\n')[0]) + filename = f.read().split('\n')[0] return filename From f4e6a95c1967748f7c714b5433985451d564a6fb Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 28 Dec 2022 15:16:16 +0800 Subject: [PATCH 30/48] add judgement pretrained model download --- visualdl/component/inference/fastdeploy_server.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index e52eb6ec0..cd4d6309c 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -152,10 +152,17 @@ def download_pretrain_model(self, cur_dir, model_name, version, pretrain_model_name): version_resource_dir = os.path.join( os.path.abspath(cur_dir), model_name, version) - fd.download_model(name=pretrain_model_name, path=version_resource_dir) - os.system('mv {}/{}/* {} && rm -r {}/{}'.format( - version_resource_dir, pretrain_model_name, version_resource_dir, - version_resource_dir, pretrain_model_name)) + model_path = fd.download_model( + name=pretrain_model_name, path=version_resource_dir) + if model_path: + os.system('mv {}/{}/* {} && rm -r {}/{}'.format( + version_resource_dir, pretrain_model_name, + version_resource_dir, version_resource_dir, + pretrain_model_name)) + else: + raise RuntimeError( + "No pretrained model named {} can be downloaded".format( + pretrain_model_name)) def create_fastdeploy_client(self): if self.client_port is None: From a31c40ccde3209f6e7b484b57ddcbcebec1f632e Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 28 Dec 2022 15:46:14 +0800 Subject: [PATCH 31/48] add judgement pretrained model download --- visualdl/component/inference/fastdeploy_lib.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 3163c9a66..0e35fffc8 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -356,8 +356,9 @@ def launch_process(kwargs: dict): start_args[key] = value continue if key == 'gpus': - launch_env['CUDA_VISIBLE_DEVICES'] = value - start_args[key] = value + if value: + launch_env['CUDA_VISIBLE_DEVICES'] = value + start_args[key] = value continue cmd.append('--{}'.format(key)) cmd.append('{}'.format(value)) From 8d36f91204dd95dbeba2bc1e958c1f89e823ee3d Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 28 Dec 2022 17:45:39 +0800 Subject: [PATCH 32/48] 
add version info for frontend --- .../component/inference/fastdeploy_server.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index cd4d6309c..44ff6023d 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -15,6 +15,7 @@ import datetime import json import os +import re import shutil import socket import time @@ -159,6 +160,25 @@ def download_pretrain_model(self, cur_dir, model_name, version, version_resource_dir, pretrain_model_name, version_resource_dir, version_resource_dir, pretrain_model_name)) + version_info_for_frontend = [] + for version_name in os.listdir(os.path.join(cur_dir, model_name)): + if re.match( + r'\d+', + version_name): # version directory consists of numbers + version_filenames_dict_for_frontend = {} + version_filenames_dict_for_frontend['title'] = version_name + version_filenames_dict_for_frontend['key'] = version_name + version_filenames_dict_for_frontend['children'] = [] + for filename in os.listdir( + os.path.join(cur_dir, model_name, version_name)): + version_filenames_dict_for_frontend['children'].append( + { + 'title': filename, + 'key': filename + }) + version_info_for_frontend.append( + version_filenames_dict_for_frontend) + return version_info_for_frontend else: raise RuntimeError( "No pretrained model named {} can be downloaded".format( From cefa9f754105066b1e5c8aa5c55ee349864f3180 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 29 Dec 2022 11:38:59 +0800 Subject: [PATCH 33/48] rename download model --- visualdl/component/inference/fastdeploy_server.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 44ff6023d..a1aee8047 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -156,10 +156,15 @@ def download_pretrain_model(self, cur_dir, model_name, version, model_path = fd.download_model( name=pretrain_model_name, path=version_resource_dir) if model_path: - os.system('mv {}/{}/* {} && rm -r {}/{}'.format( - version_resource_dir, pretrain_model_name, - version_resource_dir, version_resource_dir, - pretrain_model_name)) + if '.onnx' in model_path: + os.system('mv {} {}/{}'.format(model_path, + os.path.dirname(model_path), + 'model.onnx')) + else: + os.system('mv {}/{}/* {} && rm -r {}/{}'.format( + version_resource_dir, pretrain_model_name, + version_resource_dir, version_resource_dir, + pretrain_model_name)) version_info_for_frontend = [] for version_name in os.listdir(os.path.join(cur_dir, model_name)): if re.match( From 86caf85ed72c889e7bd6afe268ed7662818983eb Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 29 Dec 2022 16:25:57 +0800 Subject: [PATCH 34/48] fix a bug --- visualdl/component/inference/fastdeploy_lib.py | 8 ++++---- .../component/inference/fastdeploy_server.py | 18 +++++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 0e35fffc8..c8470502a 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -91,12 +91,12 @@ def analyse_config(cur_dir: str): if re.match( r'\d+', model_sub_dir): # version directory consists of numbers + if model_name not in 
all_model_versions: + all_model_versions[model_name] = {} + if model_sub_dir not in all_model_versions[model_name]: + all_model_versions[model_name][model_sub_dir] = [] for version_resource_file in os.listdir( os.path.join(model_dir, model_sub_dir)): - if model_name not in all_model_versions: - all_model_versions[model_name] = {} - if model_sub_dir not in all_model_versions[model_name]: - all_model_versions[model_name][model_sub_dir] = [] all_model_versions[model_name][model_sub_dir].append( version_resource_file) if not all_model_configs: diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index a1aee8047..85fb58d7b 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -157,14 +157,18 @@ def download_pretrain_model(self, cur_dir, model_name, version, name=pretrain_model_name, path=version_resource_dir) if model_path: if '.onnx' in model_path: - os.system('mv {} {}/{}'.format(model_path, - os.path.dirname(model_path), - 'model.onnx')) + shutil.move( + model_path, + os.path.join(os.path.dirname(model_path), 'model.onnx')) else: - os.system('mv {}/{}/* {} && rm -r {}/{}'.format( - version_resource_dir, pretrain_model_name, - version_resource_dir, version_resource_dir, - pretrain_model_name)) + for filename in os.listdir(model_path): + if '.pdmodel' in filename or '.pdiparams' in filename: + shutil.move( + os.path.join(model_path, filename), + os.path.join( + os.path.dirname(model_path), 'model{}'.format( + os.path.splitext(filename)[1]))) + shutil.rmtree(model_path) version_info_for_frontend = [] for version_name in os.listdir(os.path.join(cur_dir, model_name)): if re.match( From 8c50447b15bb83a707eafd476895404f74f35ac8 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 29 Dec 2022 20:41:14 +0800 Subject: [PATCH 35/48] add fastdeploy model list --- .../component/inference/fastdeploy_server.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 85fb58d7b..989656f67 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -148,6 +148,39 @@ def get_server_list(self): def get_server_config(self, server_id): return get_process_model_configuration(server_id) + @result() + def get_pretrain_model_list(self): + ''' + Get all available fastdeploy models from hub server. 
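    The hub endpoint is expected to answer with JSON of the form
    {"status": 0, "data": {category: [{"name": ...}, ...], ...}}; model names
    are de-duplicated per category, sorted, and reshaped into cascader
    options for the frontend, e.g. (illustrative values):

        [{"value": "detection", "label": "detection",
          "children": [{"value": "yolov5", "label": "yolov5"}]}]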
+ ''' + res = requests.get( + 'http://paddlepaddle.org.cn/paddlehub/fastdeploy_listmodels') + result = res.json() + if result['status'] != 0: + raise RuntimeError("Can not get model list from hub model server.") + else: + data = result['data'] + model_list = {} + for category, models in data.items(): + if category not in model_list: + model_list[category] = set() + for model in models: + model_list[category].add(model['name']) + # adapt data format for frontend + models_info = [] + for category, model_names in model_list.items(): + models_info.append({ + "value": category, + "label": category, + "children": [] + }) + for model_name in sorted(model_names): + models_info[-1]["children"].append({ + "value": model_name, + "label": model_name + }) + return models_info + @result() def download_pretrain_model(self, cur_dir, model_name, version, pretrain_model_name): @@ -236,6 +269,7 @@ def create_fastdeploy_api_call(): 'get_server_list': (api.get_server_list, []), 'get_server_metric': (api.get_server_metric, ['server_id']), 'get_server_config': (api.get_server_config, ['server_id']), + 'get_pretrain_model_list': (api.get_pretrain_model_list, []), 'download_pretrain_model': (api.download_pretrain_model, ['dir', 'name', 'version', 'pretrain_model_name']), From db7ba0bcd7994a86dc5d2b6fe60f2b3b07ec7d35 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 3 Jan 2023 16:53:06 +0800 Subject: [PATCH 36/48] optimize for choose configuration files --- .../component/inference/fastdeploy_lib.py | 82 +++++++++++++++++-- .../component/inference/fastdeploy_server.py | 60 +++++++++++--- 2 files changed, 120 insertions(+), 22 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index c8470502a..dda957da1 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -17,6 +17,7 @@ import os import random import re +import shutil import signal import string from collections import defaultdict @@ -79,14 +80,38 @@ def analyse_config(cur_dir: str): model_dir, model_sub_dirs, filenames = os.walk( os.path.join(parent_dir, model_dir_name)).send(None) model_name = os.path.basename(model_dir) + config_filenames = [] for filename in filenames: - if 'config.pbtxt' in filename: - json_config = json.loads( - pbtxt2json(open(os.path.join(model_dir, filename)).read())) - all_model_configs[ - model_name] = json_config # store original config file content in json format - if 'name' not in json_config: - json_config['name'] = model_name + if '.pbtxt' in filename: + config_filenames.append( + filename + ) # filenames with extension .pbtxt are all config files + if config_filenames: + default_config_filename = config_filenames[0] + if 'config.pbtxt' in config_filenames: + default_config_filename = 'config.pbtxt' + config_filenames.remove(default_config_filename) + config_filenames.insert(0, default_config_filename) + else: + # if no config.pbtxt, we choose the first file in config_filenames list to create config.pbtxt + shutil.copy( + os.path.join(model_dir, default_config_filename), + os.path.join(model_dir, 'config.pbtxt')) + default_config_filename = 'config.pbtxt' + config_filenames.insert(0, default_config_filename) + json_config = json.loads( + pbtxt2json( + open(os.path.join(model_dir, + default_config_filename)).read())) + json_config[ + "config_filenames"] = config_filenames # add config_filenames to config data + all_model_configs[ + model_name] = json_config # store original config file content in json 
format + json_config[ + 'name'] = model_name # because name in config data may be different from model_name, + # model_name is model directory name actually, we should conform name with model_name. + else: + continue for model_sub_dir in model_sub_dirs: if re.match( r'\d+', @@ -100,8 +125,7 @@ def analyse_config(cur_dir: str): all_model_versions[model_name][model_sub_dir].append( version_resource_file) if not all_model_configs: - raise Exception( - 'Not a valid model repository, please choose the right path') + raise Exception('所选择的路径不是一个有效的模型库,请选择正确的路径') return all_model_configs, all_model_versions @@ -128,6 +152,8 @@ def exchange_format_to_original_format(exchange_format): # 2. delete versions information if 'versions' in model_config: del model_config['versions'] + if 'config_filenames' in model_config: + del model_config['config_filenames'] if 'platform' in model_config and model_config[ 'platform'] == 'ensemble': # emsemble model # 3. add 'ensembleScheduling' keyword @@ -296,6 +322,44 @@ def analyse_step_relationships(step_config, inputs, outputs): # noqa: C901 calculate_layout_for_frontend(models_dict) +def get_config_filenames_for_one_model(cur_dir, name): + _, _, filenames = os.walk(os.path.join(cur_dir, name)).send(None) + config_filenames = [] + for filename in filenames: + if '.pbtxt' in filename: + config_filenames.append( + filename + ) # filenames with extension .pbtxt are all config files + return config_filenames + + +def get_config_for_one_model(cur_dir, name, config_filename): + all_model_configs = {} + all_model_versions = {} + filename = os.path.join(cur_dir, name, config_filename) + json_config = json.loads(pbtxt2json(open(filename).read())) + if 'name' not in json_config: + json_config['name'] = name + all_model_configs[ + name] = json_config # store original config file content in json format + all_model_versions[name] = {} + for model_sub_dir in os.listdir(os.path.join(cur_dir, name)): + if re.match(r'\d+', + model_sub_dir): # version directory consists of numbers + if model_sub_dir not in all_model_versions[name]: + all_model_versions[name][model_sub_dir] = [] + for version_resource_file in os.listdir( + os.path.join(cur_dir, name, model_sub_dir)): + all_model_versions[name][model_sub_dir].append( + version_resource_file) + model_config = original_format_to_exchange_format(all_model_configs, + all_model_versions) + if model_config['ensembles']: + return model_config['ensembles'][0] + elif model_config['models']: + return model_config['models'][0] + + def calculate_layout_for_frontend(model_config_in_step): ''' Analyse model topology connections and prepare the positions for each model in layout. 
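The default-config rule applied in analyse_config above: every *.pbtxt file in a model directory is a candidate configuration; an existing config.pbtxt always wins and is listed first; otherwise the first candidate is copied to config.pbtxt so the server always finds a default. A minimal standalone sketch of that rule (the helper name and the endswith() matching are illustrative; the patch itself matches filenames by substring):

import os
import shutil


def pick_default_config(model_dir):
    # Every *.pbtxt file in the model directory is a candidate config.
    candidates = sorted(
        name for name in os.listdir(model_dir) if name.endswith('.pbtxt'))
    if not candidates:
        raise FileNotFoundError(
            'no .pbtxt config found in {}'.format(model_dir))
    if 'config.pbtxt' in candidates:
        # An existing config.pbtxt always wins and is listed first.
        candidates.remove('config.pbtxt')
    else:
        # Otherwise promote the first candidate so fastdeployserver
        # always finds a default configuration file.
        shutil.copy(
            os.path.join(model_dir, candidates[0]),
            os.path.join(model_dir, 'config.pbtxt'))
    candidates.insert(0, 'config.pbtxt')
    return candidates

For a directory holding only my_config.pbtxt, this returns ['config.pbtxt', 'my_config.pbtxt'] and leaves a promoted copy behind as config.pbtxt.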
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
index 989656f67..5e0dc143b 100644
--- a/visualdl/component/inference/fastdeploy_server.py
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -31,6 +31,8 @@
 from .fastdeploy_lib import exchange_format_to_original_format
 from .fastdeploy_lib import generate_metric_table
 from .fastdeploy_lib import get_alive_fastdeploy_servers
+from .fastdeploy_lib import get_config_filenames_for_one_model
+from .fastdeploy_lib import get_config_for_one_model
 from .fastdeploy_lib import get_process_model_configuration
 from .fastdeploy_lib import get_process_output
 from .fastdeploy_lib import get_start_arguments
@@ -74,19 +76,25 @@ def get_config(self, cur_dir):
                                                   all_model_versions)
 
     @result()
-    def config_update(self, cur_dir, model_name, config):
+    def config_update(self, cur_dir, model_name, config, config_filename):
         config = json.loads(config)
         all_models = exchange_format_to_original_format(config)
         model_dir = os.path.join(os.path.abspath(cur_dir), model_name)
         filtered_config = validate_data(all_models[model_name])
         text_proto = json2pbtxt(json.dumps(filtered_config))
-        # backup user's config.pbtxt first, when data corrupted by front-end, we still can recovery data
+        # back up the user's config data first; if the data gets corrupted by the front-end, we can still recover it
+        # backup config filename: {original_name}_vdlbackup_{datetime}.pbtxt
+        # a backup config can only be used to restore config.pbtxt
+        if 'vdlbackup' in config_filename:
+            raise RuntimeError("备份的配置文件不允许修改")
+        basename = os.path.splitext(config_filename)[0]
         shutil.copy(
-            os.path.join(model_dir, 'config.pbtxt'),
+            os.path.join(model_dir, config_filename),
             os.path.join(
-                model_dir, 'config_vdlbackup_{}.pbtxt'.format(
+                model_dir, '{}_vdlbackup_{}.pbtxt'.format(
+                    basename,
                     datetime.datetime.now().isoformat())))
-        with open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f:
+        with open(os.path.join(model_dir, config_filename), 'w') as f:
             f.write(text_proto)
         return
 
@@ -95,9 +103,7 @@ def start_server(self, configs):
         configs = json.loads(configs)
         process = launch_process(configs)
         if process.poll() is not None:
-            raise RuntimeError(
-                "Launch fastdeploy server failed, please check your launching arguments"
-            )
+            raise RuntimeError("启动fastdeployserver服务器失败,请检查启动参数")
         server_name = configs['server-name'] if configs[
             'server-name'] else process.pid
         self.opened_servers[server_name] = process
@@ -157,7 +163,7 @@ def get_pretrain_model_list(self):
             'http://paddlepaddle.org.cn/paddlehub/fastdeploy_listmodels')
         result = res.json()
         if result['status'] != 0:
-            raise RuntimeError("Can not get model list from hub model server.")
+            raise RuntimeError("从hub的模型服务器请求模型列表失败")
         else:
             data = result['data']
             model_list = {}
@@ -222,9 +228,30 @@ def download_pretrain_model(self, cur_dir, model_name, version,
                     version_filenames_dict_for_frontend)
             return version_info_for_frontend
         else:
-            raise RuntimeError(
-                "No pretrained model named {} can be downloaded".format(
-                    pretrain_model_name))
+            raise RuntimeError("预训练模型{}下载失败".format(pretrain_model_name))
+
+    @result()
+    def get_config_for_model(self, cur_dir, name, config_filename):
+        return get_config_for_one_model(cur_dir, name, config_filename)
+
+    @result()
+    def get_config_filenames_for_model(self, cur_dir, name):
+        return get_config_filenames_for_one_model(cur_dir, name)
+
+    @result()
+    def set_default_config_for_model(self, cur_dir, name, config_filename):
+        model_dir = os.path.join(os.path.abspath(cur_dir), name)
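+        # Example of the flow below (illustrative filename and timestamp):
+        # making 'my_config.pbtxt' the default first backs up the current
+        # config.pbtxt as 'config_vdlbackup_2023-01-03T16:53:06.pbtxt', then
+        # copies my_config.pbtxt over config.pbtxt.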
+ # backup config.pbtxt to config_vdlbackup_{datetime}.pbtxt + if os.path.exists(os.path.join(model_dir, 'config.pbtxt')): + shutil.copy( + os.path.join(model_dir, 'config.pbtxt'), + os.path.join( + model_dir, 'config_vdlbackup_{}.pbtxt'.format( + datetime.datetime.now().isoformat()))) + shutil.copy( + os.path.join(model_dir, config_filename), + os.path.join(model_dir, 'config.pbtxt')) + return def create_fastdeploy_client(self): if self.client_port is None: @@ -260,8 +287,15 @@ def create_fastdeploy_api_call(): api = FastDeployServerApi() routes = { 'get_directory': (api.get_directory, ['dir']), - 'config_update': (api.config_update, ['dir', 'name', 'config']), + 'config_update': (api.config_update, + ['dir', 'name', 'config', 'config_filename']), 'get_config': (api.get_config, ['dir']), + 'get_config_filenames_for_model': (api.get_config_filenames_for_model, + ['dir', 'name']), + 'get_config_for_model': (api.get_config_for_model, + ['dir', 'name', 'config_filename']), + 'set_default_config_for_model': (api.set_default_config_for_model, + ['dir', 'name', 'config_filename']), 'start_server': (api.start_server, ['config']), 'stop_server': (api.stop_server, ['server_id']), 'get_server_output': (api.get_server_output, ['server_id', 'length']), From 6ac3b4b8753ab60ed1e4b039db7b4b4ea5efe0fd Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 4 Jan 2023 20:50:15 +0800 Subject: [PATCH 37/48] modify according to frontend need --- visualdl/component/inference/fastdeploy_lib.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index dda957da1..b711c92cc 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -103,8 +103,9 @@ def analyse_config(cur_dir: str): pbtxt2json( open(os.path.join(model_dir, default_config_filename)).read())) - json_config[ - "config_filenames"] = config_filenames # add config_filenames to config data + json_config["config_filenames"] = config_filenames[ + 0] # add config_filenames to config data (frontend developer said he only wanted one filename, + # and to request config_filenames by get_config_filenames_for_one_model later) all_model_configs[ model_name] = json_config # store original config file content in json format json_config[ @@ -340,6 +341,7 @@ def get_config_for_one_model(cur_dir, name, config_filename): json_config = json.loads(pbtxt2json(open(filename).read())) if 'name' not in json_config: json_config['name'] = name + json_config["config_filenames"] = config_filename all_model_configs[ name] = json_config # store original config file content in json format all_model_versions[name] = {} From 1067387887ef79821f019dd9ce704dd4f61b83ec Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 5 Jan 2023 15:54:19 +0800 Subject: [PATCH 38/48] fix name in config to model name --- visualdl/component/inference/fastdeploy_lib.py | 5 +++-- visualdl/component/inference/fastdeploy_server.py | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index b711c92cc..c8e7889d3 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -339,8 +339,9 @@ def get_config_for_one_model(cur_dir, name, config_filename): all_model_versions = {} filename = os.path.join(cur_dir, name, config_filename) json_config = 
json.loads(pbtxt2json(open(filename).read())) - if 'name' not in json_config: - json_config['name'] = name + json_config[ + 'name'] = name # because name in config data may be different from model_name, + # model_name is model directory name actually, we should conform name with model_name. json_config["config_filenames"] = config_filename all_model_configs[ name] = json_config # store original config file content in json format diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 5e0dc143b..fe852d8c8 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -248,9 +248,10 @@ def set_default_config_for_model(self, cur_dir, name, config_filename): os.path.join( model_dir, 'config_vdlbackup_{}.pbtxt'.format( datetime.datetime.now().isoformat()))) - shutil.copy( - os.path.join(model_dir, config_filename), - os.path.join(model_dir, 'config.pbtxt')) + if config_filename != 'config.pbtxt': + shutil.copy( + os.path.join(model_dir, config_filename), + os.path.join(model_dir, 'config.pbtxt')) return def create_fastdeploy_client(self): From c37fe8e31c55f079da24a65fcd29ebdebd02cdf5 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 6 Jan 2023 13:26:19 +0800 Subject: [PATCH 39/48] optimize for server list and alive judgement --- visualdl/component/inference/fastdeploy_lib.py | 11 +++++++++-- visualdl/component/inference/fastdeploy_server.py | 13 ++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index c8e7889d3..72196ee5b 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -326,11 +326,18 @@ def analyse_step_relationships(step_config, inputs, outputs): # noqa: C901 def get_config_filenames_for_one_model(cur_dir, name): _, _, filenames = os.walk(os.path.join(cur_dir, name)).send(None) config_filenames = [] + backup_config_filenames = [] for filename in filenames: - if '.pbtxt' in filename: + if '.pbtxt' in filename and 'vdlbackup' not in filename: config_filenames.append( filename - ) # filenames with extension .pbtxt are all config files + ) # filenames with extension .pbtxt and not contain 'vdlbackup' are normal config files + elif '.pbtxt' in filename and 'vdlbackup' in filename: + backup_config_filenames.append( + filename + ) # filenames with extension .pbtxt and contain 'vdlbackup' are backup config files + config_filenames = sorted(config_filenames) + sorted( + backup_config_filenames) return config_filenames diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index fe852d8c8..0249fdbf5 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -27,6 +27,7 @@ from .fastdeploy_client.client_app import create_gradio_client_app from .fastdeploy_lib import analyse_config +from .fastdeploy_lib import check_process_alive from .fastdeploy_lib import delete_files_for_process from .fastdeploy_lib import exchange_format_to_original_format from .fastdeploy_lib import generate_metric_table @@ -103,7 +104,8 @@ def start_server(self, configs): configs = json.loads(configs) process = launch_process(configs) if process.poll() is not None: - raise RuntimeError("启动fastdeployserver服务器失败,请检查启动参数") + raise RuntimeError( + "启动fastdeployserver服务器失败,请检查环境中是否存在fastdeployserver程序") 
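+        # With no explicit server-name argument, the child pid doubles as the
+        # server identifier (it is the key used later to look the server up).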
server_name = configs['server-name'] if configs[ 'server-name'] else process.pid self.opened_servers[server_name] = process @@ -150,6 +152,14 @@ def get_server_metric(self, server_id): def get_server_list(self): return get_alive_fastdeploy_servers() + @result() + def check_server_alive(self, server_id): + if check_process_alive(server_id) is False: + delete_files_for_process(server_id) + raise RuntimeError( + "服务{}由于发生异常而退出,通常是由于启动参数设置不当或者环境配置有问题,请检查服务日志查看原因,然后手动关闭该服务项") + return + @result() def get_server_config(self, server_id): return get_process_model_configuration(server_id) @@ -305,6 +315,7 @@ def create_fastdeploy_api_call(): 'get_server_metric': (api.get_server_metric, ['server_id']), 'get_server_config': (api.get_server_config, ['server_id']), 'get_pretrain_model_list': (api.get_pretrain_model_list, []), + 'check_server_alive': (api.check_server_alive, ['server_id']), 'download_pretrain_model': (api.download_pretrain_model, ['dir', 'name', 'version', 'pretrain_model_name']), From 1acff82eabf9e8dd124539d57643ceaff91ce18a Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 6 Jan 2023 17:03:17 +0800 Subject: [PATCH 40/48] keep server name as string type --- visualdl/component/inference/fastdeploy_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 0249fdbf5..6498b54f8 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -107,7 +107,7 @@ def start_server(self, configs): raise RuntimeError( "启动fastdeployserver服务器失败,请检查环境中是否存在fastdeployserver程序") server_name = configs['server-name'] if configs[ - 'server-name'] else process.pid + 'server-name'] else str(process.pid) self.opened_servers[server_name] = process return server_name From 29c72e8ad56ad1de02a7cebc8198f29abdc0e76f Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 6 Jan 2023 19:57:04 +0800 Subject: [PATCH 41/48] optimize process judgement logic --- requirements.txt | 1 + .../fastdeploy_client/http_client_manager.py | 2 +- .../component/inference/fastdeploy_lib.py | 58 ++++++++++++++++++- .../component/inference/fastdeploy_server.py | 29 ++++++---- 4 files changed, 75 insertions(+), 15 deletions(-) diff --git a/requirements.txt b/requirements.txt index b3a3bd220..943f7c4eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ gradio fastdeploy-python tritonclient[all] attrdict +psutil diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index 47872e9b1..53aaca0e9 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -239,7 +239,7 @@ def _create_client(self, server_url): except Exception: raise RuntimeError( 'Can not connect to server {}, please check your \ - server address'.format(server_url)) + server address'.format(server_url)) def infer(self, server_url, model_name, model_version, inputs): fastdeploy_client = self._create_client(server_url) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 72196ee5b..8527c798f 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -26,6 +26,7 @@ import google.protobuf.json_format as json_format import google.protobuf.text_format as text_format 
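+# psutil is used to verify that a pid recorded in a server logfile still
+# belongs to a fastdeployserver process, since pids can be recycled by the OS.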
+import psutil
 import requests
 
 from .proto.model_config.protxt_pb2 import ModelConfig
@@ -437,7 +438,10 @@ def launch_process(kwargs: dict):
         cmd.append('--{}'.format(key))
         cmd.append('{}'.format(value))
         start_args[key] = value
-
+    if start_args['server-name'] and start_args['server-name'] in os.listdir(
+            FASTDEPLOYSERVER_PATH):
+        raise RuntimeError("启动服务失败,服务名称{}已经被使用,请重新填写服务名称".format(
+            start_args['server-name']))
     all_model_configs, all_model_versions = analyse_config(
         start_args['model-repository'])
     model_repo_config = original_format_to_exchange_format(
@@ -568,6 +572,29 @@ def get_process_output(server_id, length):
     return data
 
 
+def mark_pid_for_dead_process(server_id):
+    '''
+    Resource files for a dead server are only deleted when the user closes the server in the frontend.
+    When the user closes the server, the pid recorded in the logfile will be killed.
+    In case a dead process id is reassigned to a new process, we should mark the pid recorded in the logfile as outdated.
+    Here, we choose to replace the pid with -1 in the logfile to denote the zombie process \
+        which has been polled and is already dead.
+    Args:
+        server_id(str): fastdeployserver process name
+    '''
+    if os.path.exists(
+            os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
+        with open(
+                os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
+                'r') as f:
+            contents = f.read().split('\n')
+        contents[1] = '-1'  # we replace the pid with -1
+        with open(
+                os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
+                'w') as f:
+            f.write('\n'.join(contents))
+
+
 def delete_files_for_process(server_id):
     '''
     Delete logfile for fastdeployserver process.
@@ -591,9 +618,10 @@ def kill_process(process):
     '''
     if type(process) == str:  # server_id, use os.kill to terminate
         pid = get_process_pid(process)
+        if pid == -1:  # we use -1 to mark a dead process
+            return
         try:
             os.kill(pid, signal.SIGKILL)
-            # delete file ${pid} if exists
         except Exception:
             pass
     else:
@@ -624,6 +652,21 @@ def get_alive_fastdeploy_servers():
     return server_names
 
 
+def check_process_zombie(server_id):
+    '''
+    Given a server id, check whether the process has become a zombie (its recorded pid was marked as -1).
+    Args:
+        server_id(str): fastdeployserver process name
+    Return:
+        status(bool): True if the process has become a zombie.
+    '''
+    pid = get_process_pid(server_id)
+    if pid == -1:
+        return True
+    else:
+        return False
+
+
 def check_process_alive(server_id):
     '''
     Given a server id, check whether the process is alive or not.
@@ -635,12 +678,21 @@ def check_process_alive(server_id):
     pid = get_process_pid(server_id)
     if pid is None:
         return False
+    if pid == -1:  # We use -1 to mark a zombie process which is already dead.
+        # Since the user may want to know why the process died from an exception,
+        # we return True so the frontend can still fetch the dead process's log.
+        return True
     try:
         os.kill(pid, 0)
    except OSError:
         return False
     else:
-        return True
+        if 'fastdeployserve' not in psutil.Process(pid).name(
+        ):  # make sure the pid really is a fastdeployserver process, in case the pid has been reassigned.
+            # Note: psutil reports 'fastdeployserve' rather than 'fastdeployserver' because Linux truncates process names (/proc/<pid>/comm) to 15 characters.
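+            # A name mismatch means the pid has been recycled for an unrelated
+            # process, so the original server is gone.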
+            return False
+        else:
+            return True
 
 
 _metric_column_name = {
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
index 6498b54f8..b5d8af42c 100644
--- a/visualdl/component/inference/fastdeploy_server.py
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -27,7 +27,7 @@
 from .fastdeploy_client.client_app import create_gradio_client_app
 from .fastdeploy_lib import analyse_config
-from .fastdeploy_lib import check_process_alive
+from .fastdeploy_lib import check_process_zombie
 from .fastdeploy_lib import delete_files_for_process
 from .fastdeploy_lib import exchange_format_to_original_format
 from .fastdeploy_lib import generate_metric_table
@@ -40,6 +40,7 @@
 from .fastdeploy_lib import json2pbtxt
 from .fastdeploy_lib import kill_process
 from .fastdeploy_lib import launch_process
+from .fastdeploy_lib import mark_pid_for_dead_process
 from .fastdeploy_lib import original_format_to_exchange_format
 from .fastdeploy_lib import validate_data
 from visualdl.server.api import gen_result
@@ -121,13 +122,7 @@ def stop_server(self, server_id):
         # FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn)
         kill_process(server_id)
         delete_files_for_process(server_id)
-        # check if there are servers killed by other vdl app instance and become zoombie
-        should_delete = []
-        for server_id, process in self.opened_servers.items():
-            if process.poll() is not None:
-                should_delete.append(server_id)
-        for server_id in should_delete:
-            del self.opened_servers[server_id]
+        self._poll_zombie_process()
 
     @result('text/plain')
     def get_server_output(self, server_id, length):
@@ -154,10 +149,11 @@ def get_server_list(self):
 
     @result()
     def check_server_alive(self, server_id):
-        if check_process_alive(server_id) is False:
-            delete_files_for_process(server_id)
+        self._poll_zombie_process()
+        if check_process_zombie(server_id) is True:
             raise RuntimeError(
-                "服务{}由于发生异常而退出,通常是由于启动参数设置不当或者环境配置有问题,请检查服务日志查看原因,然后手动关闭该服务项")
+                "服务{}由于发生异常或者被kill而退出,通常是由于启动参数设置不当或者环境配置有问题,请检查服务日志查看原因,然后手动关闭该服务项"
+                .format(server_id))
         return
 
     @result()
@@ -293,6 +289,17 @@ def check_alive():
         check_alive()
         return self.client_port
 
+    def _poll_zombie_process(self):
+        # check if there are servers killed by other vdl app instances that have become zombies
+        should_delete = []
+        for server_id, process in self.opened_servers.items():
+            if process.poll() is not None:
+                mark_pid_for_dead_process(server_id)
+                should_delete.append(server_id)
+
+        for server_id in should_delete:
+            del self.opened_servers[server_id]
+
 
 def create_fastdeploy_api_call():
     api = FastDeployServerApi()
From 92005ce1a3445037bcec103ffd35964896df7702 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Sun, 8 Jan 2023 18:44:59 +0800
Subject: [PATCH 42/48] optimize for deleting resource files

---
 .../inference/fastdeploy_client/client_app.py |  1 -
 .../fastdeploy_client/http_client_manager.py  | 12 +++--
 .../component/inference/fastdeploy_lib.py     |  6 +++
 .../component/inference/fastdeploy_server.py  | 51 +++++++++++++++++++
 4 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py
index 7b28bdbe3..397b8255a 100644
--- a/visualdl/component/inference/fastdeploy_client/client_app.py
+++ b/visualdl/component/inference/fastdeploy_client/client_app.py
@@ -313,7 +313,6 @@ def component_inference(*args):
         try:
             infer_results = _http_manager.infer(
                 server_addr, model_name, model_version, inputs)
-
print('infer_results', infer_results) results = {status_text: 'Inference Successful'} for i, (output_name, data) in enumerate(infer_results.items()): diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index 53aaca0e9..572a837a7 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -252,9 +252,15 @@ def infer(self, server_url, model_name, model_version, inputs): results = {} for output in output_metadata: result = response.as_numpy(output.name) # datatype: numpy - if output.datatype == 'BYTES': - try: # maybe not vison tasks, normal text - value = result[0][0] # datatype: bytes + if output.datatype == 'BYTES': # datatype: bytes + try: + value = result + if len(result.shape) == 1: + value = result[0] + elif len(result.shape) == 2: + value = result[0][0] + elif len(result.shape) == 3: + value = result[0][0][0] result = json.loads(value) # datatype: json except Exception: pass diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 8527c798f..e99cfa16e 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -126,6 +126,12 @@ def analyse_config(cur_dir: str): os.path.join(model_dir, model_sub_dir)): all_model_versions[model_name][model_sub_dir].append( version_resource_file) + if model_name not in all_model_versions: # if a model has config but no version directory, + # to convenient users, we create one + all_model_versions[model_name] = {} + os.mkdir(os.path.join(model_dir, '1')) + all_model_versions[model_name]['1'] = [] + if not all_model_configs: raise Exception('所选择的路径不是一个有效的模型库,请选择正确的路径') return all_model_configs, all_model_versions diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index b5d8af42c..c2bd25b34 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -213,6 +213,11 @@ def download_pretrain_model(self, cur_dir, model_name, version, os.path.join( os.path.dirname(model_path), 'model{}'.format( os.path.splitext(filename)[1]))) + else: + shutil.move( + os.path.join(model_path, filename), + os.path.join( + os.path.dirname(model_path), filename)) shutil.rmtree(model_path) version_info_for_frontend = [] for version_name in os.listdir(os.path.join(cur_dir, model_name)): @@ -244,6 +249,16 @@ def get_config_for_model(self, cur_dir, name, config_filename): def get_config_filenames_for_model(self, cur_dir, name): return get_config_filenames_for_one_model(cur_dir, name) + @result() + def delete_config_for_model(self, cur_dir, name, config_filename): + if self.root_dir not in Path( + os.path.abspath(cur_dir) + ).parents: # should prevent user remove files outside model-repository + raise RuntimeError('所删除的文件路径有误') + if os.path.exists(os.path.join(cur_dir, name, config_filename)): + os.remove(os.path.join(cur_dir, name, config_filename)) + return get_config_filenames_for_one_model(cur_dir, name) + @result() def set_default_config_for_model(self, cur_dir, name, config_filename): model_dir = os.path.join(os.path.abspath(cur_dir), name) @@ -260,6 +275,37 @@ def set_default_config_for_model(self, cur_dir, name, config_filename): os.path.join(model_dir, 'config.pbtxt')) return + @result() + def delete_resource_for_model(self, cur_dir, 
model_name, version, + resource_filename): + if self.root_dir not in Path( + os.path.abspath(cur_dir) + ).parents: # should prevent user remove files outside model-repository + raise RuntimeError('所删除的文件路径有误') + resource_path = os.path.join( + os.path.abspath(cur_dir), model_name, version, resource_filename) + if os.path.exists(resource_path): + os.remove(resource_path) + version_info_for_frontend = [] + for version_name in os.listdir(os.path.join(cur_dir, model_name)): + if re.match(r'\d+', + version_name): # version directory consists of numbers + version_filenames_dict_for_frontend = {} + version_filenames_dict_for_frontend['title'] = version_name + version_filenames_dict_for_frontend['key'] = version_name + version_filenames_dict_for_frontend['children'] = [] + for filename in os.listdir( + os.path.join(cur_dir, model_name, version_name)): + version_filenames_dict_for_frontend['children'].append({ + 'title': + filename, + 'key': + filename + }) + version_info_for_frontend.append( + version_filenames_dict_for_frontend) + return version_info_for_frontend + def create_fastdeploy_client(self): if self.client_port is None: @@ -314,6 +360,8 @@ def create_fastdeploy_api_call(): ['dir', 'name', 'config_filename']), 'set_default_config_for_model': (api.set_default_config_for_model, ['dir', 'name', 'config_filename']), + 'delete_config_for_model': (api.delete_config_for_model, + ['dir', 'name', 'config_filename']), 'start_server': (api.start_server, ['config']), 'stop_server': (api.stop_server, ['server_id']), 'get_server_output': (api.get_server_output, ['server_id', 'length']), @@ -326,6 +374,9 @@ def create_fastdeploy_api_call(): 'download_pretrain_model': (api.download_pretrain_model, ['dir', 'name', 'version', 'pretrain_model_name']), + 'delete_resource_for_model': + (api.delete_resource_for_model, + ['dir', 'name', 'version', 'resource_filename']) } def call(path: str, args): From aeb2c9ba351e547fc132666577cbc2671eecbf5d Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 9 Jan 2023 11:51:20 +0800 Subject: [PATCH 43/48] add rename resource file --- visualdl/component/inference/fastdeploy_lib.py | 16 ++++++++++++---- .../component/inference/fastdeploy_server.py | 18 +++++++++++------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index e99cfa16e..a6b31ac21 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -17,7 +17,6 @@ import os import random import re -import shutil import signal import string from collections import defaultdict @@ -95,9 +94,8 @@ def analyse_config(cur_dir: str): config_filenames.insert(0, default_config_filename) else: # if no config.pbtxt, we choose the first file in config_filenames list to create config.pbtxt - shutil.copy( - os.path.join(model_dir, default_config_filename), - os.path.join(model_dir, 'config.pbtxt')) + copy_config_file_to_default_config(model_dir, + default_config_filename) default_config_filename = 'config.pbtxt' config_filenames.insert(0, default_config_filename) json_config = json.loads( @@ -191,6 +189,16 @@ def exchange_format_to_original_format(exchange_format): return all_models +def copy_config_file_to_default_config(model_dir, config_name): + json_config = json.loads( + pbtxt2json(open(os.path.join(model_dir, config_name)).read())) + model_name = os.path.basename(model_dir) + json_config['name'] = model_name + text_proto = json2pbtxt(json.dumps(json_config)) + with 
open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f: + f.write(text_proto) + + def original_format_to_exchange_format(original_format, version_info): ''' Change config original format to exchange format. diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index c2bd25b34..58eba20c9 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -28,6 +28,7 @@ from .fastdeploy_client.client_app import create_gradio_client_app from .fastdeploy_lib import analyse_config from .fastdeploy_lib import check_process_zombie +from .fastdeploy_lib import copy_config_file_to_default_config from .fastdeploy_lib import delete_files_for_process from .fastdeploy_lib import exchange_format_to_original_format from .fastdeploy_lib import generate_metric_table @@ -270,22 +271,22 @@ def set_default_config_for_model(self, cur_dir, name, config_filename): model_dir, 'config_vdlbackup_{}.pbtxt'.format( datetime.datetime.now().isoformat()))) if config_filename != 'config.pbtxt': - shutil.copy( - os.path.join(model_dir, config_filename), - os.path.join(model_dir, 'config.pbtxt')) + copy_config_file_to_default_config(model_dir, config_filename) return @result() def delete_resource_for_model(self, cur_dir, model_name, version, - resource_filename): + resource_filename, new_filename): if self.root_dir not in Path( os.path.abspath(cur_dir) ).parents: # should prevent user remove files outside model-repository - raise RuntimeError('所删除的文件路径有误') + raise RuntimeError('所重命名的文件路径有误') resource_path = os.path.join( os.path.abspath(cur_dir), model_name, version, resource_filename) + new_file_path = os.path.join( + os.path.abspath(cur_dir), model_name, version, new_filename) if os.path.exists(resource_path): - os.remove(resource_path) + shutil.move(resource_path, new_file_path) version_info_for_frontend = [] for version_name in os.listdir(os.path.join(cur_dir, model_name)): if re.match(r'\d+', @@ -376,7 +377,10 @@ def create_fastdeploy_api_call(): ['dir', 'name', 'version', 'pretrain_model_name']), 'delete_resource_for_model': (api.delete_resource_for_model, - ['dir', 'name', 'version', 'resource_filename']) + ['dir', 'name', 'version', 'resource_filename']), + 'rename_resource_for_model': (api.rename_resource_for_model, [ + 'dir', 'name', 'version', 'resource_filename', 'new_filename' + ]) } def call(path: str, args): From 23a6c69de81f2881725fca3a1df0198924ce428b Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 9 Jan 2023 12:00:46 +0800 Subject: [PATCH 44/48] fix --- .../component/inference/fastdeploy_server.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 58eba20c9..6397c33e5 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -276,6 +276,37 @@ def set_default_config_for_model(self, cur_dir, name, config_filename): @result() def delete_resource_for_model(self, cur_dir, model_name, version, + resource_filename): + if self.root_dir not in Path( + os.path.abspath(cur_dir) + ).parents: # should prevent user remove files outside model-repository + raise RuntimeError('所删除的文件路径有误') + resource_path = os.path.join( + os.path.abspath(cur_dir), model_name, version, resource_filename) + if os.path.exists(resource_path): + os.remove(resource_path) + version_info_for_frontend = [] + for version_name in 
os.listdir(os.path.join(cur_dir, model_name)): + if re.match(r'\d+', + version_name): # version directory consists of numbers + version_filenames_dict_for_frontend = {} + version_filenames_dict_for_frontend['title'] = version_name + version_filenames_dict_for_frontend['key'] = version_name + version_filenames_dict_for_frontend['children'] = [] + for filename in os.listdir( + os.path.join(cur_dir, model_name, version_name)): + version_filenames_dict_for_frontend['children'].append({ + 'title': + filename, + 'key': + filename + }) + version_info_for_frontend.append( + version_filenames_dict_for_frontend) + return version_info_for_frontend + + @result() + def rename_resource_for_model(self, cur_dir, model_name, version, resource_filename, new_filename): if self.root_dir not in Path( os.path.abspath(cur_dir) From 00566df408a86e9226554ba98625d8423a44fed6 Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 9 Jan 2023 14:27:58 +0800 Subject: [PATCH 45/48] fix a bug --- .../fastdeploy_client/http_client_manager.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index 572a837a7..691594152 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -15,6 +15,7 @@ import json import re +import numpy as np import requests import tritonclient.http as httpclient from attrdict import AttrDict @@ -40,6 +41,18 @@ def prepare_request(inputs_meta, inputs_data, outputs_meta): raise RuntimeError( 'Error: input name {} required for model not existed.'.format( input_name)) + if input_dict['datatype'] == 'FP32': + inputs_data[input_name] = inputs_data[input_name].astype( + np.float32 + ) / 255 # image data returned by gradio is uint8, convert to fp32 + if len(input_dict['shape'] + ) == 3 and input_dict['shape'][0] == 3: # NCHW + inputs_data[input_name] = inputs_data[input_name][0].transpose( + 2, 0, 1) + elif len(input_dict['shape'] + ) == 4 and input_dict['shape'][1] == 3: # NCHW + inputs_data[input_name] = inputs_data[input_name].transpose( + 0, 3, 1, 2) infer_input = httpclient.InferInput( input_name, inputs_data[input_name].shape, input_dict['datatype']) infer_input.set_data_from_numpy(inputs_data[input_name]) @@ -249,6 +262,7 @@ def infer(self, server_url, model_name, model_version, inputs): output_metadata) response = fastdeploy_client.infer( model_name, inputs, model_version=model_version, outputs=outputs) + results = {} for output in output_metadata: result = response.as_numpy(output.name) # datatype: numpy From 86c73cd3ff44bfee117fdba255f0a0a69cc16ff4 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 10 Jan 2023 15:47:24 +0800 Subject: [PATCH 46/48] optimize code structure --- .../inference/fastdeploy_client/__init__.py | 14 + .../component/inference/fastdeploy_lib.py | 2 +- .../inference/proto/model_config/__init__.py | 0 .../proto/model_config/protxt_pb2.py | 856 ------------------ 4 files changed, 15 insertions(+), 857 deletions(-) delete mode 100644 visualdl/component/inference/proto/model_config/__init__.py delete mode 100644 visualdl/component/inference/proto/model_config/protxt_pb2.py diff --git a/visualdl/component/inference/fastdeploy_client/__init__.py b/visualdl/component/inference/fastdeploy_client/__init__.py index e69de29bb..9c19f7b87 100644 --- a/visualdl/component/inference/fastdeploy_client/__init__.py +++ 
b/visualdl/component/inference/fastdeploy_client/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022 VisualDL Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ======================================================================= diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index a6b31ac21..3c0138f93 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -28,7 +28,7 @@ import psutil import requests -from .proto.model_config.protxt_pb2 import ModelConfig +from .proto.model_config_pb2 import ModelConfig from visualdl.utils.dir import FASTDEPLOYSERVER_PATH diff --git a/visualdl/component/inference/proto/model_config/__init__.py b/visualdl/component/inference/proto/model_config/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/visualdl/component/inference/proto/model_config/protxt_pb2.py b/visualdl/component/inference/proto/model_config/protxt_pb2.py deleted file mode 100644 index 70bf7b906..000000000 --- a/visualdl/component/inference/proto/model_config/protxt_pb2.py +++ /dev/null @@ -1,856 +0,0 @@ -# flake8: noqa -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: model_config.protxt -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import enum_type_wrapper -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x13model_config.protxt\x12\tinference\"\x96\x01\n\x10ModelRateLimiter\x12\x37\n\tresources\x18\x01 \x03(\x0b\x32$.inference.ModelRateLimiter.Resource\x12\x10\n\x08priority\x18\x02 \x01(\r\x1a\x37\n\x08Resource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06global\x18\x02 \x01(\x08\x12\r\n\x05\x63ount\x18\x03 \x01(\r\"\x87\x04\n\x12ModelInstanceGroup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x30\n\x04kind\x18\x04 \x01(\x0e\x32\".inference.ModelInstanceGroup.Kind\x12\r\n\x05\x63ount\x18\x02 \x01(\x05\x12\x31\n\x0crate_limiter\x18\x06 \x01(\x0b\x32\x1b.inference.ModelRateLimiter\x12\x0c\n\x04gpus\x18\x03 \x03(\x05\x12H\n\x11secondary_devices\x18\x08 \x03(\x0b\x32-.inference.ModelInstanceGroup.SecondaryDevice\x12\x0f\n\x07profile\x18\x05 \x03(\t\x12\x0f\n\x07passive\x18\x07 \x01(\x08\x12\x13\n\x0bhost_policy\x18\t \x01(\t\x1a\x9c\x01\n\x0fSecondaryDevice\x12O\n\x04kind\x18\x01 \x01(\x0e\x32\x41.inference.ModelInstanceGroup.SecondaryDevice.SecondaryDeviceKind\x12\x11\n\tdevice_id\x18\x02 \x01(\x03\"%\n\x13SecondaryDeviceKind\x12\x0e\n\nKIND_NVDLA\x10\x00\"A\n\x04Kind\x12\r\n\tKIND_AUTO\x10\x00\x12\x0c\n\x08KIND_GPU\x10\x01\x12\x0c\n\x08KIND_CPU\x10\x02\x12\x0e\n\nKIND_MODEL\x10\x03\"#\n\x12ModelTensorReshape\x12\r\n\x05shape\x18\x01 \x03(\x03\"\xb2\x02\n\nModelInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12,\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1c.inference.ModelInput.Format\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\x12\x1a\n\x12\x61llow_ragged_batch\x18\x07 \x01(\x08\x12\x10\n\x08optional\x18\x08 \x01(\x08\";\n\x06\x46ormat\x12\x0f\n\x0b\x46ORMAT_NONE\x10\x00\x12\x0f\n\x0b\x46ORMAT_NHWC\x10\x01\x12\x0f\n\x0b\x46ORMAT_NCHW\x10\x02\"\xb2\x01\n\x0bModelOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x03 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x16\n\x0elabel_filename\x18\x04 \x01(\t\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\"\xd9\x02\n\nBatchInput\x12(\n\x04kind\x18\x01 \x01(\x0e\x32\x1a.inference.BatchInput.Kind\x12\x13\n\x0btarget_name\x18\x02 \x03(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x14\n\x0csource_input\x18\x04 \x03(\t\"\xcd\x01\n\x04Kind\x12\x17\n\x13\x42\x41TCH_ELEMENT_COUNT\x10\x00\x12#\n\x1f\x42\x41TCH_ACCUMULATED_ELEMENT_COUNT\x10\x01\x12-\n)BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO\x10\x02\x12$\n BATCH_MAX_ELEMENT_COUNT_AS_SHAPE\x10\x03\x12\x14\n\x10\x42\x41TCH_ITEM_SHAPE\x10\x04\x12\x1c\n\x18\x42\x41TCH_ITEM_SHAPE_FLATTEN\x10\x05\"\x8f\x01\n\x0b\x42\x61tchOutput\x12\x13\n\x0btarget_name\x18\x01 \x03(\t\x12)\n\x04kind\x18\x02 \x01(\x0e\x32\x1b.inference.BatchOutput.Kind\x12\x14\n\x0csource_input\x18\x03 
\x03(\t\"*\n\x04Kind\x12\"\n\x1e\x42\x41TCH_SCATTER_WITH_INPUT_SHAPE\x10\x00\"\x90\x02\n\x12ModelVersionPolicy\x12\x36\n\x06latest\x18\x01 \x01(\x0b\x32$.inference.ModelVersionPolicy.LatestH\x00\x12\x30\n\x03\x61ll\x18\x02 \x01(\x0b\x32!.inference.ModelVersionPolicy.AllH\x00\x12:\n\x08specific\x18\x03 \x01(\x0b\x32&.inference.ModelVersionPolicy.SpecificH\x00\x1a\x1e\n\x06Latest\x12\x14\n\x0cnum_versions\x18\x01 \x01(\r\x1a\x05\n\x03\x41ll\x1a\x1c\n\x08Specific\x12\x10\n\x08versions\x18\x01 \x03(\x03\x42\x0f\n\rpolicy_choice\"\xfd\r\n\x17ModelOptimizationPolicy\x12\x37\n\x05graph\x18\x01 \x01(\x0b\x32(.inference.ModelOptimizationPolicy.Graph\x12\x42\n\x08priority\x18\x02 \x01(\x0e\x32\x30.inference.ModelOptimizationPolicy.ModelPriority\x12\x35\n\x04\x63uda\x18\x03 \x01(\x0b\x32\'.inference.ModelOptimizationPolicy.Cuda\x12X\n\x16\x65xecution_accelerators\x18\x04 \x01(\x0b\x32\x38.inference.ModelOptimizationPolicy.ExecutionAccelerators\x12R\n\x13input_pinned_memory\x18\x05 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12S\n\x14output_pinned_memory\x18\x06 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12&\n\x1egather_kernel_buffer_threshold\x18\x07 \x01(\r\x12\x16\n\x0e\x65\x61ger_batching\x18\x08 \x01(\x08\x1a\x16\n\x05Graph\x12\r\n\x05level\x18\x01 \x01(\x05\x1a\xba\x05\n\x04\x43uda\x12\x0e\n\x06graphs\x18\x01 \x01(\x08\x12\x18\n\x10\x62usy_wait_events\x18\x02 \x01(\x08\x12\x45\n\ngraph_spec\x18\x03 \x03(\x0b\x32\x31.inference.ModelOptimizationPolicy.Cuda.GraphSpec\x12\x1a\n\x12output_copy_stream\x18\x04 \x01(\x08\x1a\xa4\x04\n\tGraphSpec\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12K\n\x05input\x18\x02 \x03(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry\x12W\n\x11graph_lower_bound\x18\x03 \x01(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound\x1a\x14\n\x05Shape\x12\x0b\n\x03\x64im\x18\x01 \x03(\x03\x1a\xdf\x01\n\nLowerBound\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12V\n\x05input\x18\x02 \x03(\x0b\x32G.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\xa4\x03\n\x15\x45xecutionAccelerators\x12g\n\x19gpu_execution_accelerator\x18\x01 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x12g\n\x19\x63pu_execution_accelerator\x18\x02 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x1a\xb8\x01\n\x0b\x41\x63\x63\x65lerator\x12\x0c\n\x04name\x18\x01 \x01(\t\x12h\n\nparameters\x18\x02 \x03(\x0b\x32T.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a$\n\x12PinnedMemoryBuffer\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"I\n\rModelPriority\x12\x14\n\x10PRIORITY_DEFAULT\x10\x00\x12\x10\n\x0cPRIORITY_MAX\x10\x01\x12\x10\n\x0cPRIORITY_MIN\x10\x02\"\xdb\x01\n\x10ModelQueuePolicy\x12\x41\n\x0etimeout_action\x18\x01 \x01(\x0e\x32).inference.ModelQueuePolicy.TimeoutAction\x12$\n\x1c\x64\x65\x66\x61ult_timeout_microseconds\x18\x02 \x01(\x04\x12\x1e\n\x16\x61llow_timeout_override\x18\x03 \x01(\x08\x12\x16\n\x0emax_queue_size\x18\x04 
\x01(\r\"&\n\rTimeoutAction\x12\n\n\x06REJECT\x10\x00\x12\t\n\x05\x44\x45LAY\x10\x01\"\x9b\x03\n\x14ModelDynamicBatching\x12\x1c\n\x14preferred_batch_size\x18\x01 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x02 \x01(\x04\x12\x19\n\x11preserve_ordering\x18\x03 \x01(\x08\x12\x17\n\x0fpriority_levels\x18\x04 \x01(\r\x12\x1e\n\x16\x64\x65\x66\x61ult_priority_level\x18\x05 \x01(\r\x12\x39\n\x14\x64\x65\x66\x61ult_queue_policy\x18\x06 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy\x12W\n\x15priority_queue_policy\x18\x07 \x03(\x0b\x32\x38.inference.ModelDynamicBatching.PriorityQueuePolicyEntry\x1aW\n\x18PriorityQueuePolicyEntry\x12\x0b\n\x03key\x18\x01 \x01(\r\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy:\x02\x38\x01\"\xef\t\n\x15ModelSequenceBatching\x12\x41\n\x06\x64irect\x18\x03 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyDirectH\x00\x12\x41\n\x06oldest\x18\x04 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyOldestH\x00\x12&\n\x1emax_sequence_idle_microseconds\x18\x01 \x01(\x04\x12\x44\n\rcontrol_input\x18\x02 \x03(\x0b\x32-.inference.ModelSequenceBatching.ControlInput\x12\x35\n\x05state\x18\x05 \x03(\x0b\x32&.inference.ModelSequenceBatching.State\x1a\xb1\x02\n\x07\x43ontrol\x12;\n\x04kind\x18\x01 \x01(\x0e\x32-.inference.ModelSequenceBatching.Control.Kind\x12\x18\n\x10int32_false_true\x18\x02 \x03(\x05\x12\x17\n\x0f\x66p32_false_true\x18\x03 \x03(\x02\x12\x17\n\x0f\x62ool_false_true\x18\x05 \x03(\x08\x12&\n\tdata_type\x18\x04 \x01(\x0e\x32\x13.inference.DataType\"u\n\x04Kind\x12\x1a\n\x16\x43ONTROL_SEQUENCE_START\x10\x00\x12\x1a\n\x16\x43ONTROL_SEQUENCE_READY\x10\x01\x12\x18\n\x14\x43ONTROL_SEQUENCE_END\x10\x02\x12\x1b\n\x17\x43ONTROL_SEQUENCE_CORRID\x10\x03\x1aW\n\x0c\x43ontrolInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x39\n\x07\x63ontrol\x18\x02 \x03(\x0b\x32(.inference.ModelSequenceBatching.Control\x1a\x8a\x01\n\x0cInitialState\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x13\n\tdata_file\x18\x04 \x01(\tH\x00\x12\x0c\n\x04name\x18\x05 \x01(\tB\x0c\n\nstate_data\x1a\xac\x01\n\x05State\x12\x12\n\ninput_name\x18\x01 \x01(\t\x12\x13\n\x0boutput_name\x18\x02 \x01(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12\x44\n\rinitial_state\x18\x05 \x03(\x0b\x32-.inference.ModelSequenceBatching.InitialState\x1aX\n\x0eStrategyDirect\x12$\n\x1cmax_queue_delay_microseconds\x18\x01 \x01(\x04\x12 \n\x18minimum_slot_utilization\x18\x02 \x01(\x02\x1au\n\x0eStrategyOldest\x12\x1f\n\x17max_candidate_sequences\x18\x01 \x01(\x05\x12\x1c\n\x14preferred_batch_size\x18\x02 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x03 \x01(\x04\x42\x11\n\x0fstrategy_choice\"\xdd\x02\n\x0fModelEnsembling\x12-\n\x04step\x18\x01 \x03(\x0b\x32\x1f.inference.ModelEnsembling.Step\x1a\x9a\x02\n\x04Step\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\x03\x12@\n\tinput_map\x18\x03 \x03(\x0b\x32-.inference.ModelEnsembling.Step.InputMapEntry\x12\x42\n\noutput_map\x18\x04 \x03(\x0b\x32..inference.ModelEnsembling.Step.OutputMapEntry\x1a/\n\rInputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x0eOutputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"&\n\x0eModelParameter\x12\x14\n\x0cstring_value\x18\x01 \x01(\t\"\xd9\x02\n\x0bModelWarmup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 
\x01(\r\x12\x32\n\x06inputs\x18\x03 \x03(\x0b\x32\".inference.ModelWarmup.InputsEntry\x12\r\n\x05\x63ount\x18\x04 \x01(\r\x1a\x97\x01\n\x05Input\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x15\n\x0brandom_data\x18\x04 \x01(\x08H\x00\x12\x19\n\x0finput_data_file\x18\x05 \x01(\tH\x00\x42\x11\n\x0finput_data_type\x1aK\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12+\n\x05value\x18\x02 \x01(\x0b\x32\x1c.inference.ModelWarmup.Input:\x02\x38\x01\".\n\x0fModelOperations\x12\x1b\n\x13op_library_filename\x18\x01 \x03(\t\"+\n\x16ModelTransactionPolicy\x12\x11\n\tdecoupled\x18\x01 \x01(\x08\"\xe6\x01\n\x15ModelRepositoryAgents\x12\x36\n\x06\x61gents\x18\x01 \x03(\x0b\x32&.inference.ModelRepositoryAgents.Agent\x1a\x94\x01\n\x05\x41gent\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\nparameters\x18\x02 \x03(\x0b\x32\x36.inference.ModelRepositoryAgents.Agent.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"$\n\x12ModelResponseCache\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"\xb2\n\n\x0bModelConfig\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08platform\x18\x02 \x01(\t\x12\x0f\n\x07\x62\x61\x63kend\x18\x11 \x01(\t\x12\x35\n\x0eversion_policy\x18\x03 \x01(\x0b\x32\x1d.inference.ModelVersionPolicy\x12\x16\n\x0emax_batch_size\x18\x04 \x01(\x05\x12$\n\x05input\x18\x05 \x03(\x0b\x32\x15.inference.ModelInput\x12&\n\x06output\x18\x06 \x03(\x0b\x32\x16.inference.ModelOutput\x12*\n\x0b\x62\x61tch_input\x18\x14 \x03(\x0b\x32\x15.inference.BatchInput\x12,\n\x0c\x62\x61tch_output\x18\x15 \x03(\x0b\x32\x16.inference.BatchOutput\x12\x38\n\x0coptimization\x18\x0c \x01(\x0b\x32\".inference.ModelOptimizationPolicy\x12;\n\x10\x64ynamic_batching\x18\x0b \x01(\x0b\x32\x1f.inference.ModelDynamicBatchingH\x00\x12=\n\x11sequence_batching\x18\r \x01(\x0b\x32 .inference.ModelSequenceBatchingH\x00\x12\x39\n\x13\x65nsemble_scheduling\x18\x0f \x01(\x0b\x32\x1a.inference.ModelEnsemblingH\x00\x12\x35\n\x0einstance_group\x18\x07 \x03(\x0b\x32\x1d.inference.ModelInstanceGroup\x12\x1e\n\x16\x64\x65\x66\x61ult_model_filename\x18\x08 \x01(\t\x12H\n\x12\x63\x63_model_filenames\x18\t \x03(\x0b\x32,.inference.ModelConfig.CcModelFilenamesEntry\x12;\n\x0bmetric_tags\x18\n \x03(\x0b\x32&.inference.ModelConfig.MetricTagsEntry\x12:\n\nparameters\x18\x0e \x03(\x0b\x32&.inference.ModelConfig.ParametersEntry\x12,\n\x0cmodel_warmup\x18\x10 \x03(\x0b\x32\x16.inference.ModelWarmup\x12\x34\n\x10model_operations\x18\x12 \x01(\x0b\x32\x1a.inference.ModelOperations\x12\x43\n\x18model_transaction_policy\x18\x13 \x01(\x0b\x32!.inference.ModelTransactionPolicy\x12\x41\n\x17model_repository_agents\x18\x17 \x01(\x0b\x32 .inference.ModelRepositoryAgents\x12\x35\n\x0eresponse_cache\x18\x18 \x01(\x0b\x32\x1d.inference.ModelResponseCache\x1a\x37\n\x15\x43\x63ModelFilenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x31\n\x0fMetricTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 
\x01(\x0b\x32\x19.inference.ModelParameter:\x02\x38\x01\x42\x13\n\x11scheduling_choice*\xfa\x01\n\x08\x44\x61taType\x12\x10\n\x0cTYPE_INVALID\x10\x00\x12\r\n\tTYPE_BOOL\x10\x01\x12\x0e\n\nTYPE_UINT8\x10\x02\x12\x0f\n\x0bTYPE_UINT16\x10\x03\x12\x0f\n\x0bTYPE_UINT32\x10\x04\x12\x0f\n\x0bTYPE_UINT64\x10\x05\x12\r\n\tTYPE_INT8\x10\x06\x12\x0e\n\nTYPE_INT16\x10\x07\x12\x0e\n\nTYPE_INT32\x10\x08\x12\x0e\n\nTYPE_INT64\x10\t\x12\r\n\tTYPE_FP16\x10\n\x12\r\n\tTYPE_FP32\x10\x0b\x12\r\n\tTYPE_FP64\x10\x0c\x12\x0f\n\x0bTYPE_STRING\x10\r\x12\r\n\tTYPE_BF16\x10\x0e\x62\x06proto3' -) - -_DATATYPE = DESCRIPTOR.enum_types_by_name['DataType'] -DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE) -TYPE_INVALID = 0 -TYPE_BOOL = 1 -TYPE_UINT8 = 2 -TYPE_UINT16 = 3 -TYPE_UINT32 = 4 -TYPE_UINT64 = 5 -TYPE_INT8 = 6 -TYPE_INT16 = 7 -TYPE_INT32 = 8 -TYPE_INT64 = 9 -TYPE_FP16 = 10 -TYPE_FP32 = 11 -TYPE_FP64 = 12 -TYPE_STRING = 13 -TYPE_BF16 = 14 - -_MODELRATELIMITER = DESCRIPTOR.message_types_by_name['ModelRateLimiter'] -_MODELRATELIMITER_RESOURCE = _MODELRATELIMITER.nested_types_by_name['Resource'] -_MODELINSTANCEGROUP = DESCRIPTOR.message_types_by_name['ModelInstanceGroup'] -_MODELINSTANCEGROUP_SECONDARYDEVICE = _MODELINSTANCEGROUP.nested_types_by_name[ - 'SecondaryDevice'] -_MODELTENSORRESHAPE = DESCRIPTOR.message_types_by_name['ModelTensorReshape'] -_MODELINPUT = DESCRIPTOR.message_types_by_name['ModelInput'] -_MODELOUTPUT = DESCRIPTOR.message_types_by_name['ModelOutput'] -_BATCHINPUT = DESCRIPTOR.message_types_by_name['BatchInput'] -_BATCHOUTPUT = DESCRIPTOR.message_types_by_name['BatchOutput'] -_MODELVERSIONPOLICY = DESCRIPTOR.message_types_by_name['ModelVersionPolicy'] -_MODELVERSIONPOLICY_LATEST = _MODELVERSIONPOLICY.nested_types_by_name['Latest'] -_MODELVERSIONPOLICY_ALL = _MODELVERSIONPOLICY.nested_types_by_name['All'] -_MODELVERSIONPOLICY_SPECIFIC = _MODELVERSIONPOLICY.nested_types_by_name[ - 'Specific'] -_MODELOPTIMIZATIONPOLICY = DESCRIPTOR.message_types_by_name[ - 'ModelOptimizationPolicy'] -_MODELOPTIMIZATIONPOLICY_GRAPH = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ - 'Graph'] -_MODELOPTIMIZATIONPOLICY_CUDA = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ - 'Cuda'] -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC = _MODELOPTIMIZATIONPOLICY_CUDA.nested_types_by_name[ - 'GraphSpec'] -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ - 'Shape'] -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ - 'LowerBound'] -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.nested_types_by_name[ - 'InputEntry'] -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ - 'InputEntry'] -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ - 'ExecutionAccelerators'] -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.nested_types_by_name[ - 'Accelerator'] -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.nested_types_by_name[ - 'ParametersEntry'] -_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ - 'PinnedMemoryBuffer'] -_MODELQUEUEPOLICY = DESCRIPTOR.message_types_by_name['ModelQueuePolicy'] -_MODELDYNAMICBATCHING = DESCRIPTOR.message_types_by_name[ - 
'ModelDynamicBatching'] -_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY = _MODELDYNAMICBATCHING.nested_types_by_name[ - 'PriorityQueuePolicyEntry'] -_MODELSEQUENCEBATCHING = DESCRIPTOR.message_types_by_name[ - 'ModelSequenceBatching'] -_MODELSEQUENCEBATCHING_CONTROL = _MODELSEQUENCEBATCHING.nested_types_by_name[ - 'Control'] -_MODELSEQUENCEBATCHING_CONTROLINPUT = _MODELSEQUENCEBATCHING.nested_types_by_name[ - 'ControlInput'] -_MODELSEQUENCEBATCHING_INITIALSTATE = _MODELSEQUENCEBATCHING.nested_types_by_name[ - 'InitialState'] -_MODELSEQUENCEBATCHING_STATE = _MODELSEQUENCEBATCHING.nested_types_by_name[ - 'State'] -_MODELSEQUENCEBATCHING_STRATEGYDIRECT = _MODELSEQUENCEBATCHING.nested_types_by_name[ - 'StrategyDirect'] -_MODELSEQUENCEBATCHING_STRATEGYOLDEST = _MODELSEQUENCEBATCHING.nested_types_by_name[ - 'StrategyOldest'] -_MODELENSEMBLING = DESCRIPTOR.message_types_by_name['ModelEnsembling'] -_MODELENSEMBLING_STEP = _MODELENSEMBLING.nested_types_by_name['Step'] -_MODELENSEMBLING_STEP_INPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[ - 'InputMapEntry'] -_MODELENSEMBLING_STEP_OUTPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[ - 'OutputMapEntry'] -_MODELPARAMETER = DESCRIPTOR.message_types_by_name['ModelParameter'] -_MODELWARMUP = DESCRIPTOR.message_types_by_name['ModelWarmup'] -_MODELWARMUP_INPUT = _MODELWARMUP.nested_types_by_name['Input'] -_MODELWARMUP_INPUTSENTRY = _MODELWARMUP.nested_types_by_name['InputsEntry'] -_MODELOPERATIONS = DESCRIPTOR.message_types_by_name['ModelOperations'] -_MODELTRANSACTIONPOLICY = DESCRIPTOR.message_types_by_name[ - 'ModelTransactionPolicy'] -_MODELREPOSITORYAGENTS = DESCRIPTOR.message_types_by_name[ - 'ModelRepositoryAgents'] -_MODELREPOSITORYAGENTS_AGENT = _MODELREPOSITORYAGENTS.nested_types_by_name[ - 'Agent'] -_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY = _MODELREPOSITORYAGENTS_AGENT.nested_types_by_name[ - 'ParametersEntry'] -_MODELRESPONSECACHE = DESCRIPTOR.message_types_by_name['ModelResponseCache'] -_MODELCONFIG = DESCRIPTOR.message_types_by_name['ModelConfig'] -_MODELCONFIG_CCMODELFILENAMESENTRY = _MODELCONFIG.nested_types_by_name[ - 'CcModelFilenamesEntry'] -_MODELCONFIG_METRICTAGSENTRY = _MODELCONFIG.nested_types_by_name[ - 'MetricTagsEntry'] -_MODELCONFIG_PARAMETERSENTRY = _MODELCONFIG.nested_types_by_name[ - 'ParametersEntry'] -_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND = _MODELINSTANCEGROUP_SECONDARYDEVICE.enum_types_by_name[ - 'SecondaryDeviceKind'] -_MODELINSTANCEGROUP_KIND = _MODELINSTANCEGROUP.enum_types_by_name['Kind'] -_MODELINPUT_FORMAT = _MODELINPUT.enum_types_by_name['Format'] -_BATCHINPUT_KIND = _BATCHINPUT.enum_types_by_name['Kind'] -_BATCHOUTPUT_KIND = _BATCHOUTPUT.enum_types_by_name['Kind'] -_MODELOPTIMIZATIONPOLICY_MODELPRIORITY = _MODELOPTIMIZATIONPOLICY.enum_types_by_name[ - 'ModelPriority'] -_MODELQUEUEPOLICY_TIMEOUTACTION = _MODELQUEUEPOLICY.enum_types_by_name[ - 'TimeoutAction'] -_MODELSEQUENCEBATCHING_CONTROL_KIND = _MODELSEQUENCEBATCHING_CONTROL.enum_types_by_name[ - 'Kind'] -ModelRateLimiter = _reflection.GeneratedProtocolMessageType( - 'ModelRateLimiter', - (_message.Message, ), - { - 'Resource': - _reflection.GeneratedProtocolMessageType( - 'Resource', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELRATELIMITER_RESOURCE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelRateLimiter.Resource) - }), - 'DESCRIPTOR': - _MODELRATELIMITER, - '__module__': - 'model_config.protxt_pb2' - # 
@@protoc_insertion_point(class_scope:inference.ModelRateLimiter) - }) -_sym_db.RegisterMessage(ModelRateLimiter) -_sym_db.RegisterMessage(ModelRateLimiter.Resource) - -ModelInstanceGroup = _reflection.GeneratedProtocolMessageType( - 'ModelInstanceGroup', - (_message.Message, ), - { - 'SecondaryDevice': - _reflection.GeneratedProtocolMessageType( - 'SecondaryDevice', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELINSTANCEGROUP_SECONDARYDEVICE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup.SecondaryDevice) - }), - 'DESCRIPTOR': - _MODELINSTANCEGROUP, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup) - }) -_sym_db.RegisterMessage(ModelInstanceGroup) -_sym_db.RegisterMessage(ModelInstanceGroup.SecondaryDevice) - -ModelTensorReshape = _reflection.GeneratedProtocolMessageType( - 'ModelTensorReshape', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELTENSORRESHAPE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelTensorReshape) - }) -_sym_db.RegisterMessage(ModelTensorReshape) - -ModelInput = _reflection.GeneratedProtocolMessageType( - 'ModelInput', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELINPUT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelInput) - }) -_sym_db.RegisterMessage(ModelInput) - -ModelOutput = _reflection.GeneratedProtocolMessageType( - 'ModelOutput', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELOUTPUT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOutput) - }) -_sym_db.RegisterMessage(ModelOutput) - -BatchInput = _reflection.GeneratedProtocolMessageType( - 'BatchInput', - (_message.Message, ), - { - 'DESCRIPTOR': _BATCHINPUT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.BatchInput) - }) -_sym_db.RegisterMessage(BatchInput) - -BatchOutput = _reflection.GeneratedProtocolMessageType( - 'BatchOutput', - (_message.Message, ), - { - 'DESCRIPTOR': _BATCHOUTPUT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.BatchOutput) - }) -_sym_db.RegisterMessage(BatchOutput) - -ModelVersionPolicy = _reflection.GeneratedProtocolMessageType( - 'ModelVersionPolicy', - (_message.Message, ), - { - 'Latest': - _reflection.GeneratedProtocolMessageType( - 'Latest', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELVERSIONPOLICY_LATEST, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Latest) - }), - 'All': - _reflection.GeneratedProtocolMessageType( - 'All', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELVERSIONPOLICY_ALL, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.All) - }), - 'Specific': - _reflection.GeneratedProtocolMessageType( - 'Specific', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELVERSIONPOLICY_SPECIFIC, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Specific) - }), - 'DESCRIPTOR': - _MODELVERSIONPOLICY, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy) - }) -_sym_db.RegisterMessage(ModelVersionPolicy) -_sym_db.RegisterMessage(ModelVersionPolicy.Latest) -_sym_db.RegisterMessage(ModelVersionPolicy.All) 
-_sym_db.RegisterMessage(ModelVersionPolicy.Specific) - -ModelOptimizationPolicy = _reflection.GeneratedProtocolMessageType( - 'ModelOptimizationPolicy', - (_message.Message, ), - { - 'Graph': - _reflection.GeneratedProtocolMessageType( - 'Graph', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELOPTIMIZATIONPOLICY_GRAPH, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Graph) - }), - 'Cuda': - _reflection.GeneratedProtocolMessageType( - 'Cuda', - (_message.Message, ), - { - 'GraphSpec': - _reflection.GeneratedProtocolMessageType( - 'GraphSpec', - (_message.Message, ), - { - 'Shape': - _reflection.GeneratedProtocolMessageType( - 'Shape', - (_message.Message, ), - { - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape) - }), - 'LowerBound': - _reflection.GeneratedProtocolMessageType( - 'LowerBound', - (_message.Message, ), - { - 'InputEntry': - _reflection.GeneratedProtocolMessageType( - 'InputEntry', - (_message.Message, ), - { - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) - }), - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) - }), - 'InputEntry': - _reflection.GeneratedProtocolMessageType( - 'InputEntry', - (_message.Message, ), - { - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) - }), - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec) - }), - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_CUDA, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda) - }), - 'ExecutionAccelerators': - _reflection.GeneratedProtocolMessageType( - 'ExecutionAccelerators', - (_message.Message, ), - { - 'Accelerator': - _reflection.GeneratedProtocolMessageType( - 'Accelerator', - (_message.Message, ), - { - 'ParametersEntry': - _reflection.GeneratedProtocolMessageType( - 'ParametersEntry', - (_message.Message, ), - { - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) - }), - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) - }), - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators) - }), - 'PinnedMemoryBuffer': - _reflection.GeneratedProtocolMessageType( - 'PinnedMemoryBuffer', - (_message.Message, ), - { - 'DESCRIPTOR': 
_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.PinnedMemoryBuffer) - }), - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy) - }) -_sym_db.RegisterMessage(ModelOptimizationPolicy) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Graph) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.Shape) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) -_sym_db.RegisterMessage( - ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) -_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators) -_sym_db.RegisterMessage( - ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) -_sym_db.RegisterMessage( - ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) -_sym_db.RegisterMessage(ModelOptimizationPolicy.PinnedMemoryBuffer) - -ModelQueuePolicy = _reflection.GeneratedProtocolMessageType( - 'ModelQueuePolicy', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELQUEUEPOLICY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelQueuePolicy) - }) -_sym_db.RegisterMessage(ModelQueuePolicy) - -ModelDynamicBatching = _reflection.GeneratedProtocolMessageType( - 'ModelDynamicBatching', - (_message.Message, ), - { - 'PriorityQueuePolicyEntry': - _reflection.GeneratedProtocolMessageType( - 'PriorityQueuePolicyEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching.PriorityQueuePolicyEntry) - }), - 'DESCRIPTOR': - _MODELDYNAMICBATCHING, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching) - }) -_sym_db.RegisterMessage(ModelDynamicBatching) -_sym_db.RegisterMessage(ModelDynamicBatching.PriorityQueuePolicyEntry) - -ModelSequenceBatching = _reflection.GeneratedProtocolMessageType( - 'ModelSequenceBatching', - (_message.Message, ), - { - 'Control': - _reflection.GeneratedProtocolMessageType( - 'Control', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROL, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.Control) - }), - 'ControlInput': - _reflection.GeneratedProtocolMessageType( - 'ControlInput', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROLINPUT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.ControlInput) - }), - 'InitialState': - _reflection.GeneratedProtocolMessageType( - 'InitialState', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELSEQUENCEBATCHING_INITIALSTATE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.InitialState) - }), - 'State': - _reflection.GeneratedProtocolMessageType( - 'State', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STATE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.State) - }), - 
'StrategyDirect': - _reflection.GeneratedProtocolMessageType( - 'StrategyDirect', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYDIRECT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyDirect) - }), - 'StrategyOldest': - _reflection.GeneratedProtocolMessageType( - 'StrategyOldest', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYOLDEST, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyOldest) - }), - 'DESCRIPTOR': - _MODELSEQUENCEBATCHING, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching) - }) -_sym_db.RegisterMessage(ModelSequenceBatching) -_sym_db.RegisterMessage(ModelSequenceBatching.Control) -_sym_db.RegisterMessage(ModelSequenceBatching.ControlInput) -_sym_db.RegisterMessage(ModelSequenceBatching.InitialState) -_sym_db.RegisterMessage(ModelSequenceBatching.State) -_sym_db.RegisterMessage(ModelSequenceBatching.StrategyDirect) -_sym_db.RegisterMessage(ModelSequenceBatching.StrategyOldest) - -ModelEnsembling = _reflection.GeneratedProtocolMessageType( - 'ModelEnsembling', - (_message.Message, ), - { - 'Step': - _reflection.GeneratedProtocolMessageType( - 'Step', - (_message.Message, ), - { - 'InputMapEntry': - _reflection.GeneratedProtocolMessageType( - 'InputMapEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELENSEMBLING_STEP_INPUTMAPENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.InputMapEntry) - }), - 'OutputMapEntry': - _reflection.GeneratedProtocolMessageType( - 'OutputMapEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELENSEMBLING_STEP_OUTPUTMAPENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.OutputMapEntry) - }), - 'DESCRIPTOR': - _MODELENSEMBLING_STEP, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step) - }), - 'DESCRIPTOR': - _MODELENSEMBLING, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling) - }) -_sym_db.RegisterMessage(ModelEnsembling) -_sym_db.RegisterMessage(ModelEnsembling.Step) -_sym_db.RegisterMessage(ModelEnsembling.Step.InputMapEntry) -_sym_db.RegisterMessage(ModelEnsembling.Step.OutputMapEntry) - -ModelParameter = _reflection.GeneratedProtocolMessageType( - 'ModelParameter', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELPARAMETER, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelParameter) - }) -_sym_db.RegisterMessage(ModelParameter) - -ModelWarmup = _reflection.GeneratedProtocolMessageType( - 'ModelWarmup', - (_message.Message, ), - { - 'Input': - _reflection.GeneratedProtocolMessageType( - 'Input', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELWARMUP_INPUT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelWarmup.Input) - }), - 'InputsEntry': - _reflection.GeneratedProtocolMessageType( - 'InputsEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELWARMUP_INPUTSENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelWarmup.InputsEntry) - }), - 'DESCRIPTOR': - _MODELWARMUP, - '__module__': - 'model_config.protxt_pb2' - # 
@@protoc_insertion_point(class_scope:inference.ModelWarmup) - }) -_sym_db.RegisterMessage(ModelWarmup) -_sym_db.RegisterMessage(ModelWarmup.Input) -_sym_db.RegisterMessage(ModelWarmup.InputsEntry) - -ModelOperations = _reflection.GeneratedProtocolMessageType( - 'ModelOperations', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELOPERATIONS, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOperations) - }) -_sym_db.RegisterMessage(ModelOperations) - -ModelTransactionPolicy = _reflection.GeneratedProtocolMessageType( - 'ModelTransactionPolicy', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELTRANSACTIONPOLICY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelTransactionPolicy) - }) -_sym_db.RegisterMessage(ModelTransactionPolicy) - -ModelRepositoryAgents = _reflection.GeneratedProtocolMessageType( - 'ModelRepositoryAgents', - (_message.Message, ), - { - 'Agent': - _reflection.GeneratedProtocolMessageType( - 'Agent', - (_message.Message, ), - { - 'ParametersEntry': - _reflection.GeneratedProtocolMessageType( - 'ParametersEntry', - (_message.Message, ), - { - 'DESCRIPTOR': - _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent.ParametersEntry) - }), - 'DESCRIPTOR': - _MODELREPOSITORYAGENTS_AGENT, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent) - }), - 'DESCRIPTOR': - _MODELREPOSITORYAGENTS, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents) - }) -_sym_db.RegisterMessage(ModelRepositoryAgents) -_sym_db.RegisterMessage(ModelRepositoryAgents.Agent) -_sym_db.RegisterMessage(ModelRepositoryAgents.Agent.ParametersEntry) - -ModelResponseCache = _reflection.GeneratedProtocolMessageType( - 'ModelResponseCache', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELRESPONSECACHE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelResponseCache) - }) -_sym_db.RegisterMessage(ModelResponseCache) - -ModelConfig = _reflection.GeneratedProtocolMessageType( - 'ModelConfig', - (_message.Message, ), - { - 'CcModelFilenamesEntry': - _reflection.GeneratedProtocolMessageType( - 'CcModelFilenamesEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELCONFIG_CCMODELFILENAMESENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelConfig.CcModelFilenamesEntry) - }), - 'MetricTagsEntry': - _reflection.GeneratedProtocolMessageType( - 'MetricTagsEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELCONFIG_METRICTAGSENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelConfig.MetricTagsEntry) - }), - 'ParametersEntry': - _reflection.GeneratedProtocolMessageType( - 'ParametersEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELCONFIG_PARAMETERSENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelConfig.ParametersEntry) - }), - 'DESCRIPTOR': - _MODELCONFIG, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelConfig) - }) -_sym_db.RegisterMessage(ModelConfig) -_sym_db.RegisterMessage(ModelConfig.CcModelFilenamesEntry) -_sym_db.RegisterMessage(ModelConfig.MetricTagsEntry) -_sym_db.RegisterMessage(ModelConfig.ParametersEntry) 
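The guarded block that follows (and its counterpart in the replacement module added below) only executes when protobuf falls back to its pure-Python descriptor implementation; under the C++ or upb backends the serialized options (each b'8\001' payload is the encoded map_entry = true option) and the _serialized_start/_serialized_end offsets are resolved natively, so the assignments are skipped. A minimal sketch for checking which backend is active in a given environment — api_implementation is an internal protobuf module, so treat this as a debugging aid rather than a stable API:

from google.protobuf.internal import api_implementation

# 'python' means the pure-Python fallback is in use and the guarded
# assignments below run; 'cpp' or 'upb' means they are skipped.
print(api_implementation.Type())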
- -if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._options = None - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_options = b'8\001' - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._options = None - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_options = b'8\001' - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._options = None - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_options = b'8\001' - _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._options = None - _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_options = b'8\001' - _MODELENSEMBLING_STEP_INPUTMAPENTRY._options = None - _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_options = b'8\001' - _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._options = None - _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_options = b'8\001' - _MODELWARMUP_INPUTSENTRY._options = None - _MODELWARMUP_INPUTSENTRY._serialized_options = b'8\001' - _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._options = None - _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_options = b'8\001' - _MODELCONFIG_CCMODELFILENAMESENTRY._options = None - _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_options = b'8\001' - _MODELCONFIG_METRICTAGSENTRY._options = None - _MODELCONFIG_METRICTAGSENTRY._serialized_options = b'8\001' - _MODELCONFIG_PARAMETERSENTRY._options = None - _MODELCONFIG_PARAMETERSENTRY._serialized_options = b'8\001' - _DATATYPE._serialized_start = 8137 - _DATATYPE._serialized_end = 8387 - _MODELRATELIMITER._serialized_start = 35 - _MODELRATELIMITER._serialized_end = 185 - _MODELRATELIMITER_RESOURCE._serialized_start = 130 - _MODELRATELIMITER_RESOURCE._serialized_end = 185 - _MODELINSTANCEGROUP._serialized_start = 188 - _MODELINSTANCEGROUP._serialized_end = 707 - _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_start = 484 - _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_end = 640 - _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_start = 603 - _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_end = 640 - _MODELINSTANCEGROUP_KIND._serialized_start = 642 - _MODELINSTANCEGROUP_KIND._serialized_end = 707 - _MODELTENSORRESHAPE._serialized_start = 709 - _MODELTENSORRESHAPE._serialized_end = 744 - _MODELINPUT._serialized_start = 747 - _MODELINPUT._serialized_end = 1053 - _MODELINPUT_FORMAT._serialized_start = 994 - _MODELINPUT_FORMAT._serialized_end = 1053 - _MODELOUTPUT._serialized_start = 1056 - _MODELOUTPUT._serialized_end = 1234 - _BATCHINPUT._serialized_start = 1237 - _BATCHINPUT._serialized_end = 1582 - _BATCHINPUT_KIND._serialized_start = 1377 - _BATCHINPUT_KIND._serialized_end = 1582 - _BATCHOUTPUT._serialized_start = 1585 - _BATCHOUTPUT._serialized_end = 1728 - _BATCHOUTPUT_KIND._serialized_start = 1686 - _BATCHOUTPUT_KIND._serialized_end = 1728 - _MODELVERSIONPOLICY._serialized_start = 1731 - _MODELVERSIONPOLICY._serialized_end = 2003 - _MODELVERSIONPOLICY_LATEST._serialized_start = 1919 - _MODELVERSIONPOLICY_LATEST._serialized_end = 1949 - _MODELVERSIONPOLICY_ALL._serialized_start = 1951 - _MODELVERSIONPOLICY_ALL._serialized_end = 1956 - _MODELVERSIONPOLICY_SPECIFIC._serialized_start = 1958 - _MODELVERSIONPOLICY_SPECIFIC._serialized_end = 1986 - _MODELOPTIMIZATIONPOLICY._serialized_start = 2006 - _MODELOPTIMIZATIONPOLICY._serialized_end = 3795 - _MODELOPTIMIZATIONPOLICY_GRAPH._serialized_start = 2536 - 
_MODELOPTIMIZATIONPOLICY_GRAPH._serialized_end = 2558 - _MODELOPTIMIZATIONPOLICY_CUDA._serialized_start = 2561 - _MODELOPTIMIZATIONPOLICY_CUDA._serialized_end = 3259 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_start = 2711 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_end = 3259 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_start = 2910 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_end = 2930 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_start = 2933 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_end = 3156 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_start = 3055 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_end = 3156 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_start = 3055 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_end = 3156 - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_start = 3262 - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_end = 3682 - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_start = 3498 - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_end = 3682 - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_start = 3633 - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_end = 3682 - _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_start = 3684 - _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_end = 3720 - _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_start = 3722 - _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_end = 3795 - _MODELQUEUEPOLICY._serialized_start = 3798 - _MODELQUEUEPOLICY._serialized_end = 4017 - _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_start = 3979 - _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_end = 4017 - _MODELDYNAMICBATCHING._serialized_start = 4020 - _MODELDYNAMICBATCHING._serialized_end = 4431 - _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_start = 4344 - _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_end = 4431 - _MODELSEQUENCEBATCHING._serialized_start = 4434 - _MODELSEQUENCEBATCHING._serialized_end = 5697 - _MODELSEQUENCEBATCHING_CONTROL._serialized_start = 4759 - _MODELSEQUENCEBATCHING_CONTROL._serialized_end = 5064 - _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_start = 4947 - _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_end = 5064 - _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_start = 5066 - _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_end = 5153 - _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_start = 5156 - _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_end = 5294 - _MODELSEQUENCEBATCHING_STATE._serialized_start = 5297 - _MODELSEQUENCEBATCHING_STATE._serialized_end = 5469 - _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_start = 5471 - _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_end = 5559 - _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_start = 5561 - _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_end = 5678 - _MODELENSEMBLING._serialized_start = 5700 - _MODELENSEMBLING._serialized_end = 6049 - _MODELENSEMBLING_STEP._serialized_start = 5767 - _MODELENSEMBLING_STEP._serialized_end = 6049 - _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_start = 5952 - _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_end = 5999 - _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_start = 6001 - _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_end = 6049 - 
_MODELPARAMETER._serialized_start = 6051 - _MODELPARAMETER._serialized_end = 6089 - _MODELWARMUP._serialized_start = 6092 - _MODELWARMUP._serialized_end = 6437 - _MODELWARMUP_INPUT._serialized_start = 6209 - _MODELWARMUP_INPUT._serialized_end = 6360 - _MODELWARMUP_INPUTSENTRY._serialized_start = 6362 - _MODELWARMUP_INPUTSENTRY._serialized_end = 6437 - _MODELOPERATIONS._serialized_start = 6439 - _MODELOPERATIONS._serialized_end = 6485 - _MODELTRANSACTIONPOLICY._serialized_start = 6487 - _MODELTRANSACTIONPOLICY._serialized_end = 6530 - _MODELREPOSITORYAGENTS._serialized_start = 6533 - _MODELREPOSITORYAGENTS._serialized_end = 6763 - _MODELREPOSITORYAGENTS_AGENT._serialized_start = 6615 - _MODELREPOSITORYAGENTS_AGENT._serialized_end = 6763 - _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_start = 3633 - _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_end = 3682 - _MODELRESPONSECACHE._serialized_start = 6765 - _MODELRESPONSECACHE._serialized_end = 6801 - _MODELCONFIG._serialized_start = 6804 - _MODELCONFIG._serialized_end = 8134 - _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_start = 7929 - _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_end = 7984 - _MODELCONFIG_METRICTAGSENTRY._serialized_start = 7986 - _MODELCONFIG_METRICTAGSENTRY._serialized_end = 8035 - _MODELCONFIG_PARAMETERSENTRY._serialized_start = 8037 - _MODELCONFIG_PARAMETERSENTRY._serialized_end = 8113 -# @@protoc_insertion_point(module_scope) From 7c3c3b7ecc3ba20fcdf8ef22afeb1bbd2226ed49 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 10 Jan 2023 15:47:49 +0800 Subject: [PATCH 47/48] optimize code structure --- .../component/inference/proto/__init__.py | 14 + .../inference/proto/model_config_pb2.py | 856 ++++++++++++++++++ 2 files changed, 870 insertions(+) create mode 100644 visualdl/component/inference/proto/__init__.py create mode 100644 visualdl/component/inference/proto/model_config_pb2.py diff --git a/visualdl/component/inference/proto/__init__.py b/visualdl/component/inference/proto/__init__.py new file mode 100644 index 000000000..9c19f7b87 --- /dev/null +++ b/visualdl/component/inference/proto/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022 VisualDL Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ======================================================================= diff --git a/visualdl/component/inference/proto/model_config_pb2.py b/visualdl/component/inference/proto/model_config_pb2.py new file mode 100644 index 000000000..70bf7b906 --- /dev/null +++ b/visualdl/component/inference/proto/model_config_pb2.py @@ -0,0 +1,856 @@ +# flake8: noqa +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: model_config.protxt +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import enum_type_wrapper +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x13model_config.protxt\x12\tinference\"\x96\x01\n\x10ModelRateLimiter\x12\x37\n\tresources\x18\x01 \x03(\x0b\x32$.inference.ModelRateLimiter.Resource\x12\x10\n\x08priority\x18\x02 \x01(\r\x1a\x37\n\x08Resource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06global\x18\x02 \x01(\x08\x12\r\n\x05\x63ount\x18\x03 \x01(\r\"\x87\x04\n\x12ModelInstanceGroup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x30\n\x04kind\x18\x04 \x01(\x0e\x32\".inference.ModelInstanceGroup.Kind\x12\r\n\x05\x63ount\x18\x02 \x01(\x05\x12\x31\n\x0crate_limiter\x18\x06 \x01(\x0b\x32\x1b.inference.ModelRateLimiter\x12\x0c\n\x04gpus\x18\x03 \x03(\x05\x12H\n\x11secondary_devices\x18\x08 \x03(\x0b\x32-.inference.ModelInstanceGroup.SecondaryDevice\x12\x0f\n\x07profile\x18\x05 \x03(\t\x12\x0f\n\x07passive\x18\x07 \x01(\x08\x12\x13\n\x0bhost_policy\x18\t \x01(\t\x1a\x9c\x01\n\x0fSecondaryDevice\x12O\n\x04kind\x18\x01 \x01(\x0e\x32\x41.inference.ModelInstanceGroup.SecondaryDevice.SecondaryDeviceKind\x12\x11\n\tdevice_id\x18\x02 \x01(\x03\"%\n\x13SecondaryDeviceKind\x12\x0e\n\nKIND_NVDLA\x10\x00\"A\n\x04Kind\x12\r\n\tKIND_AUTO\x10\x00\x12\x0c\n\x08KIND_GPU\x10\x01\x12\x0c\n\x08KIND_CPU\x10\x02\x12\x0e\n\nKIND_MODEL\x10\x03\"#\n\x12ModelTensorReshape\x12\r\n\x05shape\x18\x01 \x03(\x03\"\xb2\x02\n\nModelInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12,\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1c.inference.ModelInput.Format\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\x12\x1a\n\x12\x61llow_ragged_batch\x18\x07 \x01(\x08\x12\x10\n\x08optional\x18\x08 \x01(\x08\";\n\x06\x46ormat\x12\x0f\n\x0b\x46ORMAT_NONE\x10\x00\x12\x0f\n\x0b\x46ORMAT_NHWC\x10\x01\x12\x0f\n\x0b\x46ORMAT_NCHW\x10\x02\"\xb2\x01\n\x0bModelOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x03 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x16\n\x0elabel_filename\x18\x04 \x01(\t\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\"\xd9\x02\n\nBatchInput\x12(\n\x04kind\x18\x01 \x01(\x0e\x32\x1a.inference.BatchInput.Kind\x12\x13\n\x0btarget_name\x18\x02 \x03(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x14\n\x0csource_input\x18\x04 \x03(\t\"\xcd\x01\n\x04Kind\x12\x17\n\x13\x42\x41TCH_ELEMENT_COUNT\x10\x00\x12#\n\x1f\x42\x41TCH_ACCUMULATED_ELEMENT_COUNT\x10\x01\x12-\n)BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO\x10\x02\x12$\n BATCH_MAX_ELEMENT_COUNT_AS_SHAPE\x10\x03\x12\x14\n\x10\x42\x41TCH_ITEM_SHAPE\x10\x04\x12\x1c\n\x18\x42\x41TCH_ITEM_SHAPE_FLATTEN\x10\x05\"\x8f\x01\n\x0b\x42\x61tchOutput\x12\x13\n\x0btarget_name\x18\x01 \x03(\t\x12)\n\x04kind\x18\x02 \x01(\x0e\x32\x1b.inference.BatchOutput.Kind\x12\x14\n\x0csource_input\x18\x03 
\x03(\t\"*\n\x04Kind\x12\"\n\x1e\x42\x41TCH_SCATTER_WITH_INPUT_SHAPE\x10\x00\"\x90\x02\n\x12ModelVersionPolicy\x12\x36\n\x06latest\x18\x01 \x01(\x0b\x32$.inference.ModelVersionPolicy.LatestH\x00\x12\x30\n\x03\x61ll\x18\x02 \x01(\x0b\x32!.inference.ModelVersionPolicy.AllH\x00\x12:\n\x08specific\x18\x03 \x01(\x0b\x32&.inference.ModelVersionPolicy.SpecificH\x00\x1a\x1e\n\x06Latest\x12\x14\n\x0cnum_versions\x18\x01 \x01(\r\x1a\x05\n\x03\x41ll\x1a\x1c\n\x08Specific\x12\x10\n\x08versions\x18\x01 \x03(\x03\x42\x0f\n\rpolicy_choice\"\xfd\r\n\x17ModelOptimizationPolicy\x12\x37\n\x05graph\x18\x01 \x01(\x0b\x32(.inference.ModelOptimizationPolicy.Graph\x12\x42\n\x08priority\x18\x02 \x01(\x0e\x32\x30.inference.ModelOptimizationPolicy.ModelPriority\x12\x35\n\x04\x63uda\x18\x03 \x01(\x0b\x32\'.inference.ModelOptimizationPolicy.Cuda\x12X\n\x16\x65xecution_accelerators\x18\x04 \x01(\x0b\x32\x38.inference.ModelOptimizationPolicy.ExecutionAccelerators\x12R\n\x13input_pinned_memory\x18\x05 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12S\n\x14output_pinned_memory\x18\x06 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12&\n\x1egather_kernel_buffer_threshold\x18\x07 \x01(\r\x12\x16\n\x0e\x65\x61ger_batching\x18\x08 \x01(\x08\x1a\x16\n\x05Graph\x12\r\n\x05level\x18\x01 \x01(\x05\x1a\xba\x05\n\x04\x43uda\x12\x0e\n\x06graphs\x18\x01 \x01(\x08\x12\x18\n\x10\x62usy_wait_events\x18\x02 \x01(\x08\x12\x45\n\ngraph_spec\x18\x03 \x03(\x0b\x32\x31.inference.ModelOptimizationPolicy.Cuda.GraphSpec\x12\x1a\n\x12output_copy_stream\x18\x04 \x01(\x08\x1a\xa4\x04\n\tGraphSpec\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12K\n\x05input\x18\x02 \x03(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry\x12W\n\x11graph_lower_bound\x18\x03 \x01(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound\x1a\x14\n\x05Shape\x12\x0b\n\x03\x64im\x18\x01 \x03(\x03\x1a\xdf\x01\n\nLowerBound\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12V\n\x05input\x18\x02 \x03(\x0b\x32G.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\xa4\x03\n\x15\x45xecutionAccelerators\x12g\n\x19gpu_execution_accelerator\x18\x01 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x12g\n\x19\x63pu_execution_accelerator\x18\x02 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x1a\xb8\x01\n\x0b\x41\x63\x63\x65lerator\x12\x0c\n\x04name\x18\x01 \x01(\t\x12h\n\nparameters\x18\x02 \x03(\x0b\x32T.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a$\n\x12PinnedMemoryBuffer\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"I\n\rModelPriority\x12\x14\n\x10PRIORITY_DEFAULT\x10\x00\x12\x10\n\x0cPRIORITY_MAX\x10\x01\x12\x10\n\x0cPRIORITY_MIN\x10\x02\"\xdb\x01\n\x10ModelQueuePolicy\x12\x41\n\x0etimeout_action\x18\x01 \x01(\x0e\x32).inference.ModelQueuePolicy.TimeoutAction\x12$\n\x1c\x64\x65\x66\x61ult_timeout_microseconds\x18\x02 \x01(\x04\x12\x1e\n\x16\x61llow_timeout_override\x18\x03 \x01(\x08\x12\x16\n\x0emax_queue_size\x18\x04 
\x01(\r\"&\n\rTimeoutAction\x12\n\n\x06REJECT\x10\x00\x12\t\n\x05\x44\x45LAY\x10\x01\"\x9b\x03\n\x14ModelDynamicBatching\x12\x1c\n\x14preferred_batch_size\x18\x01 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x02 \x01(\x04\x12\x19\n\x11preserve_ordering\x18\x03 \x01(\x08\x12\x17\n\x0fpriority_levels\x18\x04 \x01(\r\x12\x1e\n\x16\x64\x65\x66\x61ult_priority_level\x18\x05 \x01(\r\x12\x39\n\x14\x64\x65\x66\x61ult_queue_policy\x18\x06 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy\x12W\n\x15priority_queue_policy\x18\x07 \x03(\x0b\x32\x38.inference.ModelDynamicBatching.PriorityQueuePolicyEntry\x1aW\n\x18PriorityQueuePolicyEntry\x12\x0b\n\x03key\x18\x01 \x01(\r\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy:\x02\x38\x01\"\xef\t\n\x15ModelSequenceBatching\x12\x41\n\x06\x64irect\x18\x03 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyDirectH\x00\x12\x41\n\x06oldest\x18\x04 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyOldestH\x00\x12&\n\x1emax_sequence_idle_microseconds\x18\x01 \x01(\x04\x12\x44\n\rcontrol_input\x18\x02 \x03(\x0b\x32-.inference.ModelSequenceBatching.ControlInput\x12\x35\n\x05state\x18\x05 \x03(\x0b\x32&.inference.ModelSequenceBatching.State\x1a\xb1\x02\n\x07\x43ontrol\x12;\n\x04kind\x18\x01 \x01(\x0e\x32-.inference.ModelSequenceBatching.Control.Kind\x12\x18\n\x10int32_false_true\x18\x02 \x03(\x05\x12\x17\n\x0f\x66p32_false_true\x18\x03 \x03(\x02\x12\x17\n\x0f\x62ool_false_true\x18\x05 \x03(\x08\x12&\n\tdata_type\x18\x04 \x01(\x0e\x32\x13.inference.DataType\"u\n\x04Kind\x12\x1a\n\x16\x43ONTROL_SEQUENCE_START\x10\x00\x12\x1a\n\x16\x43ONTROL_SEQUENCE_READY\x10\x01\x12\x18\n\x14\x43ONTROL_SEQUENCE_END\x10\x02\x12\x1b\n\x17\x43ONTROL_SEQUENCE_CORRID\x10\x03\x1aW\n\x0c\x43ontrolInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x39\n\x07\x63ontrol\x18\x02 \x03(\x0b\x32(.inference.ModelSequenceBatching.Control\x1a\x8a\x01\n\x0cInitialState\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x13\n\tdata_file\x18\x04 \x01(\tH\x00\x12\x0c\n\x04name\x18\x05 \x01(\tB\x0c\n\nstate_data\x1a\xac\x01\n\x05State\x12\x12\n\ninput_name\x18\x01 \x01(\t\x12\x13\n\x0boutput_name\x18\x02 \x01(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12\x44\n\rinitial_state\x18\x05 \x03(\x0b\x32-.inference.ModelSequenceBatching.InitialState\x1aX\n\x0eStrategyDirect\x12$\n\x1cmax_queue_delay_microseconds\x18\x01 \x01(\x04\x12 \n\x18minimum_slot_utilization\x18\x02 \x01(\x02\x1au\n\x0eStrategyOldest\x12\x1f\n\x17max_candidate_sequences\x18\x01 \x01(\x05\x12\x1c\n\x14preferred_batch_size\x18\x02 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x03 \x01(\x04\x42\x11\n\x0fstrategy_choice\"\xdd\x02\n\x0fModelEnsembling\x12-\n\x04step\x18\x01 \x03(\x0b\x32\x1f.inference.ModelEnsembling.Step\x1a\x9a\x02\n\x04Step\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\x03\x12@\n\tinput_map\x18\x03 \x03(\x0b\x32-.inference.ModelEnsembling.Step.InputMapEntry\x12\x42\n\noutput_map\x18\x04 \x03(\x0b\x32..inference.ModelEnsembling.Step.OutputMapEntry\x1a/\n\rInputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x0eOutputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"&\n\x0eModelParameter\x12\x14\n\x0cstring_value\x18\x01 \x01(\t\"\xd9\x02\n\x0bModelWarmup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 
\x01(\r\x12\x32\n\x06inputs\x18\x03 \x03(\x0b\x32\".inference.ModelWarmup.InputsEntry\x12\r\n\x05\x63ount\x18\x04 \x01(\r\x1a\x97\x01\n\x05Input\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x15\n\x0brandom_data\x18\x04 \x01(\x08H\x00\x12\x19\n\x0finput_data_file\x18\x05 \x01(\tH\x00\x42\x11\n\x0finput_data_type\x1aK\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12+\n\x05value\x18\x02 \x01(\x0b\x32\x1c.inference.ModelWarmup.Input:\x02\x38\x01\".\n\x0fModelOperations\x12\x1b\n\x13op_library_filename\x18\x01 \x03(\t\"+\n\x16ModelTransactionPolicy\x12\x11\n\tdecoupled\x18\x01 \x01(\x08\"\xe6\x01\n\x15ModelRepositoryAgents\x12\x36\n\x06\x61gents\x18\x01 \x03(\x0b\x32&.inference.ModelRepositoryAgents.Agent\x1a\x94\x01\n\x05\x41gent\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\nparameters\x18\x02 \x03(\x0b\x32\x36.inference.ModelRepositoryAgents.Agent.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"$\n\x12ModelResponseCache\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"\xb2\n\n\x0bModelConfig\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08platform\x18\x02 \x01(\t\x12\x0f\n\x07\x62\x61\x63kend\x18\x11 \x01(\t\x12\x35\n\x0eversion_policy\x18\x03 \x01(\x0b\x32\x1d.inference.ModelVersionPolicy\x12\x16\n\x0emax_batch_size\x18\x04 \x01(\x05\x12$\n\x05input\x18\x05 \x03(\x0b\x32\x15.inference.ModelInput\x12&\n\x06output\x18\x06 \x03(\x0b\x32\x16.inference.ModelOutput\x12*\n\x0b\x62\x61tch_input\x18\x14 \x03(\x0b\x32\x15.inference.BatchInput\x12,\n\x0c\x62\x61tch_output\x18\x15 \x03(\x0b\x32\x16.inference.BatchOutput\x12\x38\n\x0coptimization\x18\x0c \x01(\x0b\x32\".inference.ModelOptimizationPolicy\x12;\n\x10\x64ynamic_batching\x18\x0b \x01(\x0b\x32\x1f.inference.ModelDynamicBatchingH\x00\x12=\n\x11sequence_batching\x18\r \x01(\x0b\x32 .inference.ModelSequenceBatchingH\x00\x12\x39\n\x13\x65nsemble_scheduling\x18\x0f \x01(\x0b\x32\x1a.inference.ModelEnsemblingH\x00\x12\x35\n\x0einstance_group\x18\x07 \x03(\x0b\x32\x1d.inference.ModelInstanceGroup\x12\x1e\n\x16\x64\x65\x66\x61ult_model_filename\x18\x08 \x01(\t\x12H\n\x12\x63\x63_model_filenames\x18\t \x03(\x0b\x32,.inference.ModelConfig.CcModelFilenamesEntry\x12;\n\x0bmetric_tags\x18\n \x03(\x0b\x32&.inference.ModelConfig.MetricTagsEntry\x12:\n\nparameters\x18\x0e \x03(\x0b\x32&.inference.ModelConfig.ParametersEntry\x12,\n\x0cmodel_warmup\x18\x10 \x03(\x0b\x32\x16.inference.ModelWarmup\x12\x34\n\x10model_operations\x18\x12 \x01(\x0b\x32\x1a.inference.ModelOperations\x12\x43\n\x18model_transaction_policy\x18\x13 \x01(\x0b\x32!.inference.ModelTransactionPolicy\x12\x41\n\x17model_repository_agents\x18\x17 \x01(\x0b\x32 .inference.ModelRepositoryAgents\x12\x35\n\x0eresponse_cache\x18\x18 \x01(\x0b\x32\x1d.inference.ModelResponseCache\x1a\x37\n\x15\x43\x63ModelFilenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x31\n\x0fMetricTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 
\x01(\x0b\x32\x19.inference.ModelParameter:\x02\x38\x01\x42\x13\n\x11scheduling_choice*\xfa\x01\n\x08\x44\x61taType\x12\x10\n\x0cTYPE_INVALID\x10\x00\x12\r\n\tTYPE_BOOL\x10\x01\x12\x0e\n\nTYPE_UINT8\x10\x02\x12\x0f\n\x0bTYPE_UINT16\x10\x03\x12\x0f\n\x0bTYPE_UINT32\x10\x04\x12\x0f\n\x0bTYPE_UINT64\x10\x05\x12\r\n\tTYPE_INT8\x10\x06\x12\x0e\n\nTYPE_INT16\x10\x07\x12\x0e\n\nTYPE_INT32\x10\x08\x12\x0e\n\nTYPE_INT64\x10\t\x12\r\n\tTYPE_FP16\x10\n\x12\r\n\tTYPE_FP32\x10\x0b\x12\r\n\tTYPE_FP64\x10\x0c\x12\x0f\n\x0bTYPE_STRING\x10\r\x12\r\n\tTYPE_BF16\x10\x0e\x62\x06proto3' +) + +_DATATYPE = DESCRIPTOR.enum_types_by_name['DataType'] +DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE) +TYPE_INVALID = 0 +TYPE_BOOL = 1 +TYPE_UINT8 = 2 +TYPE_UINT16 = 3 +TYPE_UINT32 = 4 +TYPE_UINT64 = 5 +TYPE_INT8 = 6 +TYPE_INT16 = 7 +TYPE_INT32 = 8 +TYPE_INT64 = 9 +TYPE_FP16 = 10 +TYPE_FP32 = 11 +TYPE_FP64 = 12 +TYPE_STRING = 13 +TYPE_BF16 = 14 + +_MODELRATELIMITER = DESCRIPTOR.message_types_by_name['ModelRateLimiter'] +_MODELRATELIMITER_RESOURCE = _MODELRATELIMITER.nested_types_by_name['Resource'] +_MODELINSTANCEGROUP = DESCRIPTOR.message_types_by_name['ModelInstanceGroup'] +_MODELINSTANCEGROUP_SECONDARYDEVICE = _MODELINSTANCEGROUP.nested_types_by_name[ + 'SecondaryDevice'] +_MODELTENSORRESHAPE = DESCRIPTOR.message_types_by_name['ModelTensorReshape'] +_MODELINPUT = DESCRIPTOR.message_types_by_name['ModelInput'] +_MODELOUTPUT = DESCRIPTOR.message_types_by_name['ModelOutput'] +_BATCHINPUT = DESCRIPTOR.message_types_by_name['BatchInput'] +_BATCHOUTPUT = DESCRIPTOR.message_types_by_name['BatchOutput'] +_MODELVERSIONPOLICY = DESCRIPTOR.message_types_by_name['ModelVersionPolicy'] +_MODELVERSIONPOLICY_LATEST = _MODELVERSIONPOLICY.nested_types_by_name['Latest'] +_MODELVERSIONPOLICY_ALL = _MODELVERSIONPOLICY.nested_types_by_name['All'] +_MODELVERSIONPOLICY_SPECIFIC = _MODELVERSIONPOLICY.nested_types_by_name[ + 'Specific'] +_MODELOPTIMIZATIONPOLICY = DESCRIPTOR.message_types_by_name[ + 'ModelOptimizationPolicy'] +_MODELOPTIMIZATIONPOLICY_GRAPH = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'Graph'] +_MODELOPTIMIZATIONPOLICY_CUDA = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'Cuda'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC = _MODELOPTIMIZATIONPOLICY_CUDA.nested_types_by_name[ + 'GraphSpec'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ + 'Shape'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ + 'LowerBound'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.nested_types_by_name[ + 'InputEntry'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ + 'InputEntry'] +_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'ExecutionAccelerators'] +_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.nested_types_by_name[ + 'Accelerator'] +_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.nested_types_by_name[ + 'ParametersEntry'] +_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'PinnedMemoryBuffer'] +_MODELQUEUEPOLICY = DESCRIPTOR.message_types_by_name['ModelQueuePolicy'] +_MODELDYNAMICBATCHING = DESCRIPTOR.message_types_by_name[ + 
'ModelDynamicBatching'] +_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY = _MODELDYNAMICBATCHING.nested_types_by_name[ + 'PriorityQueuePolicyEntry'] +_MODELSEQUENCEBATCHING = DESCRIPTOR.message_types_by_name[ + 'ModelSequenceBatching'] +_MODELSEQUENCEBATCHING_CONTROL = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'Control'] +_MODELSEQUENCEBATCHING_CONTROLINPUT = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'ControlInput'] +_MODELSEQUENCEBATCHING_INITIALSTATE = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'InitialState'] +_MODELSEQUENCEBATCHING_STATE = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'State'] +_MODELSEQUENCEBATCHING_STRATEGYDIRECT = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'StrategyDirect'] +_MODELSEQUENCEBATCHING_STRATEGYOLDEST = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'StrategyOldest'] +_MODELENSEMBLING = DESCRIPTOR.message_types_by_name['ModelEnsembling'] +_MODELENSEMBLING_STEP = _MODELENSEMBLING.nested_types_by_name['Step'] +_MODELENSEMBLING_STEP_INPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[ + 'InputMapEntry'] +_MODELENSEMBLING_STEP_OUTPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[ + 'OutputMapEntry'] +_MODELPARAMETER = DESCRIPTOR.message_types_by_name['ModelParameter'] +_MODELWARMUP = DESCRIPTOR.message_types_by_name['ModelWarmup'] +_MODELWARMUP_INPUT = _MODELWARMUP.nested_types_by_name['Input'] +_MODELWARMUP_INPUTSENTRY = _MODELWARMUP.nested_types_by_name['InputsEntry'] +_MODELOPERATIONS = DESCRIPTOR.message_types_by_name['ModelOperations'] +_MODELTRANSACTIONPOLICY = DESCRIPTOR.message_types_by_name[ + 'ModelTransactionPolicy'] +_MODELREPOSITORYAGENTS = DESCRIPTOR.message_types_by_name[ + 'ModelRepositoryAgents'] +_MODELREPOSITORYAGENTS_AGENT = _MODELREPOSITORYAGENTS.nested_types_by_name[ + 'Agent'] +_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY = _MODELREPOSITORYAGENTS_AGENT.nested_types_by_name[ + 'ParametersEntry'] +_MODELRESPONSECACHE = DESCRIPTOR.message_types_by_name['ModelResponseCache'] +_MODELCONFIG = DESCRIPTOR.message_types_by_name['ModelConfig'] +_MODELCONFIG_CCMODELFILENAMESENTRY = _MODELCONFIG.nested_types_by_name[ + 'CcModelFilenamesEntry'] +_MODELCONFIG_METRICTAGSENTRY = _MODELCONFIG.nested_types_by_name[ + 'MetricTagsEntry'] +_MODELCONFIG_PARAMETERSENTRY = _MODELCONFIG.nested_types_by_name[ + 'ParametersEntry'] +_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND = _MODELINSTANCEGROUP_SECONDARYDEVICE.enum_types_by_name[ + 'SecondaryDeviceKind'] +_MODELINSTANCEGROUP_KIND = _MODELINSTANCEGROUP.enum_types_by_name['Kind'] +_MODELINPUT_FORMAT = _MODELINPUT.enum_types_by_name['Format'] +_BATCHINPUT_KIND = _BATCHINPUT.enum_types_by_name['Kind'] +_BATCHOUTPUT_KIND = _BATCHOUTPUT.enum_types_by_name['Kind'] +_MODELOPTIMIZATIONPOLICY_MODELPRIORITY = _MODELOPTIMIZATIONPOLICY.enum_types_by_name[ + 'ModelPriority'] +_MODELQUEUEPOLICY_TIMEOUTACTION = _MODELQUEUEPOLICY.enum_types_by_name[ + 'TimeoutAction'] +_MODELSEQUENCEBATCHING_CONTROL_KIND = _MODELSEQUENCEBATCHING_CONTROL.enum_types_by_name[ + 'Kind'] +ModelRateLimiter = _reflection.GeneratedProtocolMessageType( + 'ModelRateLimiter', + (_message.Message, ), + { + 'Resource': + _reflection.GeneratedProtocolMessageType( + 'Resource', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELRATELIMITER_RESOURCE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRateLimiter.Resource) + }), + 'DESCRIPTOR': + _MODELRATELIMITER, + '__module__': + 'model_config.protxt_pb2' + # 
@@protoc_insertion_point(class_scope:inference.ModelRateLimiter) + }) +_sym_db.RegisterMessage(ModelRateLimiter) +_sym_db.RegisterMessage(ModelRateLimiter.Resource) + +ModelInstanceGroup = _reflection.GeneratedProtocolMessageType( + 'ModelInstanceGroup', + (_message.Message, ), + { + 'SecondaryDevice': + _reflection.GeneratedProtocolMessageType( + 'SecondaryDevice', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELINSTANCEGROUP_SECONDARYDEVICE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup.SecondaryDevice) + }), + 'DESCRIPTOR': + _MODELINSTANCEGROUP, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup) + }) +_sym_db.RegisterMessage(ModelInstanceGroup) +_sym_db.RegisterMessage(ModelInstanceGroup.SecondaryDevice) + +ModelTensorReshape = _reflection.GeneratedProtocolMessageType( + 'ModelTensorReshape', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELTENSORRESHAPE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelTensorReshape) + }) +_sym_db.RegisterMessage(ModelTensorReshape) + +ModelInput = _reflection.GeneratedProtocolMessageType( + 'ModelInput', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELINPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelInput) + }) +_sym_db.RegisterMessage(ModelInput) + +ModelOutput = _reflection.GeneratedProtocolMessageType( + 'ModelOutput', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELOUTPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOutput) + }) +_sym_db.RegisterMessage(ModelOutput) + +BatchInput = _reflection.GeneratedProtocolMessageType( + 'BatchInput', + (_message.Message, ), + { + 'DESCRIPTOR': _BATCHINPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.BatchInput) + }) +_sym_db.RegisterMessage(BatchInput) + +BatchOutput = _reflection.GeneratedProtocolMessageType( + 'BatchOutput', + (_message.Message, ), + { + 'DESCRIPTOR': _BATCHOUTPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.BatchOutput) + }) +_sym_db.RegisterMessage(BatchOutput) + +ModelVersionPolicy = _reflection.GeneratedProtocolMessageType( + 'ModelVersionPolicy', + (_message.Message, ), + { + 'Latest': + _reflection.GeneratedProtocolMessageType( + 'Latest', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELVERSIONPOLICY_LATEST, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Latest) + }), + 'All': + _reflection.GeneratedProtocolMessageType( + 'All', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELVERSIONPOLICY_ALL, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.All) + }), + 'Specific': + _reflection.GeneratedProtocolMessageType( + 'Specific', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELVERSIONPOLICY_SPECIFIC, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Specific) + }), + 'DESCRIPTOR': + _MODELVERSIONPOLICY, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy) + }) +_sym_db.RegisterMessage(ModelVersionPolicy) +_sym_db.RegisterMessage(ModelVersionPolicy.Latest) +_sym_db.RegisterMessage(ModelVersionPolicy.All) 
+_sym_db.RegisterMessage(ModelVersionPolicy.Specific) + +ModelOptimizationPolicy = _reflection.GeneratedProtocolMessageType( + 'ModelOptimizationPolicy', + (_message.Message, ), + { + 'Graph': + _reflection.GeneratedProtocolMessageType( + 'Graph', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELOPTIMIZATIONPOLICY_GRAPH, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Graph) + }), + 'Cuda': + _reflection.GeneratedProtocolMessageType( + 'Cuda', + (_message.Message, ), + { + 'GraphSpec': + _reflection.GeneratedProtocolMessageType( + 'GraphSpec', + (_message.Message, ), + { + 'Shape': + _reflection.GeneratedProtocolMessageType( + 'Shape', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape) + }), + 'LowerBound': + _reflection.GeneratedProtocolMessageType( + 'LowerBound', + (_message.Message, ), + { + 'InputEntry': + _reflection.GeneratedProtocolMessageType( + 'InputEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) + }), + 'InputEntry': + _reflection.GeneratedProtocolMessageType( + 'InputEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda) + }), + 'ExecutionAccelerators': + _reflection.GeneratedProtocolMessageType( + 'ExecutionAccelerators', + (_message.Message, ), + { + 'Accelerator': + _reflection.GeneratedProtocolMessageType( + 'Accelerator', + (_message.Message, ), + { + 'ParametersEntry': + _reflection.GeneratedProtocolMessageType( + 'ParametersEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators) + }), + 'PinnedMemoryBuffer': + _reflection.GeneratedProtocolMessageType( + 'PinnedMemoryBuffer', + (_message.Message, ), + { + 'DESCRIPTOR': 
_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.PinnedMemoryBuffer) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy) + }) +_sym_db.RegisterMessage(ModelOptimizationPolicy) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Graph) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.Shape) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) +_sym_db.RegisterMessage( + ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) +_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators) +_sym_db.RegisterMessage( + ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) +_sym_db.RegisterMessage( + ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) +_sym_db.RegisterMessage(ModelOptimizationPolicy.PinnedMemoryBuffer) + +ModelQueuePolicy = _reflection.GeneratedProtocolMessageType( + 'ModelQueuePolicy', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELQUEUEPOLICY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelQueuePolicy) + }) +_sym_db.RegisterMessage(ModelQueuePolicy) + +ModelDynamicBatching = _reflection.GeneratedProtocolMessageType( + 'ModelDynamicBatching', + (_message.Message, ), + { + 'PriorityQueuePolicyEntry': + _reflection.GeneratedProtocolMessageType( + 'PriorityQueuePolicyEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching.PriorityQueuePolicyEntry) + }), + 'DESCRIPTOR': + _MODELDYNAMICBATCHING, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching) + }) +_sym_db.RegisterMessage(ModelDynamicBatching) +_sym_db.RegisterMessage(ModelDynamicBatching.PriorityQueuePolicyEntry) + +ModelSequenceBatching = _reflection.GeneratedProtocolMessageType( + 'ModelSequenceBatching', + (_message.Message, ), + { + 'Control': + _reflection.GeneratedProtocolMessageType( + 'Control', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROL, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.Control) + }), + 'ControlInput': + _reflection.GeneratedProtocolMessageType( + 'ControlInput', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROLINPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.ControlInput) + }), + 'InitialState': + _reflection.GeneratedProtocolMessageType( + 'InitialState', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_INITIALSTATE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.InitialState) + }), + 'State': + _reflection.GeneratedProtocolMessageType( + 'State', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STATE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.State) + }), + 
'StrategyDirect': + _reflection.GeneratedProtocolMessageType( + 'StrategyDirect', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYDIRECT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyDirect) + }), + 'StrategyOldest': + _reflection.GeneratedProtocolMessageType( + 'StrategyOldest', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYOLDEST, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyOldest) + }), + 'DESCRIPTOR': + _MODELSEQUENCEBATCHING, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching) + }) +_sym_db.RegisterMessage(ModelSequenceBatching) +_sym_db.RegisterMessage(ModelSequenceBatching.Control) +_sym_db.RegisterMessage(ModelSequenceBatching.ControlInput) +_sym_db.RegisterMessage(ModelSequenceBatching.InitialState) +_sym_db.RegisterMessage(ModelSequenceBatching.State) +_sym_db.RegisterMessage(ModelSequenceBatching.StrategyDirect) +_sym_db.RegisterMessage(ModelSequenceBatching.StrategyOldest) + +ModelEnsembling = _reflection.GeneratedProtocolMessageType( + 'ModelEnsembling', + (_message.Message, ), + { + 'Step': + _reflection.GeneratedProtocolMessageType( + 'Step', + (_message.Message, ), + { + 'InputMapEntry': + _reflection.GeneratedProtocolMessageType( + 'InputMapEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELENSEMBLING_STEP_INPUTMAPENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.InputMapEntry) + }), + 'OutputMapEntry': + _reflection.GeneratedProtocolMessageType( + 'OutputMapEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELENSEMBLING_STEP_OUTPUTMAPENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.OutputMapEntry) + }), + 'DESCRIPTOR': + _MODELENSEMBLING_STEP, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step) + }), + 'DESCRIPTOR': + _MODELENSEMBLING, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling) + }) +_sym_db.RegisterMessage(ModelEnsembling) +_sym_db.RegisterMessage(ModelEnsembling.Step) +_sym_db.RegisterMessage(ModelEnsembling.Step.InputMapEntry) +_sym_db.RegisterMessage(ModelEnsembling.Step.OutputMapEntry) + +ModelParameter = _reflection.GeneratedProtocolMessageType( + 'ModelParameter', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELPARAMETER, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelParameter) + }) +_sym_db.RegisterMessage(ModelParameter) + +ModelWarmup = _reflection.GeneratedProtocolMessageType( + 'ModelWarmup', + (_message.Message, ), + { + 'Input': + _reflection.GeneratedProtocolMessageType( + 'Input', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELWARMUP_INPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelWarmup.Input) + }), + 'InputsEntry': + _reflection.GeneratedProtocolMessageType( + 'InputsEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELWARMUP_INPUTSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelWarmup.InputsEntry) + }), + 'DESCRIPTOR': + _MODELWARMUP, + '__module__': + 'model_config.protxt_pb2' + # 
@@protoc_insertion_point(class_scope:inference.ModelWarmup) + }) +_sym_db.RegisterMessage(ModelWarmup) +_sym_db.RegisterMessage(ModelWarmup.Input) +_sym_db.RegisterMessage(ModelWarmup.InputsEntry) + +ModelOperations = _reflection.GeneratedProtocolMessageType( + 'ModelOperations', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELOPERATIONS, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOperations) + }) +_sym_db.RegisterMessage(ModelOperations) + +ModelTransactionPolicy = _reflection.GeneratedProtocolMessageType( + 'ModelTransactionPolicy', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELTRANSACTIONPOLICY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelTransactionPolicy) + }) +_sym_db.RegisterMessage(ModelTransactionPolicy) + +ModelRepositoryAgents = _reflection.GeneratedProtocolMessageType( + 'ModelRepositoryAgents', + (_message.Message, ), + { + 'Agent': + _reflection.GeneratedProtocolMessageType( + 'Agent', + (_message.Message, ), + { + 'ParametersEntry': + _reflection.GeneratedProtocolMessageType( + 'ParametersEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent.ParametersEntry) + }), + 'DESCRIPTOR': + _MODELREPOSITORYAGENTS_AGENT, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent) + }), + 'DESCRIPTOR': + _MODELREPOSITORYAGENTS, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents) + }) +_sym_db.RegisterMessage(ModelRepositoryAgents) +_sym_db.RegisterMessage(ModelRepositoryAgents.Agent) +_sym_db.RegisterMessage(ModelRepositoryAgents.Agent.ParametersEntry) + +ModelResponseCache = _reflection.GeneratedProtocolMessageType( + 'ModelResponseCache', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELRESPONSECACHE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelResponseCache) + }) +_sym_db.RegisterMessage(ModelResponseCache) + +ModelConfig = _reflection.GeneratedProtocolMessageType( + 'ModelConfig', + (_message.Message, ), + { + 'CcModelFilenamesEntry': + _reflection.GeneratedProtocolMessageType( + 'CcModelFilenamesEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELCONFIG_CCMODELFILENAMESENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig.CcModelFilenamesEntry) + }), + 'MetricTagsEntry': + _reflection.GeneratedProtocolMessageType( + 'MetricTagsEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELCONFIG_METRICTAGSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig.MetricTagsEntry) + }), + 'ParametersEntry': + _reflection.GeneratedProtocolMessageType( + 'ParametersEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELCONFIG_PARAMETERSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig.ParametersEntry) + }), + 'DESCRIPTOR': + _MODELCONFIG, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig) + }) +_sym_db.RegisterMessage(ModelConfig) +_sym_db.RegisterMessage(ModelConfig.CcModelFilenamesEntry) +_sym_db.RegisterMessage(ModelConfig.MetricTagsEntry) +_sym_db.RegisterMessage(ModelConfig.ParametersEntry) 
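For orientation: the _sym_db.RegisterMessage(...) calls above publish every generated class to protobuf's default symbol database under the fully qualified names listed in the class_scope comments. A minimal usage sketch — assuming this generated module has already been imported (so the registrations have run), and assuming ModelConfig carries Triton's usual name and max_batch_size fields:

    from google.protobuf import symbol_database
    from google.protobuf import text_format

    # Resolve a class registered above by its fully qualified proto name.
    ModelConfigCls = symbol_database.Default().GetSymbol('inference.ModelConfig')

    # Round-trip a config through the pbtxt text representation.
    cfg = ModelConfigCls(name='demo_model', max_batch_size=4)  # field names assumed
    restored = text_format.Parse(text_format.MessageToString(cfg), ModelConfigCls())
    assert restored.max_batch_size == 4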
+ +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._options = None + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_options = b'8\001' + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._options = None + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_options = b'8\001' + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._options = None + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_options = b'8\001' + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._options = None + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_options = b'8\001' + _MODELENSEMBLING_STEP_INPUTMAPENTRY._options = None + _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_options = b'8\001' + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._options = None + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_options = b'8\001' + _MODELWARMUP_INPUTSENTRY._options = None + _MODELWARMUP_INPUTSENTRY._serialized_options = b'8\001' + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._options = None + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_options = b'8\001' + _MODELCONFIG_CCMODELFILENAMESENTRY._options = None + _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_options = b'8\001' + _MODELCONFIG_METRICTAGSENTRY._options = None + _MODELCONFIG_METRICTAGSENTRY._serialized_options = b'8\001' + _MODELCONFIG_PARAMETERSENTRY._options = None + _MODELCONFIG_PARAMETERSENTRY._serialized_options = b'8\001' + _DATATYPE._serialized_start = 8137 + _DATATYPE._serialized_end = 8387 + _MODELRATELIMITER._serialized_start = 35 + _MODELRATELIMITER._serialized_end = 185 + _MODELRATELIMITER_RESOURCE._serialized_start = 130 + _MODELRATELIMITER_RESOURCE._serialized_end = 185 + _MODELINSTANCEGROUP._serialized_start = 188 + _MODELINSTANCEGROUP._serialized_end = 707 + _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_start = 484 + _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_end = 640 + _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_start = 603 + _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_end = 640 + _MODELINSTANCEGROUP_KIND._serialized_start = 642 + _MODELINSTANCEGROUP_KIND._serialized_end = 707 + _MODELTENSORRESHAPE._serialized_start = 709 + _MODELTENSORRESHAPE._serialized_end = 744 + _MODELINPUT._serialized_start = 747 + _MODELINPUT._serialized_end = 1053 + _MODELINPUT_FORMAT._serialized_start = 994 + _MODELINPUT_FORMAT._serialized_end = 1053 + _MODELOUTPUT._serialized_start = 1056 + _MODELOUTPUT._serialized_end = 1234 + _BATCHINPUT._serialized_start = 1237 + _BATCHINPUT._serialized_end = 1582 + _BATCHINPUT_KIND._serialized_start = 1377 + _BATCHINPUT_KIND._serialized_end = 1582 + _BATCHOUTPUT._serialized_start = 1585 + _BATCHOUTPUT._serialized_end = 1728 + _BATCHOUTPUT_KIND._serialized_start = 1686 + _BATCHOUTPUT_KIND._serialized_end = 1728 + _MODELVERSIONPOLICY._serialized_start = 1731 + _MODELVERSIONPOLICY._serialized_end = 2003 + _MODELVERSIONPOLICY_LATEST._serialized_start = 1919 + _MODELVERSIONPOLICY_LATEST._serialized_end = 1949 + _MODELVERSIONPOLICY_ALL._serialized_start = 1951 + _MODELVERSIONPOLICY_ALL._serialized_end = 1956 + _MODELVERSIONPOLICY_SPECIFIC._serialized_start = 1958 + _MODELVERSIONPOLICY_SPECIFIC._serialized_end = 1986 + _MODELOPTIMIZATIONPOLICY._serialized_start = 2006 + _MODELOPTIMIZATIONPOLICY._serialized_end = 3795 + _MODELOPTIMIZATIONPOLICY_GRAPH._serialized_start = 2536 + 
_MODELOPTIMIZATIONPOLICY_GRAPH._serialized_end = 2558 + _MODELOPTIMIZATIONPOLICY_CUDA._serialized_start = 2561 + _MODELOPTIMIZATIONPOLICY_CUDA._serialized_end = 3259 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_start = 2711 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_end = 3259 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_start = 2910 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_end = 2930 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_start = 2933 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_end = 3156 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_start = 3055 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_end = 3156 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_start = 3055 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_end = 3156 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_start = 3262 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_end = 3682 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_start = 3498 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_end = 3682 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_start = 3633 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_end = 3682 + _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_start = 3684 + _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_end = 3720 + _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_start = 3722 + _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_end = 3795 + _MODELQUEUEPOLICY._serialized_start = 3798 + _MODELQUEUEPOLICY._serialized_end = 4017 + _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_start = 3979 + _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_end = 4017 + _MODELDYNAMICBATCHING._serialized_start = 4020 + _MODELDYNAMICBATCHING._serialized_end = 4431 + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_start = 4344 + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_end = 4431 + _MODELSEQUENCEBATCHING._serialized_start = 4434 + _MODELSEQUENCEBATCHING._serialized_end = 5697 + _MODELSEQUENCEBATCHING_CONTROL._serialized_start = 4759 + _MODELSEQUENCEBATCHING_CONTROL._serialized_end = 5064 + _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_start = 4947 + _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_end = 5064 + _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_start = 5066 + _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_end = 5153 + _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_start = 5156 + _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_end = 5294 + _MODELSEQUENCEBATCHING_STATE._serialized_start = 5297 + _MODELSEQUENCEBATCHING_STATE._serialized_end = 5469 + _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_start = 5471 + _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_end = 5559 + _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_start = 5561 + _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_end = 5678 + _MODELENSEMBLING._serialized_start = 5700 + _MODELENSEMBLING._serialized_end = 6049 + _MODELENSEMBLING_STEP._serialized_start = 5767 + _MODELENSEMBLING_STEP._serialized_end = 6049 + _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_start = 5952 + _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_end = 5999 + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_start = 6001 + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_end = 6049 + 
_MODELPARAMETER._serialized_start = 6051 + _MODELPARAMETER._serialized_end = 6089 + _MODELWARMUP._serialized_start = 6092 + _MODELWARMUP._serialized_end = 6437 + _MODELWARMUP_INPUT._serialized_start = 6209 + _MODELWARMUP_INPUT._serialized_end = 6360 + _MODELWARMUP_INPUTSENTRY._serialized_start = 6362 + _MODELWARMUP_INPUTSENTRY._serialized_end = 6437 + _MODELOPERATIONS._serialized_start = 6439 + _MODELOPERATIONS._serialized_end = 6485 + _MODELTRANSACTIONPOLICY._serialized_start = 6487 + _MODELTRANSACTIONPOLICY._serialized_end = 6530 + _MODELREPOSITORYAGENTS._serialized_start = 6533 + _MODELREPOSITORYAGENTS._serialized_end = 6763 + _MODELREPOSITORYAGENTS_AGENT._serialized_start = 6615 + _MODELREPOSITORYAGENTS_AGENT._serialized_end = 6763 + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_start = 3633 + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_end = 3682 + _MODELRESPONSECACHE._serialized_start = 6765 + _MODELRESPONSECACHE._serialized_end = 6801 + _MODELCONFIG._serialized_start = 6804 + _MODELCONFIG._serialized_end = 8134 + _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_start = 7929 + _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_end = 7984 + _MODELCONFIG_METRICTAGSENTRY._serialized_start = 7986 + _MODELCONFIG_METRICTAGSENTRY._serialized_end = 8035 + _MODELCONFIG_PARAMETERSENTRY._serialized_start = 8037 + _MODELCONFIG_PARAMETERSENTRY._serialized_end = 8113 +# @@protoc_insertion_point(module_scope) From 3a628e36eb5592819c7f1eeb97e1c733f257c257 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 10 Jan 2023 16:48:33 +0800 Subject: [PATCH 48/48] remove chinese tips and remove fastdeploy-python in requirements --- requirements.txt | 1 - .../inference/fastdeploy_client/visualizer.py | 49 ++++++++++++++++++- .../component/inference/fastdeploy_lib.py | 9 ++-- .../component/inference/fastdeploy_server.py | 33 +++++++++---- 4 files changed, 77 insertions(+), 15 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9f492a288..a16a15778 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,6 @@ packaging x2paddle rarfile gradio -fastdeploy-python tritonclient[all] attrdict psutil diff --git a/visualdl/component/inference/fastdeploy_client/visualizer.py b/visualdl/component/inference/fastdeploy_client/visualizer.py index 5abe570e0..2c6abe0b4 100644 --- a/visualdl/component/inference/fastdeploy_client/visualizer.py +++ b/visualdl/component/inference/fastdeploy_client/visualizer.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# =======================================================================
-import fastdeploy as fd
 import numpy as np
 
 __all__ = [
@@ -24,6 +23,12 @@
 
 
 def visualize_detection(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     boxes = np.array(data['boxes'])
     scores = np.array(data['scores'])
     label_ids = np.array(data['label_ids'])
@@ -40,6 +45,12 @@ def visualize_detection(image, data):
 
 
 def visualize_keypoint_detection(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     keypoints = np.array(data['keypoints'])
     scores = np.array(data['scores'])
     num_joints = np.array(data['num_joints'])
@@ -54,6 +65,12 @@ def visualize_keypoint_detection(image, data):
 
 
 def visualize_face_detection(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     data = np.array(data['data'])
     scores = np.array(data['scores'])
     landmarks = np.array(data['landmarks'])
@@ -70,6 +87,12 @@ def visualize_face_detection(image, data):
 
 
 def visualize_face_alignment(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     landmarks = np.array(data['landmarks'])
 
     facealignment_result = fd.C.vision.FaceAlignmentResult()
@@ -80,6 +103,12 @@ def visualize_face_alignment(image, data):
 
 
 def visualize_segmentation(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     label_ids = np.array(data['label_ids'])
     score_map = np.array(data['score_map'])
     shape = np.array(data['shape'])
@@ -94,6 +123,12 @@ def visualize_segmentation(image, data):
 
 
 def visualize_matting(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     alpha = np.array(data['alpha'])
     foreground = np.array(data['foreground'])
     contain_foreground = data['contain_foreground']
@@ -110,6 +145,12 @@ def visualize_matting(image, data):
 
 
 def visualize_ocr(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     boxes = np.array(data['boxes'])
     text = np.array(data['text'])
     rec_scores = np.array(data['rec_scores'])
@@ -128,6 +169,12 @@ def visualize_ocr(image, data):
 
 
 def visualize_headpose(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     euler_angles = np.array(data['euler_angles'])
 
     headpose_result = fd.C.vision.HeadPoseResult()
diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py
index
3c0138f93..5264c6e77 100644
--- a/visualdl/component/inference/fastdeploy_lib.py
+++ b/visualdl/component/inference/fastdeploy_lib.py
@@ -131,7 +131,9 @@ def analyse_config(cur_dir: str):
             all_model_versions[model_name]['1'] = []
     if not all_model_configs:
-        raise Exception('所选择的路径不是一个有效的模型库,请选择正确的路径')
+        raise Exception(
+            'The selected path is not a valid model repository, please choose a valid path.'
+        )
     return all_model_configs, all_model_versions
@@ -454,8 +456,9 @@ def launch_process(kwargs: dict):
             start_args[key] = value
     if start_args['server-name'] and start_args['server-name'] in os.listdir(
             FASTDEPLOYSERVER_PATH):
-        raise RuntimeError("启动服务失败,服务名称{}已经被使用,请重新填写服务名称".format(
-            start_args['server-name']))
+        raise RuntimeError(
+            "Failed to launch server, server name {} is already in use, please choose a different server name."
+            .format(start_args['server-name']))
     all_model_configs, all_model_versions = analyse_config(
         start_args['model-repository'])
     model_repo_config = original_format_to_exchange_format(
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
index 6397c33e5..89b0b13ff 100644
--- a/visualdl/component/inference/fastdeploy_server.py
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -22,7 +22,6 @@
 from multiprocessing import Process
 from pathlib import Path
 
-import fastdeploy as fd
 import requests
 
 from .fastdeploy_client.client_app import create_gradio_client_app
@@ -89,7 +88,8 @@ def config_update(self, cur_dir, model_name, config, config_filename):
         # backup config filename: {original_name}_vdlbackup_{datetime}.pbtxt
         # backup config can only used to restore config.pbtxt
         if 'vdlbackup' in config_filename:
-            raise RuntimeError("备份的配置文件不允许修改")
+            raise RuntimeError(
+                "Backup config files are not permitted to be modified.")
         basename = os.path.splitext(config_filename)[0]
         shutil.copy(
             os.path.join(model_dir, config_filename),
@@ -107,7 +107,8 @@ def start_server(self, configs):
         process = launch_process(configs)
         if process.poll() is not None:
             raise RuntimeError(
-                "启动fastdeployserver服务器失败,请检查环境中是否存在fastdeployserver程序")
+                "Failed to launch fastdeployserver, please check that fastdeployserver is installed in the environment."
+            )
         server_name = configs['server-name'] if configs[
             'server-name'] else str(process.pid)
         self.opened_servers[server_name] = process
@@ -153,8 +154,8 @@ def check_server_alive(self, server_id):
         self._poll_zombie_process()
         if check_process_zombie(server_id) is True:
             raise RuntimeError(
-                "服务{}由于发生异常或者被kill而退出,通常是由于启动参数设置不当或者环境配置有问题,请检查服务日志查看原因,然后手动关闭该服务项"
-                .format(server_id))
+                "Server {} is down due to an exception or was killed, please check "
+                "the log for the reason, then close this server.".format(server_id))
         return
 
     @result()
@@ -170,7 +171,8 @@ def get_pretrain_model_list(self):
             'http://paddlepaddle.org.cn/paddlehub/fastdeploy_listmodels')
         result = res.json()
         if result['status'] != 0:
-            raise RuntimeError("从hub的模型服务器请求模型列表失败")
+            raise RuntimeError(
+                "Failed to get the pre-trained model list from the hub server.")
         else:
             data = result['data']
             model_list = {}
@@ -199,6 +201,12 @@ def download_pretrain_model(self, cur_dir, model_name, version,
                                 pretrain_model_name):
         version_resource_dir = os.path.join(
             os.path.abspath(cur_dir), model_name, version)
+        try:
+            import fastdeploy as fd
+        except ImportError:
+            raise RuntimeError(
+                "fastdeploy is required for downloading pre-trained models, please refer to "
+                "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
         model_path = fd.download_model(
             name=pretrain_model_name, path=version_resource_dir)
         if model_path:
@@ -240,7 +248,9 @@ def download_pretrain_model(self, cur_dir, model_name, version,
                 version_filenames_dict_for_frontend)
             return version_info_for_frontend
         else:
-            raise RuntimeError("预训练模型{}下载失败".format(pretrain_model_name))
+            raise RuntimeError(
+                "Failed to download pre-trained model {}.".format(
+                    pretrain_model_name))
 
     @result()
     def get_config_for_model(self, cur_dir, name, config_filename):
@@ -255,7 +265,8 @@ def delete_config_for_model(self, cur_dir, name, config_filename):
         if self.root_dir not in Path(
                 os.path.abspath(cur_dir)
         ).parents:  # should prevent user remove files outside model-repository
-            raise RuntimeError('所删除的文件路径有误')
+            raise RuntimeError(
+                'Failed to delete config file, please check the file path.')
         if os.path.exists(os.path.join(cur_dir, name, config_filename)):
             os.remove(os.path.join(cur_dir, name, config_filename))
         return get_config_filenames_for_one_model(cur_dir, name)
@@ -280,7 +291,8 @@ def delete_resource_for_model(self, cur_dir, model_name, version,
         if self.root_dir not in Path(
                 os.path.abspath(cur_dir)
         ).parents:  # should prevent user remove files outside model-repository
-            raise RuntimeError('所删除的文件路径有误')
+            raise RuntimeError(
+                'Failed to delete resource file, please check the file path.')
         resource_path = os.path.join(
             os.path.abspath(cur_dir), model_name, version, resource_filename)
         if os.path.exists(resource_path):
@@ -311,7 +323,8 @@ def rename_resource_for_model(self, cur_dir, model_name, version,
         if self.root_dir not in Path(
                 os.path.abspath(cur_dir)
         ).parents:  # should prevent user remove files outside model-repository
-            raise RuntimeError('所重命名的文件路径有误')
+            raise RuntimeError(
+                'Failed to rename resource file, please check the file path.')
         resource_path = os.path.join(
             os.path.abspath(cur_dir), model_name, version, resource_filename)
         new_file_path = os.path.join(