From f2e6a8f75942b1b2957aa0e67bd42e0938b151b0 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Mon, 21 Nov 2022 16:30:26 +0800
Subject: [PATCH 01/48] add backend support for fastdeploy server

---
 .../component/inference/fastdeploy_lib.py     |   54 +
 .../component/inference/fastdeploy_server.py  |  102 +
 .../inference/proto/model_config.protxt       | 1981 +++++++++++++++++
 .../proto/model_config/protxt_pb2.py          |  855 +++++++
 visualdl/server/app.py                        |   11 +
 5 files changed, 3003 insertions(+)
 create mode 100644 visualdl/component/inference/fastdeploy_lib.py
 create mode 100644 visualdl/component/inference/fastdeploy_server.py
 create mode 100644 visualdl/component/inference/proto/model_config.protxt
 create mode 100644 visualdl/component/inference/proto/model_config/protxt_pb2.py

diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py
new file mode 100644
index 000000000..0f56b70d0
--- /dev/null
+++ b/visualdl/component/inference/fastdeploy_lib.py
@@ -0,0 +1,54 @@
+import multiprocessing
+from subprocess import CalledProcessError
+from subprocess import PIPE
+from subprocess import Popen
+
+import google.protobuf.json_format as json_format
+import google.protobuf.text_format as text_format
+
+from .proto.model_config.protxt_pb2 import ModelConfig
+
+
+def pbtxt2json(content: str):
+    '''
+    Convert a protocol message in text format to a JSON string.
+    '''
+    message = text_format.Parse(content, ModelConfig())
+    json_string = json_format.MessageToJson(message)
+    return json_string
+
+
+def json2pbtxt(content: str):
+    '''
+    Convert a JSON string to a protocol message in text format.
+    '''
+    message = json_format.Parse(content, ModelConfig())
+    text_proto = text_format.MessageToString(message)
+    return text_proto
+
+
+def launch_process(kwargs: dict):
+    '''
+    Launch a fastdeployserver process according to the specified arguments.
+    '''
+    cmd = ['fastdeployserver']
+    for key, value in kwargs.items():
+        cmd.append('--{}'.format(key))
+        cmd.append('{}'.format(value))
+    p = Popen(cmd, stdout=PIPE, bufsize=1, universal_newlines=True)
+    return p
+
+
+def get_process_output(process):
+    '''
+    Get the standard output of an opened subprocess.
+    '''
+    for line in process.stdout:
+        yield line
+
+
+def kill_process(process):
+    '''
+    Stop an opened subprocess.
+    '''
+    process.kill()
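Aside for reviewers (not part of the patch): a minimal sketch of how the helpers above chain together. The `model-repository` flag name is an assumption for illustration only; consult `fastdeployserver --help` for the actual CLI options.

    from visualdl.component.inference.fastdeploy_lib import (
        get_process_output, json2pbtxt, kill_process, launch_process,
        pbtxt2json)

    # Round-trip a config between Triton-style text format and JSON.
    pbtxt = 'name: "yolov5"\nmax_batch_size: 16\n'
    as_json = pbtxt2json(pbtxt)  # note that field names become camelCase, e.g. "maxBatchSize"
    assert json2pbtxt(as_json).startswith('name:')

    # Launch a server, tail a few lines of its stdout, then stop it.
    server = launch_process({'model-repository': './models'})  # flag name assumed
    for i, line in enumerate(get_process_output(server)):
        print(line, end='')
        if i >= 9:
            break
    kill_process(server)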
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
new file mode 100644
index 000000000..d54a3a7a4
--- /dev/null
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -0,0 +1,102 @@
+# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =======================================================================
+import base64
+import json
+import os
+import tempfile
+from collections import deque
+from pathlib import Path
+from threading import Lock
+
+from flask import request
+
+from .fastdeploy_lib import get_process_output
+from .fastdeploy_lib import json2pbtxt
+from .fastdeploy_lib import kill_process
+from .fastdeploy_lib import launch_process
+from .fastdeploy_lib import pbtxt2json
+from visualdl.server.api import gen_result
+from visualdl.server.api import result
+
+
+class FastDeployServerApi(object):
+    def __init__(self):
+        self.root_dir = Path(os.getcwd())
+        self.opened_servers = {
+        }  # Used to store the pid and process object of each opened server
+
+    @result()
+    def get_directory(self, cur_dir):
+        if self.root_dir not in Path(os.path.abspath(cur_dir)).parents:
+            cur_dir = '.'
+        cur_dir, sub_dirs, filenames = next(os.walk(cur_dir))
+        if Path(self.root_dir) != Path(os.path.abspath(cur_dir)):
+            sub_dirs.append('..')
+        directories = {
+            'parent_dir':
+            os.path.relpath(Path(os.path.abspath(cur_dir)), self.root_dir),
+            'sub_dir':
+            sub_dirs
+        }
+        return directories
+
+    @result()
+    def get_config(self, cur_dir):
+        pass
+
+    @result()
+    def config_update(self, cur_dir, model_name):
+        pass
+
+    @result()
+    def start_server(self, configs):
+        process = launch_process(configs)
+        self.opened_servers[process.pid] = process
+        return process.pid
+
+    @result()
+    def stop_server(self, server_id):
+        process = self.opened_servers.pop(int(server_id), None)
+        if process is None:
+            return
+        kill_process(process)
+
+    @result('text/plain')
+    def get_server_output(self, server_id):
+        process = self.opened_servers.get(int(server_id))
+        return get_process_output(process) if process else None
+
+
+def create_fastdeploy_api_call():
+    api = FastDeployServerApi()
+    routes = {
+        'get_directory': (api.get_directory, ['dir']),
+        'config_update': (api.config_update, ['dir', 'name']),
+        'get_config': (api.get_config, ['dir']),
+        'start_server': (api.start_server, ['config']),
+        'stop_server': (api.stop_server, ['server_id']),
+        'get_server_output': (api.get_server_output, ['server_id'])
+    }
+
+    def call(path: str, args):
+        route = routes.get(path)
+        if not route:
+            return json.dumps(gen_result(
+                status=1, msg='api not found')), 'application/json', None
+        method, call_arg_names = route
+        call_args = [args.get(name) for name in call_arg_names]
+        return method(*call_args)
+
+    return call
diff --git a/visualdl/component/inference/proto/model_config.protxt b/visualdl/component/inference/proto/model_config.protxt
new file mode 100644
index 000000000..1751f02f7
--- /dev/null
+++ b/visualdl/component/inference/proto/model_config.protxt
@@ -0,0 +1,1981 @@
+// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright (c) 2018, TensorFlow Authors. All rights reserved. + +syntax = "proto3"; + +package inference; + +//@@.. cpp:namespace:: inference + +//@@ +//@@.. cpp:enum:: DataType +//@@ +//@@ Data types supported for input and output tensors. +//@@ +enum DataType { + //@@ .. cpp:enumerator:: DataType::INVALID = 0 + TYPE_INVALID = 0; + + //@@ .. cpp:enumerator:: DataType::BOOL = 1 + TYPE_BOOL = 1; + + //@@ .. cpp:enumerator:: DataType::UINT8 = 2 + TYPE_UINT8 = 2; + //@@ .. cpp:enumerator:: DataType::UINT16 = 3 + TYPE_UINT16 = 3; + //@@ .. cpp:enumerator:: DataType::UINT32 = 4 + TYPE_UINT32 = 4; + //@@ .. cpp:enumerator:: DataType::UINT64 = 5 + TYPE_UINT64 = 5; + + //@@ .. cpp:enumerator:: DataType::INT8 = 6 + TYPE_INT8 = 6; + //@@ .. cpp:enumerator:: DataType::INT16 = 7 + TYPE_INT16 = 7; + //@@ .. cpp:enumerator:: DataType::INT32 = 8 + TYPE_INT32 = 8; + //@@ .. cpp:enumerator:: DataType::INT64 = 9 + TYPE_INT64 = 9; + + //@@ .. cpp:enumerator:: DataType::FP16 = 10 + TYPE_FP16 = 10; + //@@ .. cpp:enumerator:: DataType::FP32 = 11 + TYPE_FP32 = 11; + //@@ .. cpp:enumerator:: DataType::FP64 = 12 + TYPE_FP64 = 12; + + //@@ .. cpp:enumerator:: DataType::STRING = 13 + TYPE_STRING = 13; + + //@@ .. cpp:enumerator:: DataType::BF16 = 14 + TYPE_BF16 = 14; +} + +//@@ +//@@ .. cpp:var:: message ModelRateLimiter +//@@ +//@@ The specifications required by the rate limiter to properly +//@@ schedule the inference requests across the different models +//@@ and their instances. +//@@ +message ModelRateLimiter +{ + //@@ .. cpp:var:: message Resource + //@@ + //@@ The resource property. + //@@ + message Resource + { + //@@ .. cpp:var:: string name + //@@ + //@@ The name associated with the resource. + //@@ + string name = 1; + + //@@ .. cpp:var:: bool global + //@@ + //@@ Whether or not the resource is global. If true then the resource + //@@ is assumed to be shared among the devices otherwise specified + //@@ count of the resource is assumed for each device associated + //@@ with the instance. + //@@ + bool global = 2; + + //@@ .. cpp:var:: uint32 count + //@@ + //@@ The number of resources required for the execution of the model + //@@ instance. + //@@ + uint32 count = 3; + } + + //@@ .. cpp:var:: Resource resources (repeated) + //@@ + //@@ The resources required to execute the request on a model instance. + //@@ Resources are just names with a corresponding count. The execution + //@@ of the instance will be blocked until the specificied resources are + //@@ available. By default an instance uses no rate-limiter resources. + //@@ + repeated Resource resources = 1; + + //@@ .. cpp:var:: uint32 priority + //@@ + //@@ The optional weighting value to be used for prioritizing across + //@@ instances. 
An instance with priority 2 will be given 1/2 the + //@@ number of scheduling chances as an instance_group with priority + //@@ 1. The default priority is 1. The priority of value 0 will be + //@@ treated as priority 1. + //@@ + uint32 priority = 2; +} + +//@@ +//@@.. cpp:var:: message ModelInstanceGroup +//@@ +//@@ A group of one or more instances of a model and resources made +//@@ available for those instances. +//@@ +message ModelInstanceGroup +{ + //@@ + //@@ .. cpp:enum:: Kind + //@@ + //@@ Kind of this instance group. + //@@ + enum Kind { + //@@ .. cpp:enumerator:: Kind::KIND_AUTO = 0 + //@@ + //@@ This instance group represents instances that can run on either + //@@ CPU or GPU. If all GPUs listed in 'gpus' are available then + //@@ instances will be created on GPU(s), otherwise instances will + //@@ be created on CPU. + //@@ + KIND_AUTO = 0; + + //@@ .. cpp:enumerator:: Kind::KIND_GPU = 1 + //@@ + //@@ This instance group represents instances that must run on the + //@@ GPU. + //@@ + KIND_GPU = 1; + + //@@ .. cpp:enumerator:: Kind::KIND_CPU = 2 + //@@ + //@@ This instance group represents instances that must run on the + //@@ CPU. + //@@ + KIND_CPU = 2; + + //@@ .. cpp:enumerator:: Kind::KIND_MODEL = 3 + //@@ + //@@ This instance group represents instances that should run on the + //@@ CPU and/or GPU(s) as specified by the model or backend itself. + //@@ The inference server will not override the model/backend + //@@ settings. + //@@ + KIND_MODEL = 3; + } + + //@@ + //@@ .. cpp:var:: message SecondaryDevice + //@@ + //@@ A secondary device required for a model instance. + //@@ + message SecondaryDevice + { + //@@ + //@@ .. cpp:enum:: SecondaryDeviceKind + //@@ + //@@ The kind of the secondary device. + //@@ + enum SecondaryDeviceKind { + //@@ .. cpp:enumerator:: SecondaryDeviceKind::KIND_NVDLA = 0 + //@@ + //@@ An NVDLA core. http://nvdla.org + //@@ Currently KIND_NVDLA is only supported by the TensorRT backend. + //@@ + KIND_NVDLA = 0; + } + + //@@ .. cpp:var:: SecondaryDeviceKind kind + //@@ + //@@ The secondary device kind. + //@@ + SecondaryDeviceKind kind = 1; + + //@@ .. cpp:var:: int64 device_id + //@@ + //@@ Identifier for the secondary device. + //@@ + int64 device_id = 2; + } + + //@@ .. cpp:var:: string name + //@@ + //@@ Optional name of this group of instances. If not specified the + //@@ name will be formed as _. The name of + //@@ individual instances will be further formed by a unique instance + //@@ number and GPU index: + //@@ + string name = 1; + + //@@ .. cpp:var:: Kind kind + //@@ + //@@ The kind of this instance group. Default is KIND_AUTO. If + //@@ KIND_AUTO or KIND_GPU then both 'count' and 'gpu' are valid and + //@@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid + //@@ and 'gpu' cannot be specified. + //@@ + Kind kind = 4; + + //@@ .. cpp:var:: int32 count + //@@ + //@@ For a group assigned to GPU, the number of instances created for + //@@ each GPU listed in 'gpus'. For a group assigned to CPU the number + //@@ of instances created. Default is 1. + int32 count = 2; + + //@@ .. cpp:var:: ModelRateLimiter rate_limiter + //@@ + //@@ The rate limiter specific settings to be associated with this + //@@ instance group. Optional, if not specified no rate limiting + //@@ will be applied to this instance group. + //@@ + ModelRateLimiter rate_limiter = 6; + + //@@ .. cpp:var:: int32 gpus (repeated) + //@@ + //@@ GPU(s) where instances should be available. For each GPU listed, + //@@ 'count' instances of the model will be available. 
Setting 'gpus'
+  //@@   to empty (or not specifying at all) is equivalent to listing all
+  //@@   available GPUs.
+  //@@
+  repeated int32 gpus = 3;
+
+  //@@  .. cpp:var:: SecondaryDevice secondary_devices (repeated)
+  //@@
+  //@@     Secondary devices that are required by instances specified by this
+  //@@     instance group. Optional.
+  //@@
+  repeated SecondaryDevice secondary_devices = 8;
+
+  //@@  .. cpp:var:: string profile (repeated)
+  //@@
+  //@@     For TensorRT models containing multiple optimization profile, this
+  //@@     parameter specifies a set of optimization profiles available to this
+  //@@     instance group. The inference server will choose the optimal profile
+  //@@     based on the shapes of the input tensors. This field should lie
+  //@@     between 0 and <TotalNumberOfOptimizationProfilesInPlanFile> - 1
+  //@@     and be specified only for TensorRT backend, otherwise an error will
+  //@@     be generated. If not specified, the server will select the first
+  //@@     optimization profile by default.
+  //@@
+  repeated string profile = 5;
+
+  //@@  .. cpp:var:: bool passive
+  //@@
+  //@@     Whether the instances within this instance group will be accepting
+  //@@     inference requests from the scheduler. If true, the instances will
+  //@@     not be added to the scheduler. Default value is false.
+  //@@
+  bool passive = 7;
+
+  //@@  .. cpp:var:: string host_policy
+  //@@
+  //@@     The host policy name that the instance to be associated with.
+  //@@     The default value is set to reflect the device kind of the instance,
+  //@@     for instance, KIND_CPU is "cpu", KIND_MODEL is "model" and
+  //@@     KIND_GPU is "gpu_<gpu_id>".
+  //@@
+  string host_policy = 9;
+}
+
+//@@
+//@@.. cpp:var:: message ModelTensorReshape
+//@@
+//@@   Reshape specification for input and output tensors.
+//@@
+message ModelTensorReshape
+{
+  //@@  .. cpp:var:: int64 shape (repeated)
+  //@@
+  //@@     The shape to use for reshaping.
+  //@@
+  repeated int64 shape = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelInput
+//@@
+//@@   An input required by the model.
+//@@
+message ModelInput
+{
+  //@@
+  //@@  .. cpp:enum:: Format
+  //@@
+  //@@     The format for the input.
+  //@@
+  enum Format {
+    //@@    .. cpp:enumerator:: Format::FORMAT_NONE = 0
+    //@@
+    //@@       The input has no specific format. This is the default.
+    //@@
+    FORMAT_NONE = 0;
+
+    //@@    .. cpp:enumerator:: Format::FORMAT_NHWC = 1
+    //@@
+    //@@       HWC image format. Tensors with this format require 3 dimensions
+    //@@       if the model does not support batching (max_batch_size = 0) or 4
+    //@@       dimensions if the model does support batching (max_batch_size
+    //@@       >= 1). In either case the 'dims' below should only specify the
+    //@@       3 non-batch dimensions (i.e. HWC or CHW).
+    //@@
+    FORMAT_NHWC = 1;
+
+    //@@    .. cpp:enumerator:: Format::FORMAT_NCHW = 2
+    //@@
+    //@@       CHW image format. Tensors with this format require 3 dimensions
+    //@@       if the model does not support batching (max_batch_size = 0) or 4
+    //@@       dimensions if the model does support batching (max_batch_size
+    //@@       >= 1). In either case the 'dims' below should only specify the
+    //@@       3 non-batch dimensions (i.e. HWC or CHW).
+    //@@
+    FORMAT_NCHW = 2;
+  }
+
+  //@@  .. cpp:var:: string name
+  //@@
+  //@@     The name of the input.
+  //@@
+  string name = 1;
+
+  //@@  .. cpp:var:: DataType data_type
+  //@@
+  //@@     The data-type of the input.
+  //@@
+  DataType data_type = 2;
+
+  //@@  .. cpp:var:: Format format
+  //@@
+  //@@     The format of the input. Optional.
+  //@@
+  Format format = 3;
+
+  //@@ ..
cpp:var:: int64 dims (repeated) + //@@ + //@@ The dimensions/shape of the input tensor that must be provided + //@@ when invoking the inference API for this model. + //@@ + repeated int64 dims = 4; + + //@@ .. cpp:var:: ModelTensorReshape reshape + //@@ + //@@ The shape expected for this input by the backend. The input will + //@@ be reshaped to this before being presented to the backend. The + //@@ reshape must have the same number of elements as the input shape + //@@ specified by 'dims'. Optional. + //@@ + ModelTensorReshape reshape = 5; + + //@@ .. cpp:var:: bool is_shape_tensor + //@@ + //@@ Whether or not the input is a shape tensor to the model. This field + //@@ is currently supported only for the TensorRT model. An error will be + //@@ generated if this specification does not comply with underlying + //@@ model. + //@@ + bool is_shape_tensor = 6; + + //@@ .. cpp:var:: bool allow_ragged_batch + //@@ + //@@ Whether or not the input is allowed to be "ragged" in a dynamically + //@@ created batch. Default is false indicating that two requests will + //@@ only be batched if this tensor has the same shape in both requests. + //@@ True indicates that two requests can be batched even if this tensor + //@@ has a different shape in each request. + //@@ + bool allow_ragged_batch = 7; + + //@@ .. cpp:var:: bool optional + //@@ + //@@ Whether or not the input is optional for the model execution. + //@@ If true, the input is not required in the inference request. + //@@ Default value is false. + //@@ + bool optional = 8; +} + +//@@ +//@@.. cpp:var:: message ModelOutput +//@@ +//@@ An output produced by the model. +//@@ +message ModelOutput +{ + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the output. + //@@ + string name = 1; + + //@@ .. cpp:var:: DataType data_type + //@@ + //@@ The data-type of the output. + //@@ + DataType data_type = 2; + + //@@ .. cpp:var:: int64 dims (repeated) + //@@ + //@@ The dimensions/shape of the output tensor. + //@@ + repeated int64 dims = 3; + + //@@ .. cpp:var:: ModelTensorReshape reshape + //@@ + //@@ The shape produced for this output by the backend. The output will + //@@ be reshaped from this to the shape specifed in 'dims' before being + //@@ returned in the inference response. The reshape must have the same + //@@ number of elements as the output shape specified by 'dims'. Optional. + //@@ + ModelTensorReshape reshape = 5; + + //@@ .. cpp:var:: string label_filename + //@@ + //@@ The label file associated with this output. Should be specified only + //@@ for outputs that represent classifications. Optional. + //@@ + string label_filename = 4; + + + //@@ .. cpp:var:: bool is_shape_tensor + //@@ + //@@ Whether or not the output is a shape tensor to the model. This field + //@@ is currently supported only for the TensorRT model. An error will be + //@@ generated if this specification does not comply with underlying + //@@ model. + //@@ + bool is_shape_tensor = 6; +} + +//@@ .. cpp:var:: message BatchInput +//@@ +//@@ A batch input is an additional input that must be added by +//@@ the backend based on all the requests in a batch. +//@@ +message BatchInput +{ + //@@ + //@@ .. cpp:enum:: Kind + //@@ + //@@ The kind of the batch input. + //@@ + enum Kind { + //@@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0 + //@@ + //@@ The element count of the 'source_input' will be added as + //@@ input with shape [1]. + //@@ + BATCH_ELEMENT_COUNT = 0; + + //@@ .. 
cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1 + //@@ + //@@ The accumulated element count of the 'source_input' will be + //@@ added as input with shape [1]. For example, if there is a + //@@ batch of two request, each with 2 elements, an input of value + //@@ 2 will be added to the first request, and an input of value + //@@ 4 will be added to the second request. + //@@ + BATCH_ACCUMULATED_ELEMENT_COUNT = 1; + + //@@ .. cpp:enumerator:: + //@@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2 + //@@ + //@@ The accumulated element count of the 'source_input' will be + //@@ added as input with shape [1], except for the first request + //@@ in the batch. For the first request in the batch, the input + //@@ will have shape [2] where the first element is value 0. + //@@ + BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2; + + //@@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3 + //@@ + //@@ Among the requests in the batch, the max element count of the + //@@ 'source_input' will be added as input with shape + //@@ [max_element_count] for the first request in the batch. + //@@ For other requests, such input will be with shape [0]. + //@@ The data of the tensor will be uninitialized. + //@@ + BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3; + + //@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE = 4 + //@@ + //@@ Among the requests in the batch, the shape of the + //@@ 'source_input' will be added as input with shape + //@@ [batch_size, len(input_dim)]. For example, if one + //@@ batch-2 input with shape [3, 1] and batch-1 input + //@@ with shape [2, 2] are batched, the batch input will + //@@ have shape [3, 2] and value [ [3, 1], [3, 1], [2, 2]]. + //@@ + BATCH_ITEM_SHAPE = 4; + + //@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE_FLATTEN = 5 + //@@ + //@@ Among the requests in the batch, the shape of the + //@@ 'source_input' will be added as input with single dimensional + //@@ shape [batch_size * len(input_dim)]. For example, if one + //@@ batch-2 input with shape [3, 1] and batch-1 input + //@@ with shape [2, 2] are batched, the batch input will + //@@ have shape [6] and value [3, 1, 3, 1, 2, 2]. + //@@ + BATCH_ITEM_SHAPE_FLATTEN = 5; + } + + //@@ .. cpp:var:: Kind kind + //@@ + //@@ The kind of this batch input. + //@@ + Kind kind = 1; + + //@@ .. cpp:var:: string target_name (repeated) + //@@ + //@@ The name of the model inputs that the backend will create + //@@ for this batch input. + //@@ + repeated string target_name = 2; + + //@@ .. cpp:var:: DataType data_type + //@@ + //@@ The input's datatype. The data type can be TYPE_INT32 or + //@@ TYPE_FP32. + //@@ + DataType data_type = 3; + + //@@ .. cpp:var:: string source_input (repeated) + //@@ + //@@ The backend derives the value for each batch input from one or + //@@ more other inputs. 'source_input' gives the names of those + //@@ inputs. + //@@ + repeated string source_input = 4; +} + +//@@.. cpp:var:: message BatchOutput +//@@ +//@@ A batch output is an output produced by the model that must be handled +//@@ differently by the backend based on all the requests in a batch. +//@@ +message BatchOutput +{ + //@@ + //@@ .. cpp:enum:: Kind + //@@ + //@@ The kind of the batch output. + //@@ + enum Kind { + //@@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0 + //@@ + //@@ The output should be scattered according to the shape of + //@@ 'source_input'. The dynamic dimension of the output will + //@@ be set to the value of the same dimension in the input. + //@@ + BATCH_SCATTER_WITH_INPUT_SHAPE = 0; + } + + //@@ .. 
cpp:var:: string target_name (repeated) + //@@ + //@@ The name of the outputs to be produced by this batch output + //@@ specification. + //@@ + repeated string target_name = 1; + + //@@ .. cpp:var:: Kind kind + //@@ + //@@ The kind of this batch output. + //@@ + Kind kind = 2; + + //@@ .. cpp:var:: string source_input (repeated) + //@@ + //@@ The backend derives each batch output from one or more inputs. + //@@ 'source_input' gives the names of those inputs. + //@@ + repeated string source_input = 3; +} + +//@@ +//@@.. cpp:var:: message ModelVersionPolicy +//@@ +//@@ Policy indicating which versions of a model should be made +//@@ available by the inference server. +//@@ +message ModelVersionPolicy +{ + //@@ .. cpp:var:: message Latest + //@@ + //@@ Serve only the latest version(s) of a model. This is + //@@ the default policy. + //@@ + message Latest + { + //@@ .. cpp:var:: uint32 num_versions + //@@ + //@@ Serve only the 'num_versions' highest-numbered versions. T + //@@ The default value of 'num_versions' is 1, indicating that by + //@@ default only the single highest-number version of a + //@@ model will be served. + //@@ + uint32 num_versions = 1; + } + + //@@ .. cpp:var:: message All + //@@ + //@@ Serve all versions of the model. + //@@ + message All {} + + //@@ .. cpp:var:: message Specific + //@@ + //@@ Serve only specific versions of the model. + //@@ + message Specific + { + //@@ .. cpp:var:: int64 versions (repeated) + //@@ + //@@ The specific versions of the model that will be served. + //@@ + repeated int64 versions = 1; + } + + //@@ .. cpp:var:: oneof policy_choice + //@@ + //@@ Each model must implement only a single version policy. The + //@@ default policy is 'Latest'. + //@@ + oneof policy_choice + { + //@@ .. cpp:var:: Latest latest + //@@ + //@@ Serve only latest version(s) of the model. + //@@ + Latest latest = 1; + + //@@ .. cpp:var:: All all + //@@ + //@@ Serve all versions of the model. + //@@ + All all = 2; + + //@@ .. cpp:var:: Specific specific + //@@ + //@@ Serve only specific version(s) of the model. + //@@ + Specific specific = 3; + } +} + +//@@ +//@@.. cpp:var:: message ModelOptimizationPolicy +//@@ +//@@ Optimization settings for a model. These settings control if/how a +//@@ model is optimized and prioritized by the backend framework when +//@@ it is loaded. +//@@ +message ModelOptimizationPolicy +{ + //@@ + //@@ .. cpp:var:: message Graph + //@@ + //@@ Enable generic graph optimization of the model. If not specified + //@@ the framework's default level of optimization is used. Supports + //@@ TensorFlow graphdef and savedmodel and Onnx models. For TensorFlow + //@@ causes XLA to be enabled/disabled for the model. For Onnx defaults + //@@ to enabling all optimizations, -1 enables only basic optimizations, + //@@ +1 enables only basic and extended optimizations. + //@@ + message Graph + { + //@@ .. cpp:var:: int32 level + //@@ + //@@ The optimization level. Defaults to 0 (zero) if not specified. + //@@ + //@@ - -1: Disabled + //@@ - 0: Framework default + //@@ - 1+: Enable optimization level (greater values indicate + //@@ higher optimization levels) + //@@ + int32 level = 1; + } + + //@@ + //@@ .. cpp:enum:: ModelPriority + //@@ + //@@ Model priorities. A model will be given scheduling and execution + //@@ preference over models at lower priorities. Current model + //@@ priorities only work for TensorRT models. + //@@ + enum ModelPriority { + //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0 + //@@ + //@@ The default model priority. 
+  //@@
+    PRIORITY_DEFAULT = 0;
+
+    //@@    .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1
+    //@@
+    //@@       The maximum model priority.
+    //@@
+    PRIORITY_MAX = 1;
+
+    //@@    .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2
+    //@@
+    //@@       The minimum model priority.
+    //@@
+    PRIORITY_MIN = 2;
+  }
+
+  //@@
+  //@@  .. cpp:var:: message Cuda
+  //@@
+  //@@     CUDA-specific optimization settings.
+  //@@
+  message Cuda
+  {
+    //@@  .. cpp:var:: message GraphSpec
+    //@@
+    //@@     Specification of the CUDA graph to be captured.
+    //@@
+    message GraphSpec
+    {
+      //@@  .. cpp:var:: message Dims
+      //@@
+      //@@     Specification of tensor dimension.
+      //@@
+      message Shape
+      {
+        //@@  .. cpp:var:: int64 dim (repeated)
+        //@@
+        //@@     The dimension.
+        //@@
+        repeated int64 dim = 1;
+      }
+
+      message LowerBound
+      {
+        //@@  .. cpp:var:: int32 batch_size
+        //@@
+        //@@     The batch size of the CUDA graph. If 'max_batch_size' is 0,
+        //@@     'batch_size' must be set to 0. Otherwise, 'batch_size' must
+        //@@     be set to value between 1 and 'max_batch_size'.
+        //@@
+        int32 batch_size = 1;
+
+        //@@  .. cpp:var:: map<string, Shape> input
+        //@@
+        //@@     The specification of the inputs. 'Shape' is the shape of
+        //@@     the input without batching dimension.
+        //@@
+        map<string, Shape> input = 2;
+      }
+
+      //@@  .. cpp:var:: int32 batch_size
+      //@@
+      //@@     The batch size of the CUDA graph. If 'max_batch_size' is 0,
+      //@@     'batch_size' must be set to 0. Otherwise, 'batch_size' must
+      //@@     be set to value between 1 and 'max_batch_size'.
+      //@@
+      int32 batch_size = 1;
+
+      //@@  .. cpp:var:: map<string, Shape> input
+      //@@
+      //@@     The specification of the inputs. 'Shape' is the shape of the
+      //@@     input without batching dimension.
+      //@@
+      map<string, Shape> input = 2;
+
+      //@@  .. cpp:var:: LowerBound graph_lower_bound
+      //@@
+      //@@     Specify the lower bound of the CUDA graph. Optional.
+      //@@     If specified, the graph can be used for input shapes and
+      //@@     batch sizes that are in closed interval between the lower
+      //@@     bound specification and graph specification. For dynamic
+      //@@     shape model, this allows CUDA graphs to be launched
+      //@@     frequently without capturing all possible shape combinations.
+      //@@     However, using graph for shape combinations different from
+      //@@     the one used for capturing introduces uninitialized data for
+      //@@     execution and it may distort the inference result if
+      //@@     the model is sensitive to uninitialized data.
+      //@@
+      LowerBound graph_lower_bound = 3;
+    }
+
+    //@@  .. cpp:var:: bool graphs
+    //@@
+    //@@     Use CUDA graphs API to capture model operations and execute
+    //@@     them more efficiently. Default value is false.
+    //@@     Currently only recognized by TensorRT backend.
+    //@@
+    bool graphs = 1;
+
+    //@@  .. cpp:var:: bool busy_wait_events
+    //@@
+    //@@     Use busy-waiting to synchronize CUDA events to achieve minimum
+    //@@     latency from event complete to host thread to be notified, with
+    //@@     the cost of high CPU load. Default value is false.
+    //@@     Currently only recognized by TensorRT backend.
+    //@@
+    bool busy_wait_events = 2;
+
+    //@@  .. cpp:var:: GraphSpec graph_spec (repeated)
+    //@@
+    //@@     Specification of the CUDA graph to be captured. If not specified
+    //@@     and 'graphs' is true, the default CUDA graphs will be captured
+    //@@     based on model settings.
+    //@@     Currently only recognized by TensorRT backend.
+    //@@
+    repeated GraphSpec graph_spec = 3;
+
+    //@@  .. cpp:var:: bool output_copy_stream
+    //@@
+    //@@     Uses a CUDA stream separate from the inference stream to copy the
+    //@@     output to host. However, be aware that setting this option to
+    //@@     true will lead to an increase in the memory consumption of the
+    //@@     model as Triton will allocate twice as much GPU memory for its
+    //@@     I/O tensor buffers. Default value is false.
+    //@@     Currently only recognized by TensorRT backend.
+    //@@
+    bool output_copy_stream = 4;
+  }
+
+  //@@
+  //@@  .. cpp:var:: message ExecutionAccelerators
+  //@@
+  //@@     Specify the preferred execution accelerators to be used to execute
+  //@@     the model. Currently only recognized by ONNX Runtime backend and
+  //@@     TensorFlow backend.
+  //@@
+  //@@     For ONNX Runtime backend, it will deploy the model with the execution
+  //@@     accelerators by priority, the priority is determined based on the
+  //@@     order that they are set, i.e. the provider at the front has highest
+  //@@     priority. Overall, the priority will be in the following order:
+  //@@         <gpu_execution_accelerator> (if instance is on GPU)
+  //@@         CUDA Execution Provider     (if instance is on GPU)
+  //@@         <cpu_execution_accelerator>
+  //@@         Default CPU Execution Provider
+  //@@
+  message ExecutionAccelerators
+  {
+    //@@
+    //@@  .. cpp:var:: message Accelerator
+    //@@
+    //@@     Specify the accelerator to be used to execute the model.
+    //@@     Accelerator with the same name may accept different parameters
+    //@@     depending on the backends.
+    //@@
+    message Accelerator
+    {
+      //@@  .. cpp:var:: string name
+      //@@
+      //@@     The name of the execution accelerator.
+      //@@
+      string name = 1;
+
+      //@@  .. cpp:var:: map<string, string> parameters
+      //@@
+      //@@     Additional parameters used to configure the accelerator.
+      //@@
+      map<string, string> parameters = 2;
+    }
+
+    //@@  .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
+    //@@
+    //@@     The preferred execution provider to be used if the model instance
+    //@@     is deployed on GPU.
+    //@@
+    //@@     For ONNX Runtime backend, possible value is "tensorrt" as name,
+    //@@     and no parameters are required.
+    //@@
+    //@@     For TensorFlow backend, possible values are "tensorrt",
+    //@@     "auto_mixed_precision", "gpu_io".
+    //@@
+    //@@     For "tensorrt", the following parameters can be specified:
+    //@@       "precision_mode": The precision used for optimization.
+    //@@       Allowed values are "FP32" and "FP16". Default value is "FP32".
+    //@@
+    //@@       "max_cached_engines": The maximum number of cached TensorRT
+    //@@       engines in dynamic TensorRT ops. Default value is 100.
+    //@@
+    //@@       "minimum_segment_size": The smallest model subgraph that will
+    //@@       be considered for optimization by TensorRT. Default value is 3.
+    //@@
+    //@@       "max_workspace_size_bytes": The maximum GPU memory the model
+    //@@       can use temporarily during execution. Default value is 1GB.
+    //@@
+    //@@     For "auto_mixed_precision", no parameters are required. If set,
+    //@@     the model will try to use FP16 for better performance.
+    //@@     This optimization cannot be set with "tensorrt".
+    //@@
+    //@@     For "gpu_io", no parameters are required. If set, the model will
+    //@@     be executed using TensorFlow Callable API to set input and output
+    //@@     tensors in GPU memory if possible, which can reduce data transfer
+    //@@     overhead if the model is used in ensemble. However, the Callable
+    //@@     object will be created on model creation and it will request all
+    //@@     outputs for every model execution, which may impact the
+    //@@     performance if a request does not require all outputs. This
+    //@@     optimization will only take effect if the model instance is
+    //@@     created with KIND_GPU.
+    //@@
+    repeated Accelerator gpu_execution_accelerator = 1;
+
+    //@@ ..
cpp:var:: Accelerator cpu_execution_accelerator (repeated) + //@@ + //@@ The preferred execution provider to be used if the model instance + //@@ is deployed on CPU. + //@@ + //@@ For ONNX Runtime backend, possible value is "openvino" as name, + //@@ and no parameters are required. + //@@ + repeated Accelerator cpu_execution_accelerator = 2; + } + + //@@ + //@@ .. cpp:var:: message PinnedMemoryBuffer + //@@ + //@@ Specify whether to use a pinned memory buffer when transferring data + //@@ between non-pinned system memory and GPU memory. Using a pinned + //@@ memory buffer for system from/to GPU transfers will typically provide + //@@ increased performance. For example, in the common use case where the + //@@ request provides inputs and delivers outputs via non-pinned system + //@@ memory, if the model instance accepts GPU IOs, the inputs will be + //@@ processed by two copies: from non-pinned system memory to pinned + //@@ memory, and from pinned memory to GPU memory. Similarly, pinned + //@@ memory will be used for delivering the outputs. + //@@ + message PinnedMemoryBuffer + { + //@@ .. cpp:var:: bool enable + //@@ + //@@ Use pinned memory buffer. Default is true. + //@@ + bool enable = 1; + } + + //@@ .. cpp:var:: Graph graph + //@@ + //@@ The graph optimization setting for the model. Optional. + //@@ + Graph graph = 1; + + //@@ .. cpp:var:: ModelPriority priority + //@@ + //@@ The priority setting for the model. Optional. + //@@ + ModelPriority priority = 2; + + //@@ .. cpp:var:: Cuda cuda + //@@ + //@@ CUDA-specific optimization settings. Optional. + //@@ + Cuda cuda = 3; + + //@@ .. cpp:var:: ExecutionAccelerators execution_accelerators + //@@ + //@@ The accelerators used for the model. Optional. + //@@ + ExecutionAccelerators execution_accelerators = 4; + + //@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory + //@@ + //@@ Use pinned memory buffer when the data transfer for inputs + //@@ is between GPU memory and non-pinned system memory. + //@@ Default is true. + //@@ + PinnedMemoryBuffer input_pinned_memory = 5; + + //@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory + //@@ + //@@ Use pinned memory buffer when the data transfer for outputs + //@@ is between GPU memory and non-pinned system memory. + //@@ Default is true. + //@@ + PinnedMemoryBuffer output_pinned_memory = 6; + + //@@ .. cpp:var:: uint32 gather_kernel_buffer_threshold + //@@ + //@@ The backend may use a gather kernel to gather input data if the + //@@ device has direct access to the source buffer and the destination + //@@ buffer. In such case, the gather kernel will be used only if the + //@@ number of buffers to be gathered is greater or equal to + //@@ the specifed value. If 0, the gather kernel will be disabled. + //@@ Default value is 0. + //@@ Currently only recognized by TensorRT backend. + //@@ + uint32 gather_kernel_buffer_threshold = 7; + + //@@ .. cpp:var:: bool eager_batching + //@@ + //@@ Start preparing the next batch before the model instance is ready + //@@ for the next inference. This option can be used to overlap the + //@@ batch preparation with model execution, with the trade-off that + //@@ the next batch might be smaller than what it could have been. + //@@ Default value is false. + //@@ Currently only recognized by TensorRT backend. + //@@ + bool eager_batching = 8; +} + +//@@ +//@@.. cpp:var:: message ModelQueuePolicy +//@@ +//@@ Queue policy for inference requests. +//@@ +message ModelQueuePolicy +{ + //@@ + //@@ .. 
cpp:enum:: TimeoutAction + //@@ + //@@ The action applied to timed-out requests. + //@@ + enum TimeoutAction { + //@@ .. cpp:enumerator:: Action::REJECT = 0 + //@@ + //@@ Reject the request and return error message accordingly. + //@@ + REJECT = 0; + + //@@ .. cpp:enumerator:: Action::DELAY = 1 + //@@ + //@@ Delay the request until all other requests at the same + //@@ (or higher) priority levels that have not reached their timeouts + //@@ are processed. A delayed request will eventually be processed, + //@@ but may be delayed indefinitely due to newly arriving requests. + //@@ + DELAY = 1; + } + + //@@ + //@@ .. cpp:var:: TimeoutAction timeout_action + //@@ + //@@ The action applied to timed-out request. + //@@ The default action is REJECT. + //@@ + TimeoutAction timeout_action = 1; + + //@@ + //@@ .. cpp:var:: uint64 default_timeout_microseconds + //@@ + //@@ The default timeout for every request, in microseconds. + //@@ The default value is 0 which indicates that no timeout is set. + //@@ + uint64 default_timeout_microseconds = 2; + + //@@ + //@@ .. cpp:var:: bool allow_timeout_override + //@@ + //@@ Whether individual request can override the default timeout value. + //@@ When true, individual requests can set a timeout that is less than + //@@ the default timeout value but may not increase the timeout. + //@@ The default value is false. + //@@ + bool allow_timeout_override = 3; + + //@@ + //@@ .. cpp:var:: uint32 max_queue_size + //@@ + //@@ The maximum queue size for holding requests. A request will be + //@@ rejected immediately if it can't be enqueued because the queue is + //@@ full. The default value is 0 which indicates that no maximum + //@@ queue size is enforced. + //@@ + uint32 max_queue_size = 4; +} + +//@@ +//@@.. cpp:var:: message ModelDynamicBatching +//@@ +//@@ Dynamic batching configuration. These settings control how dynamic +//@@ batching operates for the model. +//@@ +message ModelDynamicBatching +{ + //@@ .. cpp:var:: int32 preferred_batch_size (repeated) + //@@ + //@@ Preferred batch sizes for dynamic batching. If a batch of one of + //@@ these sizes can be formed it will be executed immediately. If + //@@ not specified a preferred batch size will be chosen automatically + //@@ based on model and GPU characteristics. + //@@ + repeated int32 preferred_batch_size = 1; + + //@@ .. cpp:var:: uint64 max_queue_delay_microseconds + //@@ + //@@ The maximum time, in microseconds, a request will be delayed in + //@@ the scheduling queue to wait for additional requests for + //@@ batching. Default is 0. + //@@ + uint64 max_queue_delay_microseconds = 2; + + //@@ .. cpp:var:: bool preserve_ordering + //@@ + //@@ Should the dynamic batcher preserve the ordering of responses to + //@@ match the order of requests received by the scheduler. Default is + //@@ false. If true, the responses will be returned in the same order as + //@@ the order of requests sent to the scheduler. If false, the responses + //@@ may be returned in arbitrary order. This option is specifically + //@@ needed when a sequence of related inference requests (i.e. inference + //@@ requests with the same correlation ID) are sent to the dynamic + //@@ batcher to ensure that the sequence responses are in the correct + //@@ order. + //@@ + bool preserve_ordering = 3; + + //@@ .. cpp:var:: uint32 priority_levels + //@@ + //@@ The number of priority levels to be enabled for the model, + //@@ the priority level starts from 1 and 1 is the highest priority. 
+  //@@     Requests are handled in priority order with all priority 1 requests
+  //@@     processed before priority 2, all priority 2 requests processed before
+  //@@     priority 3, etc. Requests with the same priority level will be
+  //@@     handled in the order that they are received.
+  //@@
+  uint32 priority_levels = 4;
+
+  //@@  .. cpp:var:: uint32 default_priority_level
+  //@@
+  //@@     The priority level used for requests that don't specify their
+  //@@     priority. The value must be in the range [ 1, 'priority_levels' ].
+  //@@
+  uint32 default_priority_level = 5;
+
+  //@@  .. cpp:var:: ModelQueuePolicy default_queue_policy
+  //@@
+  //@@     The default queue policy used for requests that don't require
+  //@@     priority handling and requests that specify priority levels where
+  //@@     there is no specific policy given. If not specified, a policy with
+  //@@     default field values will be used.
+  //@@
+  ModelQueuePolicy default_queue_policy = 6;
+
+  //@@  .. cpp:var:: map<uint32, ModelQueuePolicy> priority_queue_policy
+  //@@
+  //@@     Specify the queue policy for the priority level. The default queue
+  //@@     policy will be used if a priority level doesn't specify a queue
+  //@@     policy.
+  //@@
+  map<uint32, ModelQueuePolicy> priority_queue_policy = 7;
+}
+
+//@@
+//@@.. cpp:var:: message ModelSequenceBatching
+//@@
+//@@   Sequence batching configuration. These settings control how sequence
+//@@   batching operates for the model.
+//@@
+message ModelSequenceBatching
+{
+  //@@  .. cpp:var:: message Control
+  //@@
+  //@@     A control is a signal that the sequence batcher uses to
+  //@@     communicate with a backend.
+  //@@
+  message Control
+  {
+    //@@
+    //@@  .. cpp:enum:: Kind
+    //@@
+    //@@     The kind of the control.
+    //@@
+    enum Kind {
+      //@@    .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0
+      //@@
+      //@@       A new sequence is/is-not starting. If true a sequence is
+      //@@       starting, if false a sequence is continuing. Must
+      //@@       specify either int32_false_true, fp32_false_true or
+      //@@       bool_false_true for this control. This control is optional.
+      //@@
+      CONTROL_SEQUENCE_START = 0;
+
+      //@@    .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1
+      //@@
+      //@@       A sequence is/is-not ready for inference. If true the
+      //@@       input tensor data is valid and should be used. If false
+      //@@       the input tensor data is invalid and inferencing should
+      //@@       be "skipped". Must specify either int32_false_true,
+      //@@       fp32_false_true or bool_false_true for this control. This
+      //@@       control is optional.
+      //@@
+      CONTROL_SEQUENCE_READY = 1;
+
+      //@@    .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2
+      //@@
+      //@@       A sequence is/is-not ending. If true a sequence is
+      //@@       ending, if false a sequence is continuing. Must specify
+      //@@       either int32_false_true, fp32_false_true or bool_false_true
+      //@@       for this control. This control is optional.
+      //@@
+      CONTROL_SEQUENCE_END = 2;
+
+      //@@    .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3
+      //@@
+      //@@       The correlation ID of the sequence. The correlation ID
+      //@@       is an uint64_t value that is communicated in whole or
+      //@@       in part by the tensor. The tensor's datatype must be
+      //@@       specified by data_type and must be TYPE_UINT64, TYPE_INT64,
+      //@@       TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified
+      //@@       the correlation ID will be truncated to the low-order 32
+      //@@       bits. This control is optional.
+      //@@
+      CONTROL_SEQUENCE_CORRID = 3;
+    }
+
+    //@@  .. cpp:var:: Kind kind
+    //@@
+    //@@     The kind of this control.
+    //@@
+    Kind kind = 1;
+
+    //@@ ..
cpp:var:: int32 int32_false_true (repeated) + //@@ + //@@ The control's true and false setting is indicated by setting + //@@ a value in an int32 tensor. The tensor must be a + //@@ 1-dimensional tensor with size equal to the batch size of + //@@ the request. 'int32_false_true' must have two entries: the + //@@ first the false value and the second the true value. + //@@ + repeated int32 int32_false_true = 2; + + //@@ .. cpp:var:: float fp32_false_true (repeated) + //@@ + //@@ The control's true and false setting is indicated by setting + //@@ a value in a fp32 tensor. The tensor must be a + //@@ 1-dimensional tensor with size equal to the batch size of + //@@ the request. 'fp32_false_true' must have two entries: the + //@@ first the false value and the second the true value. + //@@ + repeated float fp32_false_true = 3; + + //@@ .. cpp:var:: bool bool_false_true (repeated) + //@@ + //@@ The control's true and false setting is indicated by setting + //@@ a value in a bool tensor. The tensor must be a + //@@ 1-dimensional tensor with size equal to the batch size of + //@@ the request. 'bool_false_true' must have two entries: the + //@@ first the false value and the second the true value. + //@@ + repeated bool bool_false_true = 5; + + //@@ .. cpp:var:: DataType data_type + //@@ + //@@ The control's datatype. + //@@ + DataType data_type = 4; + } + + //@@ .. cpp:var:: message ControlInput + //@@ + //@@ The sequence control values to communicate by a model input. + //@@ + message ControlInput + { + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the model input. + //@@ + string name = 1; + + //@@ .. cpp:var:: Control control (repeated) + //@@ + //@@ The control value(s) that should be communicated to the + //@@ model using this model input. + //@@ + repeated Control control = 2; + } + + //@@ + //@@ .. cpp:var:: message InitialState + //@@ + //@@ Settings used to initialize data for implicit state. + //@@ + message InitialState + { + //@@ .. cpp:var:: DataType data_type + //@@ + //@@ The data-type of the state. + //@@ + DataType data_type = 1; + + //@@ .. cpp:var:: int64 dims (repeated) + //@@ + //@@ The shape of the state tensor, not including the batch dimension. + //@@ + repeated int64 dims = 2; + + //@@ .. cpp:var:: oneof state_data + //@@ + //@@ Specify how the initial state data is generated. + //@@ + oneof state_data + { + //@@ + //@@ .. cpp:var:: bool zero_data + //@@ + //@@ The identifier for using zeros as initial state data. + //@@ Note that the value of 'zero_data' will not be checked, + //@@ instead, zero data will be used as long as the field is set. + //@@ + bool zero_data = 3; + + //@@ .. cpp:var:: string data_file + //@@ + //@@ The file whose content will be used as the initial data for + //@@ the state in row-major order. The file must be provided in + //@@ sub-directory 'initial_state' under the model directory. + //@@ + string data_file = 4; + } + + //@@ .. cpp:var:: string name + //@@ + //@@ The name of the state initialization. + //@@ + string name = 5; + } + + //@@ .. cpp:var:: message State + //@@ + //@@ An input / output pair of tensors that carry state for the sequence. + //@@ + message State + { + //@@ .. cpp:var:: string input_name + //@@ + //@@ The name of the model state input. + //@@ + string input_name = 1; + + //@@ .. cpp:var:: string output_name + //@@ + //@@ The name of the model state output. + //@@ + string output_name = 2; + + //@@ .. cpp:var:: DataType data_type + //@@ + //@@ The data-type of the state. + //@@ + DataType data_type = 3; + + //@@ .. 
cpp:var:: int64 dim (repeated) + //@@ + //@@ The dimension. + //@@ + repeated int64 dims = 4; + + //@@ .. cpp:var:: InitialState initial_state (repeated) + //@@ + //@@ The optional field to specify the initial state for the model. + //@@ + repeated InitialState initial_state = 5; + } + + //@@ .. cpp:var:: message StrategyDirect + //@@ + //@@ The sequence batcher uses a specific, unique batch + //@@ slot for each sequence. All inference requests in a + //@@ sequence are directed to the same batch slot in the same + //@@ model instance over the lifetime of the sequence. This + //@@ is the default strategy. + //@@ + message StrategyDirect + { + //@@ .. cpp:var:: uint64 max_queue_delay_microseconds + //@@ + //@@ The maximum time, in microseconds, a candidate request + //@@ will be delayed in the sequence batch scheduling queue to + //@@ wait for additional requests for batching. Default is 0. + //@@ + uint64 max_queue_delay_microseconds = 1; + + //@@ .. cpp:var:: float minimum_slot_utilization + //@@ + //@@ The minimum slot utilization that must be satisfied to + //@@ execute the batch before 'max_queue_delay_microseconds' expires. + //@@ For example, a value of 0.5 indicates that the batch should be + //@@ executed as soon as 50% or more of the slots are ready even if + //@@ the 'max_queue_delay_microseconds' timeout has not expired. + //@@ The default is 0.0, indicating that a batch will be executed + //@@ before 'max_queue_delay_microseconds' timeout expires if at least + //@@ one batch slot is ready. 'max_queue_delay_microseconds' will be + //@@ ignored unless minimum_slot_utilization is set to a non-zero + //@@ value. + //@@ + float minimum_slot_utilization = 2; + } + + //@@ .. cpp:var:: message StrategyOldest + //@@ + //@@ The sequence batcher maintains up to 'max_candidate_sequences' + //@@ candidate sequences. 'max_candidate_sequences' can be greater + //@@ than the model's 'max_batch_size'. For inferencing the batcher + //@@ chooses from the candidate sequences up to 'max_batch_size' + //@@ inference requests. Requests are chosen in an oldest-first + //@@ manner across all candidate sequences. A given sequence is + //@@ not guaranteed to be assigned to the same batch slot for + //@@ all inference requests of that sequence. + //@@ + message StrategyOldest + { + //@@ .. cpp:var:: int32 max_candidate_sequences + //@@ + //@@ Maximum number of candidate sequences that the batcher + //@@ maintains. Excess seqences are kept in an ordered backlog + //@@ and become candidates when existing candidate sequences + //@@ complete. + //@@ + int32 max_candidate_sequences = 1; + + //@@ .. cpp:var:: int32 preferred_batch_size (repeated) + //@@ + //@@ Preferred batch sizes for dynamic batching of candidate + //@@ sequences. If a batch of one of these sizes can be formed + //@@ it will be executed immediately. If not specified a + //@@ preferred batch size will be chosen automatically + //@@ based on model and GPU characteristics. + //@@ + repeated int32 preferred_batch_size = 2; + + //@@ .. cpp:var:: uint64 max_queue_delay_microseconds + //@@ + //@@ The maximum time, in microseconds, a candidate request + //@@ will be delayed in the dynamic batch scheduling queue to + //@@ wait for additional requests for batching. Default is 0. + //@@ + uint64 max_queue_delay_microseconds = 3; + } + + //@@ .. cpp:var:: oneof strategy_choice + //@@ + //@@ The strategy used by the sequence batcher. Default strategy + //@@ is 'direct'. + //@@ + oneof strategy_choice + { + //@@ .. 
cpp:var:: StrategyDirect direct
+    //@@
+    //@@     StrategyDirect scheduling strategy.
+    //@@
+    StrategyDirect direct = 3;
+
+    //@@  .. cpp:var:: StrategyOldest oldest
+    //@@
+    //@@     StrategyOldest scheduling strategy.
+    //@@
+    StrategyOldest oldest = 4;
+  }
+
+  //@@  .. cpp:var:: uint64 max_sequence_idle_microseconds
+  //@@
+  //@@     The maximum time, in microseconds, that a sequence is allowed to
+  //@@     be idle before it is aborted. The inference server considers a
+  //@@     sequence idle when it does not have any inference request queued
+  //@@     for the sequence. If this limit is exceeded, the inference server
+  //@@     will free the sequence slot allocated by the sequence and make it
+  //@@     available for another sequence. If not specified (or specified as
+  //@@     zero) a default value of 1000000 (1 second) is used.
+  //@@
+  uint64 max_sequence_idle_microseconds = 1;
+
+  //@@  .. cpp:var:: ControlInput control_input (repeated)
+  //@@
+  //@@     The model input(s) that the server should use to communicate
+  //@@     sequence start, stop, ready and similar control values to the
+  //@@     model.
+  //@@
+  repeated ControlInput control_input = 2;
+
+  //@@  .. cpp:var:: State state (repeated)
+  //@@
+  //@@     The optional state that can be stored in Triton for performing
+  //@@     inference requests on a sequence. Each sequence holds an implicit
+  //@@     state local to itself. The output state tensor provided by the
+  //@@     model in 'output_name' field of the current inference request will
+  //@@     be transferred as an input tensor named 'input_name' in the next
+  //@@     request of the same sequence. The input state of the first request
+  //@@     in the sequence contains garbage data.
+  //@@
+  repeated State state = 5;
+}
+
+//@@
+//@@.. cpp:var:: message ModelEnsembling
+//@@
+//@@   Model ensembling configuration. These settings specify the models that
+//@@   compose the ensemble and how data flows between the models.
+//@@
+message ModelEnsembling
+{
+  //@@  .. cpp:var:: message Step
+  //@@
+  //@@     Each step specifies a model included in the ensemble,
+  //@@     maps ensemble tensor names to the model input tensors,
+  //@@     and maps model output tensors to ensemble tensor names
+  //@@
+  message Step
+  {
+    //@@  .. cpp:var:: string model_name
+    //@@
+    //@@     The name of the model to execute for this step of the ensemble.
+    //@@
+    string model_name = 1;
+
+    //@@  .. cpp:var:: int64 model_version
+    //@@
+    //@@     The version of the model to use for inference. If -1
+    //@@     the latest/most-recent version of the model is used.
+    //@@
+    int64 model_version = 2;
+
+    //@@  .. cpp:var:: map<string, string> input_map
+    //@@
+    //@@     Map from name of an input tensor on this step's model to ensemble
+    //@@     tensor name. The ensemble tensor must have the same data type and
+    //@@     shape as the model input. Each model input must be assigned to
+    //@@     one ensemble tensor, but the same ensemble tensor can be assigned
+    //@@     to multiple model inputs.
+    //@@
+    map<string, string> input_map = 3;
+
+    //@@  .. cpp:var:: map<string, string> output_map
+    //@@
+    //@@     Map from name of an output tensor on this step's model to ensemble
+    //@@     tensor name. The data type and shape of the ensemble tensor will
+    //@@     be inferred from the model output. It is optional to assign all
+    //@@     model outputs to ensemble tensors. One ensemble tensor name
+    //@@     can appear in an output map only once.
+    //@@
+    map<string, string> output_map = 4;
+  }
+
+  //@@  .. cpp:var:: Step step (repeated)
+  //@@
+  //@@     The models and the input / output mappings used within the ensemble.
+  //@@
+  repeated Step step = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelParameter
+//@@
+//@@   A model parameter.
+//@@
+message ModelParameter
+{
+  //@@  .. cpp:var:: string string_value
+  //@@
+  //@@     The string value of the parameter.
+  //@@
+  string string_value = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelWarmup
+//@@
+//@@   Settings used to construct the request sample for model warmup.
+//@@
+message ModelWarmup
+{
+  //@@
+  //@@  .. cpp:var:: message Input
+  //@@
+  //@@     Meta data associated with an input.
+  //@@
+  message Input
+  {
+    //@@  .. cpp:var:: DataType data_type
+    //@@
+    //@@     The data-type of the input.
+    //@@
+    DataType data_type = 1;
+
+    //@@  .. cpp:var:: int64 dims (repeated)
+    //@@
+    //@@     The shape of the input tensor, not including the batch dimension.
+    //@@
+    repeated int64 dims = 2;
+
+    //@@  .. cpp:var:: oneof input_data_type
+    //@@
+    //@@     Specify how the input data is generated. If the input has STRING
+    //@@     data type and 'random_data' is set, the data generation will fall
+    //@@     back to 'zero_data'.
+    //@@
+    oneof input_data_type
+    {
+      //@@
+      //@@  .. cpp:var:: bool zero_data
+      //@@
+      //@@     The identifier for using zeros as input data. Note that the
+      //@@     value of 'zero_data' will not be checked, instead, zero data
+      //@@     will be used as long as the field is set.
+      //@@
+      bool zero_data = 3;
+
+      //@@
+      //@@  .. cpp:var:: bool random_data
+      //@@
+      //@@     The identifier for using random data as input data. Note that
+      //@@     the value of 'random_data' will not be checked, instead,
+      //@@     random data will be used as long as the field is set.
+      //@@
+      bool random_data = 4;
+
+      //@@  .. cpp:var:: string input_data_file
+      //@@
+      //@@     The file whose content will be used as raw input data in
+      //@@     row-major order. The file must be provided in a sub-directory
+      //@@     'warmup' under the model directory. The file contents should be
+      //@@     in binary format. For TYPE_STRING data-type, an element is
+      //@@     represented by a 4-byte unsigned integer giving the length
+      //@@     followed by the actual bytes.
+      //@@
+      string input_data_file = 5;
+    }
+  }
+
+  //@@  .. cpp:var:: string name
+  //@@
+  //@@     The name of the request sample.
+  //@@
+  string name = 1;
+
+  //@@  .. cpp:var:: uint32 batch_size
+  //@@
+  //@@     The batch size of the inference request. This must be >= 1. For
+  //@@     models that don't support batching, batch_size must be 1. If
+  //@@     batch_size > 1, the 'inputs' specified below will be duplicated to
+  //@@     match the batch size requested.
+  //@@
+  uint32 batch_size = 2;
+
+  //@@  .. cpp:var:: map<string, Input> inputs
+  //@@
+  //@@     The warmup meta data associated with every model input, including
+  //@@     control tensors.
+  //@@
+  map<string, Input> inputs = 3;
+
+  //@@  .. cpp:var:: uint32 count
+  //@@
+  //@@     The number of iterations that this warmup sample will be executed.
+  //@@     For example, if this field is set to 2, 2 model executions using this
+  //@@     sample will be scheduled for warmup. Default value is 0 which
+  //@@     indicates that this sample will be used only once.
+  //@@     Note that for a sequence model, 'count' may not work well
+  //@@     because the model often expects a valid sequence of requests which
+  //@@     should be represented by a series of warmup samples. 'count > 1'
+  //@@     essentially "resends" one of the samples, which may invalidate the
+  //@@     sequence and result in unexpected warmup failure.
+  //@@
+  uint32 count = 4;
+}
+
+//@@
+//@@ .. cpp:var:: message ModelOperations
+//@@
+//@@   The metadata of libraries providing custom operations for this model.
+//@@
+message ModelOperations
+{
+  //@@  .. cpp:var:: string op_library_filename (repeated)
+
+//@@
+//@@ .. cpp:var:: message ModelOperations
+//@@
+//@@   The metadata of libraries providing custom operations for this model.
+//@@
+message ModelOperations
+{
+  //@@ .. cpp:var:: string op_library_filename (repeated)
+  //@@
+  //@@     Optional paths of the libraries providing custom operations for
+  //@@     this model. Valid only for ONNX models.
+  //@@
+  repeated string op_library_filename = 1;
+}
+
+//@@
+//@@ .. cpp:var:: message ModelTransactionPolicy
+//@@
+//@@   The specification that describes the nature of transactions
+//@@   to be expected from the model.
+//@@
+message ModelTransactionPolicy
+{
+  //@@ .. cpp:var:: bool decoupled
+  //@@
+  //@@     Indicates whether responses generated by the model are decoupled
+  //@@     from the requests issued to it, which means the number of responses
+  //@@     generated by the model may differ from the number of requests
+  //@@     issued, and that the responses may be out of order relative to the
+  //@@     order of requests. The default is false, which means the model will
+  //@@     generate exactly one response for each request.
+  //@@
+  bool decoupled = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelRepositoryAgents
+//@@
+//@@   The repository agents for the model.
+//@@
+message ModelRepositoryAgents
+{
+  //@@
+  //@@ .. cpp:var:: message Agent
+  //@@
+  //@@     A repository agent that should be invoked for the specified
+  //@@     repository actions for this model.
+  //@@
+  message Agent
+  {
+    //@@ .. cpp:var:: string name
+    //@@
+    //@@     The name of the agent.
+    //@@
+    string name = 1;
+
+    //@@ .. cpp:var:: map<string,string> parameters
+    //@@
+    //@@     The parameters for the agent.
+    //@@
+    map<string, string> parameters = 2;
+  }
+
+  //@@
+  //@@ .. cpp:var:: Agent agents (repeated)
+  //@@
+  //@@     The ordered list of agents for the model. These agents will be
+  //@@     invoked in order to respond to repository actions occurring for the
+  //@@     model.
+  //@@
+  repeated Agent agents = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelResponseCache
+//@@
+//@@   The response cache setting for the model.
+//@@
+message ModelResponseCache
+{
+  //@@
+  //@@ .. cpp:var:: bool enable
+  //@@
+  //@@     Whether or not to use the response cache for the model. If True,
+  //@@     the responses from the model are cached and when an identical
+  //@@     request is encountered, instead of going through the model
+  //@@     execution, the response from the cache is utilized. By default,
+  //@@     the response cache is disabled for the models.
+  //@@
+  bool enable = 1;
+}
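+
+// For illustration, a sketch of enabling the cache in a model's text-format
+// configuration (not taken from any shipped model):
+//
+//   response_cache { enable: true }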
+
+//@@
+//@@.. cpp:var:: message ModelConfig
+//@@
+//@@   A model configuration.
+//@@
+message ModelConfig
+{
+  //@@ .. cpp:var:: string name
+  //@@
+  //@@     The name of the model.
+  //@@
+  string name = 1;
+
+  //@@ .. cpp:var:: string platform
+  //@@
+  //@@     The framework for the model. Possible values are
+  //@@     "tensorrt_plan", "tensorflow_graphdef",
+  //@@     "tensorflow_savedmodel", "onnxruntime_onnx",
+  //@@     "pytorch_libtorch".
+  //@@
+  string platform = 2;
+
+  //@@ .. cpp:var:: string backend
+  //@@
+  //@@     The backend used by the model.
+  //@@
+  string backend = 17;
+
+  //@@ .. cpp:var:: ModelVersionPolicy version_policy
+  //@@
+  //@@     Policy indicating which version(s) of the model will be served.
+  //@@
+  ModelVersionPolicy version_policy = 3;
+
+  //@@ .. cpp:var:: int32 max_batch_size
+  //@@
+  //@@     Maximum batch size allowed for inference. This can only decrease
+  //@@     what is allowed by the model itself. A max_batch_size value of 0
+  //@@     indicates that batching is not allowed for the model and the
+  //@@     dimension/shape of the input and output tensors must exactly
+  //@@     match what is specified in the input and output configuration. A
+  //@@     max_batch_size value > 0 indicates that batching is allowed and
+  //@@     so the model expects the input tensors to have an additional
+  //@@     initial dimension for the batching that is not specified in the
+  //@@     input (for example, if the model supports batched inputs of
+  //@@     2-dimensional tensors then the model configuration will specify
+  //@@     the input shape as [ X, Y ] but the model will expect the actual
+  //@@     input tensors to have shape [ N, X, Y ]). For max_batch_size > 0
+  //@@     returned outputs will also have an additional initial dimension
+  //@@     for the batch.
+  //@@
+  int32 max_batch_size = 4;
+
+  //@@ .. cpp:var:: ModelInput input (repeated)
+  //@@
+  //@@     The inputs requested by the model.
+  //@@
+  repeated ModelInput input = 5;
+
+  //@@ .. cpp:var:: ModelOutput output (repeated)
+  //@@
+  //@@     The outputs produced by the model.
+  //@@
+  repeated ModelOutput output = 6;
+
+  //@@ .. cpp:var:: BatchInput batch_input (repeated)
+  //@@
+  //@@     The model input(s) that the server should use to communicate
+  //@@     batch related values to the model.
+  //@@
+  repeated BatchInput batch_input = 20;
+
+  //@@ .. cpp:var:: BatchOutput batch_output (repeated)
+  //@@
+  //@@     The outputs produced by the model that require special handling
+  //@@     by the model backend.
+  //@@
+  repeated BatchOutput batch_output = 21;
+
+  //@@ .. cpp:var:: ModelOptimizationPolicy optimization
+  //@@
+  //@@     Optimization configuration for the model. If not specified
+  //@@     then the default optimization policy is used.
+  //@@
+  ModelOptimizationPolicy optimization = 12;
+
+  //@@ .. cpp:var:: oneof scheduling_choice
+  //@@
+  //@@     The scheduling policy for the model. If not specified the
+  //@@     default scheduling policy is used for the model. The default
+  //@@     policy is to execute each inference request independently.
+  //@@
+  oneof scheduling_choice
+  {
+    //@@ .. cpp:var:: ModelDynamicBatching dynamic_batching
+    //@@
+    //@@     If specified, enables the dynamic-batching scheduling
+    //@@     policy. With dynamic-batching the scheduler may group
+    //@@     together independent requests into a single batch to
+    //@@     improve inference throughput.
+    //@@
+    ModelDynamicBatching dynamic_batching = 11;
+
+    //@@ .. cpp:var:: ModelSequenceBatching sequence_batching
+    //@@
+    //@@     If specified, enables the sequence-batching scheduling
+    //@@     policy. With sequence-batching, inference requests
+    //@@     with the same correlation ID are routed to the same
+    //@@     model instance. Multiple sequences of inference requests
+    //@@     may be batched together into a single batch to
+    //@@     improve inference throughput.
+    //@@
+    ModelSequenceBatching sequence_batching = 13;
+
+    //@@ .. cpp:var:: ModelEnsembling ensemble_scheduling
+    //@@
+    //@@     If specified, enables the model-ensembling scheduling
+    //@@     policy. With model-ensembling, inference requests
+    //@@     will be processed according to the specification, such as an
+    //@@     execution sequence of models. The input specified in this model
+    //@@     config will be the input for the ensemble, and the output
+    //@@     specified will be the output of the ensemble.
+    //@@
+    ModelEnsembling ensemble_scheduling = 15;
+  }
+
+  //@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated)
+  //@@
+  //@@     Instances of this model. If not specified, one instance
+  //@@     of the model will be instantiated on each available GPU.
+  //@@
+  repeated ModelInstanceGroup instance_group = 7;
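+
+  // For illustration, a hypothetical batching setup (all values invented for
+  // this sketch): with max_batch_size 8 and an input declared with
+  // dims [ 224, 224 ], runtime tensors are expected to have shape
+  // [ N, 224, 224 ] with N <= 8:
+  //
+  //   max_batch_size: 8
+  //   dynamic_batching {
+  //     preferred_batch_size: [ 4, 8 ]
+  //     max_queue_delay_microseconds: 100
+  //   }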
+
+  //@@ .. cpp:var:: string default_model_filename
+  //@@
+  //@@     Optional filename of the model file to use if a
+  //@@     compute-capability specific model is not specified in
+  //@@     :cpp:var:`cc_model_filenames`. If not specified the default name
+  //@@     is 'model.graphdef', 'model.savedmodel', 'model.plan' or
+  //@@     'model.pt' depending on the model type.
+  //@@
+  string default_model_filename = 8;
+
+  //@@ .. cpp:var:: map<string,string> cc_model_filenames
+  //@@
+  //@@     Optional map from CUDA compute capability to the filename of
+  //@@     the model that supports that compute capability. The filename
+  //@@     refers to a file within the model version directory.
+  //@@
+  map<string, string> cc_model_filenames = 9;
+
+  //@@ .. cpp:var:: map<string,string> metric_tags
+  //@@
+  //@@     Optional metric tags. User-specific key-value pairs for metrics
+  //@@     reported for this model. These tags are applied to the metrics
+  //@@     reported on the HTTP metrics port.
+  //@@
+  map<string, string> metric_tags = 10;
+
+  //@@ .. cpp:var:: map<string,ModelParameter> parameters
+  //@@
+  //@@     Optional model parameters. User-specified parameter values.
+  //@@
+  map<string, ModelParameter> parameters = 14;
+
+  //@@ .. cpp:var:: ModelWarmup model_warmup (repeated)
+  //@@
+  //@@     Warmup setting of this model. If specified, all instances
+  //@@     will be run with the request samples in sequence before
+  //@@     serving the model.
+  //@@     This field can only be specified if the model is not an ensemble
+  //@@     model.
+  //@@
+  repeated ModelWarmup model_warmup = 16;
+
+  //@@ .. cpp:var:: ModelOperations model_operations
+  //@@
+  //@@     Optional metadata of the libraries providing custom operations for
+  //@@     this model.
+  //@@
+  ModelOperations model_operations = 18;
+
+  //@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy
+  //@@
+  //@@     Optional specification that describes the nature of transactions
+  //@@     to be expected from the model.
+  //@@
+  ModelTransactionPolicy model_transaction_policy = 19;
+
+  //@@ .. cpp:var:: ModelRepositoryAgents model_repository_agents
+  //@@
+  //@@     Optional specification of the agent(s) that should be invoked
+  //@@     when repository actions are performed for this model.
+  //@@
+  ModelRepositoryAgents model_repository_agents = 23;
+
+  //@@ .. cpp:var:: ModelResponseCache response_cache
+  //@@
+  //@@     Optional setting for utilizing the response cache for this
+  //@@     model.
+  //@@
+  ModelResponseCache response_cache = 24;
+}
\ No newline at end of file
diff --git a/visualdl/component/inference/proto/model_config/protxt_pb2.py b/visualdl/component/inference/proto/model_config/protxt_pb2.py
new file mode 100644
index 000000000..4d5b645d4
--- /dev/null
+++ b/visualdl/component/inference/proto/model_config/protxt_pb2.py
@@ -0,0 +1,855 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: model_config.protxt +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import enum_type_wrapper +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x13model_config.protxt\x12\tinference\"\x96\x01\n\x10ModelRateLimiter\x12\x37\n\tresources\x18\x01 \x03(\x0b\x32$.inference.ModelRateLimiter.Resource\x12\x10\n\x08priority\x18\x02 \x01(\r\x1a\x37\n\x08Resource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06global\x18\x02 \x01(\x08\x12\r\n\x05\x63ount\x18\x03 \x01(\r\"\x87\x04\n\x12ModelInstanceGroup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x30\n\x04kind\x18\x04 \x01(\x0e\x32\".inference.ModelInstanceGroup.Kind\x12\r\n\x05\x63ount\x18\x02 \x01(\x05\x12\x31\n\x0crate_limiter\x18\x06 \x01(\x0b\x32\x1b.inference.ModelRateLimiter\x12\x0c\n\x04gpus\x18\x03 \x03(\x05\x12H\n\x11secondary_devices\x18\x08 \x03(\x0b\x32-.inference.ModelInstanceGroup.SecondaryDevice\x12\x0f\n\x07profile\x18\x05 \x03(\t\x12\x0f\n\x07passive\x18\x07 \x01(\x08\x12\x13\n\x0bhost_policy\x18\t \x01(\t\x1a\x9c\x01\n\x0fSecondaryDevice\x12O\n\x04kind\x18\x01 \x01(\x0e\x32\x41.inference.ModelInstanceGroup.SecondaryDevice.SecondaryDeviceKind\x12\x11\n\tdevice_id\x18\x02 \x01(\x03\"%\n\x13SecondaryDeviceKind\x12\x0e\n\nKIND_NVDLA\x10\x00\"A\n\x04Kind\x12\r\n\tKIND_AUTO\x10\x00\x12\x0c\n\x08KIND_GPU\x10\x01\x12\x0c\n\x08KIND_CPU\x10\x02\x12\x0e\n\nKIND_MODEL\x10\x03\"#\n\x12ModelTensorReshape\x12\r\n\x05shape\x18\x01 \x03(\x03\"\xb2\x02\n\nModelInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12,\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1c.inference.ModelInput.Format\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\x12\x1a\n\x12\x61llow_ragged_batch\x18\x07 \x01(\x08\x12\x10\n\x08optional\x18\x08 \x01(\x08\";\n\x06\x46ormat\x12\x0f\n\x0b\x46ORMAT_NONE\x10\x00\x12\x0f\n\x0b\x46ORMAT_NHWC\x10\x01\x12\x0f\n\x0b\x46ORMAT_NCHW\x10\x02\"\xb2\x01\n\x0bModelOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x03 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x16\n\x0elabel_filename\x18\x04 \x01(\t\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\"\xd9\x02\n\nBatchInput\x12(\n\x04kind\x18\x01 \x01(\x0e\x32\x1a.inference.BatchInput.Kind\x12\x13\n\x0btarget_name\x18\x02 \x03(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x14\n\x0csource_input\x18\x04 \x03(\t\"\xcd\x01\n\x04Kind\x12\x17\n\x13\x42\x41TCH_ELEMENT_COUNT\x10\x00\x12#\n\x1f\x42\x41TCH_ACCUMULATED_ELEMENT_COUNT\x10\x01\x12-\n)BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO\x10\x02\x12$\n BATCH_MAX_ELEMENT_COUNT_AS_SHAPE\x10\x03\x12\x14\n\x10\x42\x41TCH_ITEM_SHAPE\x10\x04\x12\x1c\n\x18\x42\x41TCH_ITEM_SHAPE_FLATTEN\x10\x05\"\x8f\x01\n\x0b\x42\x61tchOutput\x12\x13\n\x0btarget_name\x18\x01 \x03(\t\x12)\n\x04kind\x18\x02 \x01(\x0e\x32\x1b.inference.BatchOutput.Kind\x12\x14\n\x0csource_input\x18\x03 
\x03(\t\"*\n\x04Kind\x12\"\n\x1e\x42\x41TCH_SCATTER_WITH_INPUT_SHAPE\x10\x00\"\x90\x02\n\x12ModelVersionPolicy\x12\x36\n\x06latest\x18\x01 \x01(\x0b\x32$.inference.ModelVersionPolicy.LatestH\x00\x12\x30\n\x03\x61ll\x18\x02 \x01(\x0b\x32!.inference.ModelVersionPolicy.AllH\x00\x12:\n\x08specific\x18\x03 \x01(\x0b\x32&.inference.ModelVersionPolicy.SpecificH\x00\x1a\x1e\n\x06Latest\x12\x14\n\x0cnum_versions\x18\x01 \x01(\r\x1a\x05\n\x03\x41ll\x1a\x1c\n\x08Specific\x12\x10\n\x08versions\x18\x01 \x03(\x03\x42\x0f\n\rpolicy_choice\"\xfd\r\n\x17ModelOptimizationPolicy\x12\x37\n\x05graph\x18\x01 \x01(\x0b\x32(.inference.ModelOptimizationPolicy.Graph\x12\x42\n\x08priority\x18\x02 \x01(\x0e\x32\x30.inference.ModelOptimizationPolicy.ModelPriority\x12\x35\n\x04\x63uda\x18\x03 \x01(\x0b\x32\'.inference.ModelOptimizationPolicy.Cuda\x12X\n\x16\x65xecution_accelerators\x18\x04 \x01(\x0b\x32\x38.inference.ModelOptimizationPolicy.ExecutionAccelerators\x12R\n\x13input_pinned_memory\x18\x05 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12S\n\x14output_pinned_memory\x18\x06 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12&\n\x1egather_kernel_buffer_threshold\x18\x07 \x01(\r\x12\x16\n\x0e\x65\x61ger_batching\x18\x08 \x01(\x08\x1a\x16\n\x05Graph\x12\r\n\x05level\x18\x01 \x01(\x05\x1a\xba\x05\n\x04\x43uda\x12\x0e\n\x06graphs\x18\x01 \x01(\x08\x12\x18\n\x10\x62usy_wait_events\x18\x02 \x01(\x08\x12\x45\n\ngraph_spec\x18\x03 \x03(\x0b\x32\x31.inference.ModelOptimizationPolicy.Cuda.GraphSpec\x12\x1a\n\x12output_copy_stream\x18\x04 \x01(\x08\x1a\xa4\x04\n\tGraphSpec\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12K\n\x05input\x18\x02 \x03(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry\x12W\n\x11graph_lower_bound\x18\x03 \x01(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound\x1a\x14\n\x05Shape\x12\x0b\n\x03\x64im\x18\x01 \x03(\x03\x1a\xdf\x01\n\nLowerBound\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12V\n\x05input\x18\x02 \x03(\x0b\x32G.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\xa4\x03\n\x15\x45xecutionAccelerators\x12g\n\x19gpu_execution_accelerator\x18\x01 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x12g\n\x19\x63pu_execution_accelerator\x18\x02 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x1a\xb8\x01\n\x0b\x41\x63\x63\x65lerator\x12\x0c\n\x04name\x18\x01 \x01(\t\x12h\n\nparameters\x18\x02 \x03(\x0b\x32T.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a$\n\x12PinnedMemoryBuffer\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"I\n\rModelPriority\x12\x14\n\x10PRIORITY_DEFAULT\x10\x00\x12\x10\n\x0cPRIORITY_MAX\x10\x01\x12\x10\n\x0cPRIORITY_MIN\x10\x02\"\xdb\x01\n\x10ModelQueuePolicy\x12\x41\n\x0etimeout_action\x18\x01 \x01(\x0e\x32).inference.ModelQueuePolicy.TimeoutAction\x12$\n\x1c\x64\x65\x66\x61ult_timeout_microseconds\x18\x02 \x01(\x04\x12\x1e\n\x16\x61llow_timeout_override\x18\x03 \x01(\x08\x12\x16\n\x0emax_queue_size\x18\x04 
\x01(\r\"&\n\rTimeoutAction\x12\n\n\x06REJECT\x10\x00\x12\t\n\x05\x44\x45LAY\x10\x01\"\x9b\x03\n\x14ModelDynamicBatching\x12\x1c\n\x14preferred_batch_size\x18\x01 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x02 \x01(\x04\x12\x19\n\x11preserve_ordering\x18\x03 \x01(\x08\x12\x17\n\x0fpriority_levels\x18\x04 \x01(\r\x12\x1e\n\x16\x64\x65\x66\x61ult_priority_level\x18\x05 \x01(\r\x12\x39\n\x14\x64\x65\x66\x61ult_queue_policy\x18\x06 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy\x12W\n\x15priority_queue_policy\x18\x07 \x03(\x0b\x32\x38.inference.ModelDynamicBatching.PriorityQueuePolicyEntry\x1aW\n\x18PriorityQueuePolicyEntry\x12\x0b\n\x03key\x18\x01 \x01(\r\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy:\x02\x38\x01\"\xef\t\n\x15ModelSequenceBatching\x12\x41\n\x06\x64irect\x18\x03 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyDirectH\x00\x12\x41\n\x06oldest\x18\x04 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyOldestH\x00\x12&\n\x1emax_sequence_idle_microseconds\x18\x01 \x01(\x04\x12\x44\n\rcontrol_input\x18\x02 \x03(\x0b\x32-.inference.ModelSequenceBatching.ControlInput\x12\x35\n\x05state\x18\x05 \x03(\x0b\x32&.inference.ModelSequenceBatching.State\x1a\xb1\x02\n\x07\x43ontrol\x12;\n\x04kind\x18\x01 \x01(\x0e\x32-.inference.ModelSequenceBatching.Control.Kind\x12\x18\n\x10int32_false_true\x18\x02 \x03(\x05\x12\x17\n\x0f\x66p32_false_true\x18\x03 \x03(\x02\x12\x17\n\x0f\x62ool_false_true\x18\x05 \x03(\x08\x12&\n\tdata_type\x18\x04 \x01(\x0e\x32\x13.inference.DataType\"u\n\x04Kind\x12\x1a\n\x16\x43ONTROL_SEQUENCE_START\x10\x00\x12\x1a\n\x16\x43ONTROL_SEQUENCE_READY\x10\x01\x12\x18\n\x14\x43ONTROL_SEQUENCE_END\x10\x02\x12\x1b\n\x17\x43ONTROL_SEQUENCE_CORRID\x10\x03\x1aW\n\x0c\x43ontrolInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x39\n\x07\x63ontrol\x18\x02 \x03(\x0b\x32(.inference.ModelSequenceBatching.Control\x1a\x8a\x01\n\x0cInitialState\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x13\n\tdata_file\x18\x04 \x01(\tH\x00\x12\x0c\n\x04name\x18\x05 \x01(\tB\x0c\n\nstate_data\x1a\xac\x01\n\x05State\x12\x12\n\ninput_name\x18\x01 \x01(\t\x12\x13\n\x0boutput_name\x18\x02 \x01(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12\x44\n\rinitial_state\x18\x05 \x03(\x0b\x32-.inference.ModelSequenceBatching.InitialState\x1aX\n\x0eStrategyDirect\x12$\n\x1cmax_queue_delay_microseconds\x18\x01 \x01(\x04\x12 \n\x18minimum_slot_utilization\x18\x02 \x01(\x02\x1au\n\x0eStrategyOldest\x12\x1f\n\x17max_candidate_sequences\x18\x01 \x01(\x05\x12\x1c\n\x14preferred_batch_size\x18\x02 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x03 \x01(\x04\x42\x11\n\x0fstrategy_choice\"\xdd\x02\n\x0fModelEnsembling\x12-\n\x04step\x18\x01 \x03(\x0b\x32\x1f.inference.ModelEnsembling.Step\x1a\x9a\x02\n\x04Step\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\x03\x12@\n\tinput_map\x18\x03 \x03(\x0b\x32-.inference.ModelEnsembling.Step.InputMapEntry\x12\x42\n\noutput_map\x18\x04 \x03(\x0b\x32..inference.ModelEnsembling.Step.OutputMapEntry\x1a/\n\rInputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x0eOutputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"&\n\x0eModelParameter\x12\x14\n\x0cstring_value\x18\x01 \x01(\t\"\xd9\x02\n\x0bModelWarmup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 
\x01(\r\x12\x32\n\x06inputs\x18\x03 \x03(\x0b\x32\".inference.ModelWarmup.InputsEntry\x12\r\n\x05\x63ount\x18\x04 \x01(\r\x1a\x97\x01\n\x05Input\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x15\n\x0brandom_data\x18\x04 \x01(\x08H\x00\x12\x19\n\x0finput_data_file\x18\x05 \x01(\tH\x00\x42\x11\n\x0finput_data_type\x1aK\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12+\n\x05value\x18\x02 \x01(\x0b\x32\x1c.inference.ModelWarmup.Input:\x02\x38\x01\".\n\x0fModelOperations\x12\x1b\n\x13op_library_filename\x18\x01 \x03(\t\"+\n\x16ModelTransactionPolicy\x12\x11\n\tdecoupled\x18\x01 \x01(\x08\"\xe6\x01\n\x15ModelRepositoryAgents\x12\x36\n\x06\x61gents\x18\x01 \x03(\x0b\x32&.inference.ModelRepositoryAgents.Agent\x1a\x94\x01\n\x05\x41gent\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\nparameters\x18\x02 \x03(\x0b\x32\x36.inference.ModelRepositoryAgents.Agent.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"$\n\x12ModelResponseCache\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"\xb2\n\n\x0bModelConfig\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08platform\x18\x02 \x01(\t\x12\x0f\n\x07\x62\x61\x63kend\x18\x11 \x01(\t\x12\x35\n\x0eversion_policy\x18\x03 \x01(\x0b\x32\x1d.inference.ModelVersionPolicy\x12\x16\n\x0emax_batch_size\x18\x04 \x01(\x05\x12$\n\x05input\x18\x05 \x03(\x0b\x32\x15.inference.ModelInput\x12&\n\x06output\x18\x06 \x03(\x0b\x32\x16.inference.ModelOutput\x12*\n\x0b\x62\x61tch_input\x18\x14 \x03(\x0b\x32\x15.inference.BatchInput\x12,\n\x0c\x62\x61tch_output\x18\x15 \x03(\x0b\x32\x16.inference.BatchOutput\x12\x38\n\x0coptimization\x18\x0c \x01(\x0b\x32\".inference.ModelOptimizationPolicy\x12;\n\x10\x64ynamic_batching\x18\x0b \x01(\x0b\x32\x1f.inference.ModelDynamicBatchingH\x00\x12=\n\x11sequence_batching\x18\r \x01(\x0b\x32 .inference.ModelSequenceBatchingH\x00\x12\x39\n\x13\x65nsemble_scheduling\x18\x0f \x01(\x0b\x32\x1a.inference.ModelEnsemblingH\x00\x12\x35\n\x0einstance_group\x18\x07 \x03(\x0b\x32\x1d.inference.ModelInstanceGroup\x12\x1e\n\x16\x64\x65\x66\x61ult_model_filename\x18\x08 \x01(\t\x12H\n\x12\x63\x63_model_filenames\x18\t \x03(\x0b\x32,.inference.ModelConfig.CcModelFilenamesEntry\x12;\n\x0bmetric_tags\x18\n \x03(\x0b\x32&.inference.ModelConfig.MetricTagsEntry\x12:\n\nparameters\x18\x0e \x03(\x0b\x32&.inference.ModelConfig.ParametersEntry\x12,\n\x0cmodel_warmup\x18\x10 \x03(\x0b\x32\x16.inference.ModelWarmup\x12\x34\n\x10model_operations\x18\x12 \x01(\x0b\x32\x1a.inference.ModelOperations\x12\x43\n\x18model_transaction_policy\x18\x13 \x01(\x0b\x32!.inference.ModelTransactionPolicy\x12\x41\n\x17model_repository_agents\x18\x17 \x01(\x0b\x32 .inference.ModelRepositoryAgents\x12\x35\n\x0eresponse_cache\x18\x18 \x01(\x0b\x32\x1d.inference.ModelResponseCache\x1a\x37\n\x15\x43\x63ModelFilenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x31\n\x0fMetricTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 
\x01(\x0b\x32\x19.inference.ModelParameter:\x02\x38\x01\x42\x13\n\x11scheduling_choice*\xfa\x01\n\x08\x44\x61taType\x12\x10\n\x0cTYPE_INVALID\x10\x00\x12\r\n\tTYPE_BOOL\x10\x01\x12\x0e\n\nTYPE_UINT8\x10\x02\x12\x0f\n\x0bTYPE_UINT16\x10\x03\x12\x0f\n\x0bTYPE_UINT32\x10\x04\x12\x0f\n\x0bTYPE_UINT64\x10\x05\x12\r\n\tTYPE_INT8\x10\x06\x12\x0e\n\nTYPE_INT16\x10\x07\x12\x0e\n\nTYPE_INT32\x10\x08\x12\x0e\n\nTYPE_INT64\x10\t\x12\r\n\tTYPE_FP16\x10\n\x12\r\n\tTYPE_FP32\x10\x0b\x12\r\n\tTYPE_FP64\x10\x0c\x12\x0f\n\x0bTYPE_STRING\x10\r\x12\r\n\tTYPE_BF16\x10\x0e\x62\x06proto3' +) + +_DATATYPE = DESCRIPTOR.enum_types_by_name['DataType'] +DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE) +TYPE_INVALID = 0 +TYPE_BOOL = 1 +TYPE_UINT8 = 2 +TYPE_UINT16 = 3 +TYPE_UINT32 = 4 +TYPE_UINT64 = 5 +TYPE_INT8 = 6 +TYPE_INT16 = 7 +TYPE_INT32 = 8 +TYPE_INT64 = 9 +TYPE_FP16 = 10 +TYPE_FP32 = 11 +TYPE_FP64 = 12 +TYPE_STRING = 13 +TYPE_BF16 = 14 + +_MODELRATELIMITER = DESCRIPTOR.message_types_by_name['ModelRateLimiter'] +_MODELRATELIMITER_RESOURCE = _MODELRATELIMITER.nested_types_by_name['Resource'] +_MODELINSTANCEGROUP = DESCRIPTOR.message_types_by_name['ModelInstanceGroup'] +_MODELINSTANCEGROUP_SECONDARYDEVICE = _MODELINSTANCEGROUP.nested_types_by_name[ + 'SecondaryDevice'] +_MODELTENSORRESHAPE = DESCRIPTOR.message_types_by_name['ModelTensorReshape'] +_MODELINPUT = DESCRIPTOR.message_types_by_name['ModelInput'] +_MODELOUTPUT = DESCRIPTOR.message_types_by_name['ModelOutput'] +_BATCHINPUT = DESCRIPTOR.message_types_by_name['BatchInput'] +_BATCHOUTPUT = DESCRIPTOR.message_types_by_name['BatchOutput'] +_MODELVERSIONPOLICY = DESCRIPTOR.message_types_by_name['ModelVersionPolicy'] +_MODELVERSIONPOLICY_LATEST = _MODELVERSIONPOLICY.nested_types_by_name['Latest'] +_MODELVERSIONPOLICY_ALL = _MODELVERSIONPOLICY.nested_types_by_name['All'] +_MODELVERSIONPOLICY_SPECIFIC = _MODELVERSIONPOLICY.nested_types_by_name[ + 'Specific'] +_MODELOPTIMIZATIONPOLICY = DESCRIPTOR.message_types_by_name[ + 'ModelOptimizationPolicy'] +_MODELOPTIMIZATIONPOLICY_GRAPH = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'Graph'] +_MODELOPTIMIZATIONPOLICY_CUDA = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'Cuda'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC = _MODELOPTIMIZATIONPOLICY_CUDA.nested_types_by_name[ + 'GraphSpec'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ + 'Shape'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ + 'LowerBound'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.nested_types_by_name[ + 'InputEntry'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ + 'InputEntry'] +_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'ExecutionAccelerators'] +_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.nested_types_by_name[ + 'Accelerator'] +_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.nested_types_by_name[ + 'ParametersEntry'] +_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'PinnedMemoryBuffer'] +_MODELQUEUEPOLICY = DESCRIPTOR.message_types_by_name['ModelQueuePolicy'] +_MODELDYNAMICBATCHING = DESCRIPTOR.message_types_by_name[ + 
'ModelDynamicBatching'] +_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY = _MODELDYNAMICBATCHING.nested_types_by_name[ + 'PriorityQueuePolicyEntry'] +_MODELSEQUENCEBATCHING = DESCRIPTOR.message_types_by_name[ + 'ModelSequenceBatching'] +_MODELSEQUENCEBATCHING_CONTROL = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'Control'] +_MODELSEQUENCEBATCHING_CONTROLINPUT = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'ControlInput'] +_MODELSEQUENCEBATCHING_INITIALSTATE = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'InitialState'] +_MODELSEQUENCEBATCHING_STATE = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'State'] +_MODELSEQUENCEBATCHING_STRATEGYDIRECT = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'StrategyDirect'] +_MODELSEQUENCEBATCHING_STRATEGYOLDEST = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'StrategyOldest'] +_MODELENSEMBLING = DESCRIPTOR.message_types_by_name['ModelEnsembling'] +_MODELENSEMBLING_STEP = _MODELENSEMBLING.nested_types_by_name['Step'] +_MODELENSEMBLING_STEP_INPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[ + 'InputMapEntry'] +_MODELENSEMBLING_STEP_OUTPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[ + 'OutputMapEntry'] +_MODELPARAMETER = DESCRIPTOR.message_types_by_name['ModelParameter'] +_MODELWARMUP = DESCRIPTOR.message_types_by_name['ModelWarmup'] +_MODELWARMUP_INPUT = _MODELWARMUP.nested_types_by_name['Input'] +_MODELWARMUP_INPUTSENTRY = _MODELWARMUP.nested_types_by_name['InputsEntry'] +_MODELOPERATIONS = DESCRIPTOR.message_types_by_name['ModelOperations'] +_MODELTRANSACTIONPOLICY = DESCRIPTOR.message_types_by_name[ + 'ModelTransactionPolicy'] +_MODELREPOSITORYAGENTS = DESCRIPTOR.message_types_by_name[ + 'ModelRepositoryAgents'] +_MODELREPOSITORYAGENTS_AGENT = _MODELREPOSITORYAGENTS.nested_types_by_name[ + 'Agent'] +_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY = _MODELREPOSITORYAGENTS_AGENT.nested_types_by_name[ + 'ParametersEntry'] +_MODELRESPONSECACHE = DESCRIPTOR.message_types_by_name['ModelResponseCache'] +_MODELCONFIG = DESCRIPTOR.message_types_by_name['ModelConfig'] +_MODELCONFIG_CCMODELFILENAMESENTRY = _MODELCONFIG.nested_types_by_name[ + 'CcModelFilenamesEntry'] +_MODELCONFIG_METRICTAGSENTRY = _MODELCONFIG.nested_types_by_name[ + 'MetricTagsEntry'] +_MODELCONFIG_PARAMETERSENTRY = _MODELCONFIG.nested_types_by_name[ + 'ParametersEntry'] +_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND = _MODELINSTANCEGROUP_SECONDARYDEVICE.enum_types_by_name[ + 'SecondaryDeviceKind'] +_MODELINSTANCEGROUP_KIND = _MODELINSTANCEGROUP.enum_types_by_name['Kind'] +_MODELINPUT_FORMAT = _MODELINPUT.enum_types_by_name['Format'] +_BATCHINPUT_KIND = _BATCHINPUT.enum_types_by_name['Kind'] +_BATCHOUTPUT_KIND = _BATCHOUTPUT.enum_types_by_name['Kind'] +_MODELOPTIMIZATIONPOLICY_MODELPRIORITY = _MODELOPTIMIZATIONPOLICY.enum_types_by_name[ + 'ModelPriority'] +_MODELQUEUEPOLICY_TIMEOUTACTION = _MODELQUEUEPOLICY.enum_types_by_name[ + 'TimeoutAction'] +_MODELSEQUENCEBATCHING_CONTROL_KIND = _MODELSEQUENCEBATCHING_CONTROL.enum_types_by_name[ + 'Kind'] +ModelRateLimiter = _reflection.GeneratedProtocolMessageType( + 'ModelRateLimiter', + (_message.Message, ), + { + 'Resource': + _reflection.GeneratedProtocolMessageType( + 'Resource', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELRATELIMITER_RESOURCE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRateLimiter.Resource) + }), + 'DESCRIPTOR': + _MODELRATELIMITER, + '__module__': + 'model_config.protxt_pb2' + # 
@@protoc_insertion_point(class_scope:inference.ModelRateLimiter) + }) +_sym_db.RegisterMessage(ModelRateLimiter) +_sym_db.RegisterMessage(ModelRateLimiter.Resource) + +ModelInstanceGroup = _reflection.GeneratedProtocolMessageType( + 'ModelInstanceGroup', + (_message.Message, ), + { + 'SecondaryDevice': + _reflection.GeneratedProtocolMessageType( + 'SecondaryDevice', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELINSTANCEGROUP_SECONDARYDEVICE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup.SecondaryDevice) + }), + 'DESCRIPTOR': + _MODELINSTANCEGROUP, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup) + }) +_sym_db.RegisterMessage(ModelInstanceGroup) +_sym_db.RegisterMessage(ModelInstanceGroup.SecondaryDevice) + +ModelTensorReshape = _reflection.GeneratedProtocolMessageType( + 'ModelTensorReshape', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELTENSORRESHAPE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelTensorReshape) + }) +_sym_db.RegisterMessage(ModelTensorReshape) + +ModelInput = _reflection.GeneratedProtocolMessageType( + 'ModelInput', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELINPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelInput) + }) +_sym_db.RegisterMessage(ModelInput) + +ModelOutput = _reflection.GeneratedProtocolMessageType( + 'ModelOutput', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELOUTPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOutput) + }) +_sym_db.RegisterMessage(ModelOutput) + +BatchInput = _reflection.GeneratedProtocolMessageType( + 'BatchInput', + (_message.Message, ), + { + 'DESCRIPTOR': _BATCHINPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.BatchInput) + }) +_sym_db.RegisterMessage(BatchInput) + +BatchOutput = _reflection.GeneratedProtocolMessageType( + 'BatchOutput', + (_message.Message, ), + { + 'DESCRIPTOR': _BATCHOUTPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.BatchOutput) + }) +_sym_db.RegisterMessage(BatchOutput) + +ModelVersionPolicy = _reflection.GeneratedProtocolMessageType( + 'ModelVersionPolicy', + (_message.Message, ), + { + 'Latest': + _reflection.GeneratedProtocolMessageType( + 'Latest', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELVERSIONPOLICY_LATEST, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Latest) + }), + 'All': + _reflection.GeneratedProtocolMessageType( + 'All', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELVERSIONPOLICY_ALL, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.All) + }), + 'Specific': + _reflection.GeneratedProtocolMessageType( + 'Specific', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELVERSIONPOLICY_SPECIFIC, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Specific) + }), + 'DESCRIPTOR': + _MODELVERSIONPOLICY, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy) + }) +_sym_db.RegisterMessage(ModelVersionPolicy) +_sym_db.RegisterMessage(ModelVersionPolicy.Latest) +_sym_db.RegisterMessage(ModelVersionPolicy.All) 
+_sym_db.RegisterMessage(ModelVersionPolicy.Specific) + +ModelOptimizationPolicy = _reflection.GeneratedProtocolMessageType( + 'ModelOptimizationPolicy', + (_message.Message, ), + { + 'Graph': + _reflection.GeneratedProtocolMessageType( + 'Graph', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELOPTIMIZATIONPOLICY_GRAPH, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Graph) + }), + 'Cuda': + _reflection.GeneratedProtocolMessageType( + 'Cuda', + (_message.Message, ), + { + 'GraphSpec': + _reflection.GeneratedProtocolMessageType( + 'GraphSpec', + (_message.Message, ), + { + 'Shape': + _reflection.GeneratedProtocolMessageType( + 'Shape', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape) + }), + 'LowerBound': + _reflection.GeneratedProtocolMessageType( + 'LowerBound', + (_message.Message, ), + { + 'InputEntry': + _reflection.GeneratedProtocolMessageType( + 'InputEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) + }), + 'InputEntry': + _reflection.GeneratedProtocolMessageType( + 'InputEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda) + }), + 'ExecutionAccelerators': + _reflection.GeneratedProtocolMessageType( + 'ExecutionAccelerators', + (_message.Message, ), + { + 'Accelerator': + _reflection.GeneratedProtocolMessageType( + 'Accelerator', + (_message.Message, ), + { + 'ParametersEntry': + _reflection.GeneratedProtocolMessageType( + 'ParametersEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators) + }), + 'PinnedMemoryBuffer': + _reflection.GeneratedProtocolMessageType( + 'PinnedMemoryBuffer', + (_message.Message, ), + { + 'DESCRIPTOR': 
_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.PinnedMemoryBuffer) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy) + }) +_sym_db.RegisterMessage(ModelOptimizationPolicy) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Graph) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.Shape) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) +_sym_db.RegisterMessage( + ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) +_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators) +_sym_db.RegisterMessage( + ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) +_sym_db.RegisterMessage( + ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) +_sym_db.RegisterMessage(ModelOptimizationPolicy.PinnedMemoryBuffer) + +ModelQueuePolicy = _reflection.GeneratedProtocolMessageType( + 'ModelQueuePolicy', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELQUEUEPOLICY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelQueuePolicy) + }) +_sym_db.RegisterMessage(ModelQueuePolicy) + +ModelDynamicBatching = _reflection.GeneratedProtocolMessageType( + 'ModelDynamicBatching', + (_message.Message, ), + { + 'PriorityQueuePolicyEntry': + _reflection.GeneratedProtocolMessageType( + 'PriorityQueuePolicyEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching.PriorityQueuePolicyEntry) + }), + 'DESCRIPTOR': + _MODELDYNAMICBATCHING, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching) + }) +_sym_db.RegisterMessage(ModelDynamicBatching) +_sym_db.RegisterMessage(ModelDynamicBatching.PriorityQueuePolicyEntry) + +ModelSequenceBatching = _reflection.GeneratedProtocolMessageType( + 'ModelSequenceBatching', + (_message.Message, ), + { + 'Control': + _reflection.GeneratedProtocolMessageType( + 'Control', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROL, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.Control) + }), + 'ControlInput': + _reflection.GeneratedProtocolMessageType( + 'ControlInput', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROLINPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.ControlInput) + }), + 'InitialState': + _reflection.GeneratedProtocolMessageType( + 'InitialState', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_INITIALSTATE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.InitialState) + }), + 'State': + _reflection.GeneratedProtocolMessageType( + 'State', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STATE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.State) + }), + 
'StrategyDirect': + _reflection.GeneratedProtocolMessageType( + 'StrategyDirect', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYDIRECT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyDirect) + }), + 'StrategyOldest': + _reflection.GeneratedProtocolMessageType( + 'StrategyOldest', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYOLDEST, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyOldest) + }), + 'DESCRIPTOR': + _MODELSEQUENCEBATCHING, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching) + }) +_sym_db.RegisterMessage(ModelSequenceBatching) +_sym_db.RegisterMessage(ModelSequenceBatching.Control) +_sym_db.RegisterMessage(ModelSequenceBatching.ControlInput) +_sym_db.RegisterMessage(ModelSequenceBatching.InitialState) +_sym_db.RegisterMessage(ModelSequenceBatching.State) +_sym_db.RegisterMessage(ModelSequenceBatching.StrategyDirect) +_sym_db.RegisterMessage(ModelSequenceBatching.StrategyOldest) + +ModelEnsembling = _reflection.GeneratedProtocolMessageType( + 'ModelEnsembling', + (_message.Message, ), + { + 'Step': + _reflection.GeneratedProtocolMessageType( + 'Step', + (_message.Message, ), + { + 'InputMapEntry': + _reflection.GeneratedProtocolMessageType( + 'InputMapEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELENSEMBLING_STEP_INPUTMAPENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.InputMapEntry) + }), + 'OutputMapEntry': + _reflection.GeneratedProtocolMessageType( + 'OutputMapEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELENSEMBLING_STEP_OUTPUTMAPENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.OutputMapEntry) + }), + 'DESCRIPTOR': + _MODELENSEMBLING_STEP, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step) + }), + 'DESCRIPTOR': + _MODELENSEMBLING, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling) + }) +_sym_db.RegisterMessage(ModelEnsembling) +_sym_db.RegisterMessage(ModelEnsembling.Step) +_sym_db.RegisterMessage(ModelEnsembling.Step.InputMapEntry) +_sym_db.RegisterMessage(ModelEnsembling.Step.OutputMapEntry) + +ModelParameter = _reflection.GeneratedProtocolMessageType( + 'ModelParameter', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELPARAMETER, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelParameter) + }) +_sym_db.RegisterMessage(ModelParameter) + +ModelWarmup = _reflection.GeneratedProtocolMessageType( + 'ModelWarmup', + (_message.Message, ), + { + 'Input': + _reflection.GeneratedProtocolMessageType( + 'Input', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELWARMUP_INPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelWarmup.Input) + }), + 'InputsEntry': + _reflection.GeneratedProtocolMessageType( + 'InputsEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELWARMUP_INPUTSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelWarmup.InputsEntry) + }), + 'DESCRIPTOR': + _MODELWARMUP, + '__module__': + 'model_config.protxt_pb2' + # 
@@protoc_insertion_point(class_scope:inference.ModelWarmup) + }) +_sym_db.RegisterMessage(ModelWarmup) +_sym_db.RegisterMessage(ModelWarmup.Input) +_sym_db.RegisterMessage(ModelWarmup.InputsEntry) + +ModelOperations = _reflection.GeneratedProtocolMessageType( + 'ModelOperations', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELOPERATIONS, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOperations) + }) +_sym_db.RegisterMessage(ModelOperations) + +ModelTransactionPolicy = _reflection.GeneratedProtocolMessageType( + 'ModelTransactionPolicy', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELTRANSACTIONPOLICY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelTransactionPolicy) + }) +_sym_db.RegisterMessage(ModelTransactionPolicy) + +ModelRepositoryAgents = _reflection.GeneratedProtocolMessageType( + 'ModelRepositoryAgents', + (_message.Message, ), + { + 'Agent': + _reflection.GeneratedProtocolMessageType( + 'Agent', + (_message.Message, ), + { + 'ParametersEntry': + _reflection.GeneratedProtocolMessageType( + 'ParametersEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent.ParametersEntry) + }), + 'DESCRIPTOR': + _MODELREPOSITORYAGENTS_AGENT, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent) + }), + 'DESCRIPTOR': + _MODELREPOSITORYAGENTS, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents) + }) +_sym_db.RegisterMessage(ModelRepositoryAgents) +_sym_db.RegisterMessage(ModelRepositoryAgents.Agent) +_sym_db.RegisterMessage(ModelRepositoryAgents.Agent.ParametersEntry) + +ModelResponseCache = _reflection.GeneratedProtocolMessageType( + 'ModelResponseCache', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELRESPONSECACHE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelResponseCache) + }) +_sym_db.RegisterMessage(ModelResponseCache) + +ModelConfig = _reflection.GeneratedProtocolMessageType( + 'ModelConfig', + (_message.Message, ), + { + 'CcModelFilenamesEntry': + _reflection.GeneratedProtocolMessageType( + 'CcModelFilenamesEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELCONFIG_CCMODELFILENAMESENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig.CcModelFilenamesEntry) + }), + 'MetricTagsEntry': + _reflection.GeneratedProtocolMessageType( + 'MetricTagsEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELCONFIG_METRICTAGSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig.MetricTagsEntry) + }), + 'ParametersEntry': + _reflection.GeneratedProtocolMessageType( + 'ParametersEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELCONFIG_PARAMETERSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig.ParametersEntry) + }), + 'DESCRIPTOR': + _MODELCONFIG, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig) + }) +_sym_db.RegisterMessage(ModelConfig) +_sym_db.RegisterMessage(ModelConfig.CcModelFilenamesEntry) +_sym_db.RegisterMessage(ModelConfig.MetricTagsEntry) +_sym_db.RegisterMessage(ModelConfig.ParametersEntry) 
+ +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._options = None + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_options = b'8\001' + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._options = None + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_options = b'8\001' + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._options = None + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_options = b'8\001' + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._options = None + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_options = b'8\001' + _MODELENSEMBLING_STEP_INPUTMAPENTRY._options = None + _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_options = b'8\001' + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._options = None + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_options = b'8\001' + _MODELWARMUP_INPUTSENTRY._options = None + _MODELWARMUP_INPUTSENTRY._serialized_options = b'8\001' + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._options = None + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_options = b'8\001' + _MODELCONFIG_CCMODELFILENAMESENTRY._options = None + _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_options = b'8\001' + _MODELCONFIG_METRICTAGSENTRY._options = None + _MODELCONFIG_METRICTAGSENTRY._serialized_options = b'8\001' + _MODELCONFIG_PARAMETERSENTRY._options = None + _MODELCONFIG_PARAMETERSENTRY._serialized_options = b'8\001' + _DATATYPE._serialized_start = 8137 + _DATATYPE._serialized_end = 8387 + _MODELRATELIMITER._serialized_start = 35 + _MODELRATELIMITER._serialized_end = 185 + _MODELRATELIMITER_RESOURCE._serialized_start = 130 + _MODELRATELIMITER_RESOURCE._serialized_end = 185 + _MODELINSTANCEGROUP._serialized_start = 188 + _MODELINSTANCEGROUP._serialized_end = 707 + _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_start = 484 + _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_end = 640 + _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_start = 603 + _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_end = 640 + _MODELINSTANCEGROUP_KIND._serialized_start = 642 + _MODELINSTANCEGROUP_KIND._serialized_end = 707 + _MODELTENSORRESHAPE._serialized_start = 709 + _MODELTENSORRESHAPE._serialized_end = 744 + _MODELINPUT._serialized_start = 747 + _MODELINPUT._serialized_end = 1053 + _MODELINPUT_FORMAT._serialized_start = 994 + _MODELINPUT_FORMAT._serialized_end = 1053 + _MODELOUTPUT._serialized_start = 1056 + _MODELOUTPUT._serialized_end = 1234 + _BATCHINPUT._serialized_start = 1237 + _BATCHINPUT._serialized_end = 1582 + _BATCHINPUT_KIND._serialized_start = 1377 + _BATCHINPUT_KIND._serialized_end = 1582 + _BATCHOUTPUT._serialized_start = 1585 + _BATCHOUTPUT._serialized_end = 1728 + _BATCHOUTPUT_KIND._serialized_start = 1686 + _BATCHOUTPUT_KIND._serialized_end = 1728 + _MODELVERSIONPOLICY._serialized_start = 1731 + _MODELVERSIONPOLICY._serialized_end = 2003 + _MODELVERSIONPOLICY_LATEST._serialized_start = 1919 + _MODELVERSIONPOLICY_LATEST._serialized_end = 1949 + _MODELVERSIONPOLICY_ALL._serialized_start = 1951 + _MODELVERSIONPOLICY_ALL._serialized_end = 1956 + _MODELVERSIONPOLICY_SPECIFIC._serialized_start = 1958 + _MODELVERSIONPOLICY_SPECIFIC._serialized_end = 1986 + _MODELOPTIMIZATIONPOLICY._serialized_start = 2006 + _MODELOPTIMIZATIONPOLICY._serialized_end = 3795 + _MODELOPTIMIZATIONPOLICY_GRAPH._serialized_start = 2536 + 
_MODELOPTIMIZATIONPOLICY_GRAPH._serialized_end = 2558 + _MODELOPTIMIZATIONPOLICY_CUDA._serialized_start = 2561 + _MODELOPTIMIZATIONPOLICY_CUDA._serialized_end = 3259 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_start = 2711 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_end = 3259 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_start = 2910 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_end = 2930 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_start = 2933 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_end = 3156 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_start = 3055 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_end = 3156 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_start = 3055 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_end = 3156 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_start = 3262 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_end = 3682 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_start = 3498 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_end = 3682 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_start = 3633 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_end = 3682 + _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_start = 3684 + _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_end = 3720 + _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_start = 3722 + _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_end = 3795 + _MODELQUEUEPOLICY._serialized_start = 3798 + _MODELQUEUEPOLICY._serialized_end = 4017 + _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_start = 3979 + _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_end = 4017 + _MODELDYNAMICBATCHING._serialized_start = 4020 + _MODELDYNAMICBATCHING._serialized_end = 4431 + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_start = 4344 + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_end = 4431 + _MODELSEQUENCEBATCHING._serialized_start = 4434 + _MODELSEQUENCEBATCHING._serialized_end = 5697 + _MODELSEQUENCEBATCHING_CONTROL._serialized_start = 4759 + _MODELSEQUENCEBATCHING_CONTROL._serialized_end = 5064 + _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_start = 4947 + _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_end = 5064 + _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_start = 5066 + _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_end = 5153 + _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_start = 5156 + _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_end = 5294 + _MODELSEQUENCEBATCHING_STATE._serialized_start = 5297 + _MODELSEQUENCEBATCHING_STATE._serialized_end = 5469 + _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_start = 5471 + _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_end = 5559 + _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_start = 5561 + _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_end = 5678 + _MODELENSEMBLING._serialized_start = 5700 + _MODELENSEMBLING._serialized_end = 6049 + _MODELENSEMBLING_STEP._serialized_start = 5767 + _MODELENSEMBLING_STEP._serialized_end = 6049 + _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_start = 5952 + _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_end = 5999 + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_start = 6001 + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_end = 6049 + 
_MODELPARAMETER._serialized_start = 6051
+    _MODELPARAMETER._serialized_end = 6089
+    _MODELWARMUP._serialized_start = 6092
+    _MODELWARMUP._serialized_end = 6437
+    _MODELWARMUP_INPUT._serialized_start = 6209
+    _MODELWARMUP_INPUT._serialized_end = 6360
+    _MODELWARMUP_INPUTSENTRY._serialized_start = 6362
+    _MODELWARMUP_INPUTSENTRY._serialized_end = 6437
+    _MODELOPERATIONS._serialized_start = 6439
+    _MODELOPERATIONS._serialized_end = 6485
+    _MODELTRANSACTIONPOLICY._serialized_start = 6487
+    _MODELTRANSACTIONPOLICY._serialized_end = 6530
+    _MODELREPOSITORYAGENTS._serialized_start = 6533
+    _MODELREPOSITORYAGENTS._serialized_end = 6763
+    _MODELREPOSITORYAGENTS_AGENT._serialized_start = 6615
+    _MODELREPOSITORYAGENTS_AGENT._serialized_end = 6763
+    _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_start = 3633
+    _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_end = 3682
+    _MODELRESPONSECACHE._serialized_start = 6765
+    _MODELRESPONSECACHE._serialized_end = 6801
+    _MODELCONFIG._serialized_start = 6804
+    _MODELCONFIG._serialized_end = 8134
+    _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_start = 7929
+    _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_end = 7984
+    _MODELCONFIG_METRICTAGSENTRY._serialized_start = 7986
+    _MODELCONFIG_METRICTAGSENTRY._serialized_end = 8035
+    _MODELCONFIG_PARAMETERSENTRY._serialized_start = 8037
+    _MODELCONFIG_PARAMETERSENTRY._serialized_end = 8113
+# @@protoc_insertion_point(module_scope)
diff --git a/visualdl/server/app.py b/visualdl/server/app.py
index 06dff8163..2ce5d4a0a 100644
--- a/visualdl/server/app.py
+++ b/visualdl/server/app.py
@@ -32,6 +32,7 @@
 import visualdl.server
 from visualdl import __version__
+from visualdl.component.inference.fastdeploy_server import create_fastdeploy_api_call
 from visualdl.component.inference.model_convert_server import create_model_convert_api_call
 from visualdl.component.profiler.profiler_server import create_profiler_api_call
 from visualdl.server.api import create_api_call
@@ -70,6 +71,7 @@ def create_app(args):  # noqa: C901
     api_call = create_api_call(args.logdir, args.model, args.cache_timeout)
     profiler_api_call = create_profiler_api_call(args.logdir)
     inference_api_call = create_model_convert_api_call()
+    fastdeploy_api_call = create_fastdeploy_api_call()
     if args.telemetry:
         update_util.PbUpdater(args.product).start()
@@ -152,6 +154,15 @@ def serve_inference_api(method):
         return make_response(
             Response(data, mimetype=mimetype, headers=headers))

+    @app.route(api_path + '/fastdeploy/<method>', methods=["GET", "POST"])
+    def serve_fastdeploy_api(method):
+        if request.method == 'POST':
+            data, mimetype, headers = fastdeploy_api_call(method, request.form)
+        else:
+            data, mimetype, headers = fastdeploy_api_call(method, request.args)
+        return make_response(
+            Response(data, mimetype=mimetype, headers=headers))
+
     @app.route(check_live_path)
     def check_live():
         return '', 204

From a66fa1be4a44a6783789208e74e8792d70893799 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Tue, 22 Nov 2022 03:03:03 +0000
Subject: [PATCH 02/48] fix

---
 .../component/inference/fastdeploy_lib.py     | 60 ++++++++++++-------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py
index 0f56b70d0..cd54ee0d0 100644
--- a/visualdl/component/inference/fastdeploy_lib.py
+++ b/visualdl/component/inference/fastdeploy_lib.py
@@ -1,4 +1,5 @@
-import multiprocessing
+import os
+import json
 from subprocess import CalledProcessError
 from subprocess import PIPE
 from subprocess import Popen
@@ -10,45 +11,62 @@ def pbtxt2json(content: str):
-    '''
+    '''
     Convert protocol messages in text format to json format string.
     '''
-    message = text_format.Parse(content, ModelConfig())
-    json_string = json_format.MessageToJson(message)
-    return json_string
+    message = text_format.Parse(content, ModelConfig())
+    json_string = json_format.MessageToJson(message)
+    return json_string
 
 
 def json2pbtxt(content: str):
-    '''
+    '''
     Convert json format string to protocol messages in text format.
     '''
-    message = json_format.Parse(content, ModelConfig())
-    text_proto = text_format.MessageToString(message)
-    return text_proto
+    message = json_format.Parse(content, ModelConfig())
+    text_proto = text_format.MessageToString(message)
+    return text_proto
+
+
+def analyse_config(cur_dir:str):
+    '''
+    Analyse the model config in specified directory.
+    Return a json object to describe configuration.
+    '''
+    all_model_configs = {}
+    all_model_paths = {}
+    for parent_dir, sub_dirs, filenames in os.walk(cur_dir):
+        for filename in filenames:
+            if '.pbtxt' in filename:
+                model_name = os.path.basename(parent_dir)
+                all_model_paths[model_name] = parent_dir
+                json_config = json.loads(pbtxt2json(open(os.path.join(parent_dir, filename)).read()))
+                all_model_configs[model_name] = json_config
+                print(model_name)
+                print(json.dumps(json_config, indent=2))
 
 
 def launch_process(kwargs: dict):
-    '''
+    '''
     Launch a fastdeploy server according to specified arguments.
     '''
-    cmd = ['fastdeployserver']
-    for key, value in kwargs.items():
-        cmd.append('--{}'.format(key))
-        cmd.append('{}'.foramt(value))
-    p = Popen(cmd, stdout=PIPE, bufsize=1, universal_newlines=True)
-    return p
+    cmd = ['fastdeployserver']
+    for key, value in kwargs.items():
+        cmd.append('--{}'.format(key))
+        cmd.append('{}'.foramt(value))
+    p = Popen(cmd, stdout=PIPE, bufsize=1, universal_newlines=True)
+    return p
 
 
 def get_process_output(process):
-    '''
+    '''
     Get the standard output of a opened subprocess.
     '''
-    for line in process.stdout:
-        yield line
+    for line in process.stdout:
+        yield line
 
 
 def kill_process(process):
-    '''
+    '''
     Stop a opened subprocess.
     '''
-    process.kill()
+    process.kill()

From 9a929753c790a6253eb032ae7c13668e0b33c188 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Wed, 23 Nov 2022 08:24:51 +0000
Subject: [PATCH 03/48] add code

---
 .../component/inference/fastdeploy_lib.py     | 200 +++++++++++++++++-
 .../component/inference/fastdeploy_server.py  |   7 +-
 2 files changed, 197 insertions(+), 10 deletions(-)

diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py
index cd54ee0d0..0de435b1a 100644
--- a/visualdl/component/inference/fastdeploy_lib.py
+++ b/visualdl/component/inference/fastdeploy_lib.py
@@ -1,5 +1,7 @@
 import os
 import json
+import re
+import copy
 from subprocess import CalledProcessError
 from subprocess import PIPE
 from subprocess import Popen
@@ -34,15 +36,199 @@ def analyse_config(cur_dir:str):
     '''
     all_model_configs = {}
     all_model_paths = {}
-    for parent_dir, sub_dirs, filenames in os.walk(cur_dir):
+    all_model_versions = {}
+    parent_dir, sub_dirs, filenames = os.walk(cur_dir).send(None)  # models can only put directory in model repository,
+    # so we should only search depth 1 directories.
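+    # Illustrative repository layout this walk assumes (model, version and
+    # file names are hypothetical):
+    #     cur_dir/
+    #         yolov5/
+    #             config.pbtxt
+    #             1/
+    #                 model.onnx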
+ for model_dir_name in sub_dirs: + model_dir, model_sub_dirs, filenames = os.walk(os.path.join(parent_dir, model_dir_name)).send(None) + model_name = os.path.basename(model_dir) for filename in filenames: if '.pbtxt' in filename: - model_name = os.path.basename(parent_dir) - all_model_paths[model_name] = parent_dir - json_config = json.loads(pbtxt2json(open(os.path.join(parent_dir, filename)).read())) - all_model_configs[model_name] = json_config - print(model_name) - print(json.dumps(json_config, indent=2)) + all_model_paths[model_name] = model_dir # store model path + json_config = json.loads(pbtxt2json(open(os.path.join(model_dir, filename)).read())) + all_model_configs[model_name] = json_config # store original config file content in json format + for model_sub_dir in model_sub_dirs: + if re.match('\d+', model_sub_dir): # version directory consists of numbers + for version_resource_file in os.listdir(os.path.join(model_dir, model_sub_dir)): + if model_name not in all_model_versions: + all_model_versions[model_name] = {} + if model_sub_dir not in all_model_versions[model_name]: + all_model_versions[model_name][model_sub_dir] = [] + all_model_versions[model_name][model_sub_dir].append(version_resource_file) + +def exchange_format_to_original_format(exchange_format): + ''' + Change config exchange format to original format. + ''' + ensembles = [] + models = [] + all_models = {} + if 'ensembles' in exchange_format: + emsembles = exchange_format['ensembles'] + if 'models' in exchange_format: + models = exchange_format['models'] + alls = ensembles + models + for model_config in alls: + # 1. add 'execution_accelerators' keyword + if 'optimization' in model_config: + optimization_config = model_config['optimization'] + del model_config['optimization'] + model_config['optimization'] = {} + model_config['optimization']['execution_accelerators'] = optimization_config + # 2. put parameters in 'cpu_execution_accelerator' and 'gpu_execution_accelerator' inside 'parameters' keyword + for accelerator_name, accelerator_items in optimization_config.items(): + reversed_accelerator_items = [] + for accelerator_item in accelerator_items: + transformed_accelerator_item = {} + for key, value in accelerator_item.items(): + if key == 'name': + transformed_accelerator_item[key] = value + else: + if 'parameters' not in transformed_accelerator_item: + transformed_accelerator_item['parameters'] = {} + transformed_accelerator_item['parameters'][key] = value + reversed_accelerator_items.append(transformed_accelerator_item) + del optimization_config[accelerator_name] + optimization_config[accelerator_name] = reversed_accelerator_items + + # 3. delete versions information + if 'versions' in model_config: + del model_config['versions'] + if 'platform' in model_config and model_config['platform'] == 'ensemble': # emsemble model + # 4. add 'ensembleScheduling' keyword + if 'step' in model_config: + step_configs = model_config['step'] + if 'ensembleScheduling' not in model_config: + model_config['ensembleScheduling'] = {} + model_config['ensembleScheduling']['step'] = step_configs + del model_config['step'] + # 5. 
remove two virtual models(feed, fetch), and "modelType", "inputModels", "outputModels", "inputVars", "outputVars" + remove_list = [] + for model_config_in_step in step_configs: + if model_config_in_step['modelName'] == 'feed' or model_config_in_step['modelName'] == 'fetch': + remove_list.append(model_config_in_step) + continue + del model_config_in_step['modelType'] + del model_config_in_step['inputModels'] + del model_config_in_step['outputModels'] + del model_config_in_step['inputVars'] + del model_config_in_step['outputVars'] + all_models['name'] = model_config + return all_models + + +def original_format_to_exchange_format(original_format, version_info): + ''' + Change config original format to exchange format. + ''' + exchange_format = {} + exchange_format['ensembles'] = [] + exchange_format['models'] = [] + for model_name, model_config in original_format.items(): + # 1. remove 'execution_accelerators' keyword + # 2. put parameters in 'cpu_execution_accelerator' and 'gpu_execution_accelerator' outside + transformed_config = copy.deepcopy(model_config) + if 'optimization' in model_config: + if 'execution_accelerators' in model_config['optimization']: + transformed_optimization_config = {} + for accelerator_name, accelerator_items in model_config['optimization']['execution_accelerators'].items(): + transformed_optimization_config[accelerator_name] = [] + for accelerator_item in accelerator_items: + transformed_accelerator_item = {} + for key, value in accelerator_item.items(): + if key == 'parameters': + for parameter_name, parameter_value in value.items(): + transformed_accelerator_item[parameter_name] = parameter_value + else: + transformed_accelerator_item[key] = value + transformed_optimization_config[accelerator_name].append(transformed_accelerator_item) + del transformed_config['optimization'] + transformed_config['optimization'] = transformed_optimization_config + # 3. add versions information + if model_name in version_info: + transformed_config[model_name]['versions'] = version_info[model_name] + if 'platform' in model_config and model_config['platform'] == 'ensemble': # emsemble model + # 4. remove ensembleScheduling + if 'ensembleScheduling' in model_config: + if 'step' in model_config['ensembleScheduling']: + del transformed_config['ensembleScheduling'] + transformed_config['step'] = model_config['ensembleScheduling']['step'] + # 5. add two virtual models(feed, fetch), and "modelType", "inputModels", "outputModels", "inputVars", "outputVars" + for model_config_in_step in transformed_config['step']: + model_config_in_step['modelType'] = 'normal' + model_config_in_step['inputModels'] = [] + model_config_in_step['outputModels'] = [] + model_config_in_step['inputVars'] = [] + model_config_in_step['outputVars'] = [] + + transformed_config['step'].append({ + "modelName": "feed", + "modelType": "virtual", + "inputModels": [], + "outputModels": [], + "inputVars": [], + "outputVars": [] + }) + transformed_config['step'].append({ + "modelName": "fetch", + "modelType": "virtual", + "inputModels": [], + "outputModels": [], + "inputVars": [], + "outputVars": [] + }) + analyse_step_relationships(transformed_config['step'], transformed_config['input'], transformed_config['output']) + exchange_format['ensembles'].append(transformed_config) + elif 'backend' in model_config: # single model + exchange_format['models'].append(transformed_config) + return exchange_format + +def analyse_step_relationships(step_config, inputs, outputs): + ''' + Analyse model relationships in ensemble step. 
And fill "inputModels", "outputModels", "inputVars", "outputVars" in step_config. + step_config: step data in ensemble model config. + inputs: inputs in ensemble model config. + outputs: outputs in ensemble model config. + ''' + models_dict = {} + vars_dict = {} + for model_config_in_step in step_config: + models_dict[model_config_in_step['modelName']] = model_config_in_step + if model_config_in_step['modelType'] == 'virtual': + for var in inputs: + if var['name'] not in vars_dict: + vars_dict[var['name']] = {} + vars_dict[var['name']]['from_models'] = [] + vars_dict[var['name']]['to_models'] = [] + vars_dict[var['name']]['from_models'].append('feed') + for var in outputs: + if var['name'] not in vars_dict: + vars_dict[var['name']] = {} + vars_dict[var['name']]['from_models'] = [] + vars_dict[var['name']]['to_models'] = [] + vars_dict[var['name']]['to_models'].append('fetch') + else: + for var_placehold_name, var_name in model_config_in_step['inputMap'].items(): + if var_name not in vars_dict: + vars_dict[var_name] = {} + vars_dict[var_name]['from_models'] = [] + vars_dict[var_name]['to_models'] = [] + vars_dict[var_name]['to_models'].append(model_config_in_step['modelName']) + + for var_placehold_name, var_name in model_config_in_step['outputMap'].items(): + if var_name not in vars_dict: + vars_dict[var_name] = {} + vars_dict[var_name]['from_models'] = [] + vars_dict[var_name]['to_models'] = [] + vars_dict[var_name]['from_models'].append(model_config_in_step['modelName']) + for var_name, relationships in vars_dict.items(): + for from_model in relationships['from_models']: + models_dict[from_model]['outputVars'].append(var_name) + models_dict[from_model]['outputModels'].extend(relationships['to_models']) + for to_model in relationships['to_models']: + models_dict[to_model]['inputVars'].append(var_name) + models_dict[to_model]['inputModels'].extend(relationships['from_models']) + def launch_process(kwargs: dict): diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index d54a3a7a4..48705de47 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -57,7 +57,7 @@ def get_config(self, cur_dir): pass @result() - def config_update(self, cur_dir, model_name): + def config_update(self, cur_dir, model_name, config): pass @result() @@ -83,11 +83,12 @@ def create_fastdeploy_api_call(): api = FastDeployServerApi() routes = { 'get_directory': (api.get_directory, ['dir']), - 'config_update': (api.config_update, ['dir', 'name']), + 'config_update': (api.config_update, ['dir', 'name', 'config']), 'get_config': (api.get_config, ['dir']), 'start_server': (api.start_server, ['dir', 'args']), 'stop_server': (api.stop_server, ['server_id']), - 'get_server_output': (api.get_server_output, ['server_id']) + 'get_server_output': (api.get_server_output, ['server_id']), + 'test_server': (api.test_server_with_gradio, ['server_id']) } def call(path: str, args): From 65e7a4c6cd2438120348a72752b414c1823b9627 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 30 Nov 2022 02:20:24 +0000 Subject: [PATCH 04/48] fix --- .../component/inference/fastdeploy_lib.py | 50 +++++++++--------- .../component/inference/fastdeploy_server.py | 52 +++++++++++++++++-- visualdl/server/app.py | 44 ++++++++++++++++ 3 files changed, 118 insertions(+), 28 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 0de435b1a..d734c31e0 100644 --- 
a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -55,6 +55,7 @@ def analyse_config(cur_dir:str): if model_sub_dir not in all_model_versions[model_name]: all_model_versions[model_name][model_sub_dir] = [] all_model_versions[model_name][model_sub_dir].append(version_resource_file) + return all_model_configs, all_model_versions, all_model_paths def exchange_format_to_original_format(exchange_format): ''' @@ -64,7 +65,7 @@ def exchange_format_to_original_format(exchange_format): models = [] all_models = {} if 'ensembles' in exchange_format: - emsembles = exchange_format['ensembles'] + ensembles = exchange_format['ensembles'] if 'models' in exchange_format: models = exchange_format['models'] alls = ensembles + models @@ -74,23 +75,22 @@ def exchange_format_to_original_format(exchange_format): optimization_config = model_config['optimization'] del model_config['optimization'] model_config['optimization'] = {} - model_config['optimization']['execution_accelerators'] = optimization_config - # 2. put parameters in 'cpu_execution_accelerator' and 'gpu_execution_accelerator' inside 'parameters' keyword - for accelerator_name, accelerator_items in optimization_config.items(): - reversed_accelerator_items = [] - for accelerator_item in accelerator_items: - transformed_accelerator_item = {} - for key, value in accelerator_item.items(): - if key == 'name': - transformed_accelerator_item[key] = value - else: - if 'parameters' not in transformed_accelerator_item: - transformed_accelerator_item['parameters'] = {} - transformed_accelerator_item['parameters'][key] = value - reversed_accelerator_items.append(transformed_accelerator_item) - del optimization_config[accelerator_name] - optimization_config[accelerator_name] = reversed_accelerator_items - + model_config['optimization']['executionAccelerators'] = optimization_config + # 2. put parameters in 'cpu_execution_accelerator' and 'gpu_execution_accelerator' inside 'parameters' keyword + for accelerator_name, accelerator_items in optimization_config.items(): + reversed_accelerator_items = [] + for accelerator_item in accelerator_items: + transformed_accelerator_item = {} + for key, value in accelerator_item.items(): + if key == 'name': + transformed_accelerator_item[key] = value + else: + if 'parameters' not in transformed_accelerator_item: + transformed_accelerator_item['parameters'] = {} + transformed_accelerator_item['parameters'][key] = value + reversed_accelerator_items.append(transformed_accelerator_item) + del optimization_config[accelerator_name] + optimization_config[accelerator_name] = reversed_accelerator_items # 3. delete versions information if 'versions' in model_config: del model_config['versions'] @@ -113,7 +113,9 @@ def exchange_format_to_original_format(exchange_format): del model_config_in_step['outputModels'] del model_config_in_step['inputVars'] del model_config_in_step['outputVars'] - all_models['name'] = model_config + for remove_item in remove_list: + step_configs.remove(remove_item) + all_models[model_config['name']] = model_config return all_models @@ -125,13 +127,13 @@ def original_format_to_exchange_format(original_format, version_info): exchange_format['ensembles'] = [] exchange_format['models'] = [] for model_name, model_config in original_format.items(): - # 1. remove 'execution_accelerators' keyword - # 2. put parameters in 'cpu_execution_accelerator' and 'gpu_execution_accelerator' outside + # 1. remove 'executionAccelerators' keyword + # 2. 
put parameters in 'cpuExecutionAccelerator' and 'gpuExecutionAccelerator' outside transformed_config = copy.deepcopy(model_config) if 'optimization' in model_config: - if 'execution_accelerators' in model_config['optimization']: + if 'executionAccelerators' in model_config['optimization']: transformed_optimization_config = {} - for accelerator_name, accelerator_items in model_config['optimization']['execution_accelerators'].items(): + for accelerator_name, accelerator_items in model_config['optimization']['executionAccelerators'].items(): transformed_optimization_config[accelerator_name] = [] for accelerator_item in accelerator_items: transformed_accelerator_item = {} @@ -146,7 +148,7 @@ def original_format_to_exchange_format(original_format, version_info): transformed_config['optimization'] = transformed_optimization_config # 3. add versions information if model_name in version_info: - transformed_config[model_name]['versions'] = version_info[model_name] + transformed_config['versions'] = version_info[model_name] if 'platform' in model_config and model_config['platform'] == 'ensemble': # emsemble model # 4. remove ensembleScheduling if 'ensembleScheduling' in model_config: diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 48705de47..ec9f99e1f 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -19,16 +19,23 @@ from collections import deque from pathlib import Path from threading import Lock +import threading +from multiprocessing import Process +import socket +import time +import requests from flask import request from .fastdeploy_lib import get_process_output -from .fastdeploy_lib import json2pbtxt +from .fastdeploy_lib import json2pbtxt,analyse_config +from .fastdeploy_lib import exchange_format_to_original_format, original_format_to_exchange_format from .fastdeploy_lib import kill_process from .fastdeploy_lib import launch_process from .fastdeploy_lib import pbtxt2json from visualdl.server.api import gen_result from visualdl.server.api import result +from .fastdeploy_client.client_app import create_gradio_client_app class FastDeployServerApi(object): @@ -36,6 +43,8 @@ def __init__(self): self.root_dir = Path(os.getcwd()) self.opened_servers = { } # Use to store the opened server process pid and process itself + self.client_port = None + self.model_paths = {} @result() def get_directory(self, cur_dir): @@ -54,11 +63,20 @@ def get_directory(self, cur_dir): @result() def get_config(self, cur_dir): - pass + all_model_configs, all_model_versions, all_model_paths = analyse_config(cur_dir) + for name, value in all_model_paths.items(): + self.model_paths[(Path(os.path.abspath(cur_dir)),name)] = value + return original_format_to_exchange_format(all_model_configs, all_model_versions) @result() def config_update(self, cur_dir, model_name, config): - pass + config = json.loads(config) + all_models = exchange_format_to_original_format(config) + model_dir = self.model_paths[(Path(os.path.abspath(cur_dir)), model_name)] + text_proto = json2pbtxt(json.dumps(all_models[model_name])) + with open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f: + f.write(text_proto) + return @result() def start_server(self, configs): @@ -77,6 +95,32 @@ def stop_server(self, server_id): def get_server_output(self, server_id): stdout_generator = get_process_output(server_id) return stdout_generator + + def create_fastdeploy_client(self): + if self.client_port is None: + def 
get_free_tcp_port():
+            tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
+            tcp.bind(('localhost', 0))
+            addr, port = tcp.getsockname()
+            tcp.close()
+            return port
+
+        self.client_port = get_free_tcp_port()
+        app = create_gradio_client_app()
+        thread = Process(target=app.launch, kwargs={'server_port': self.client_port})
+        thread.start()
+
+        def check_alive():
+            while True:
+                try:
+                    requests.get('http://localhost:{}/'.format(self.client_port))
+                    break
+                except Exception:
+                    time.sleep(1)
+
+        check_alive()
+        return self.client_port
 
 
 def create_fastdeploy_api_call():
@@ -88,7 +132,7 @@ def create_fastdeploy_api_call():
         'start_server': (api.start_server, ['dir', 'args']),
         'stop_server': (api.stop_server, ['server_id']),
         'get_server_output': (api.get_server_output, ['server_id']),
-        'test_server': (api.test_server_with_gradio, ['server_id'])
+        'create_fastdeploy_client': (api.create_fastdeploy_client, [])
     }
 
     def call(path: str, args):
diff --git a/visualdl/server/app.py b/visualdl/server/app.py
index 2ce5d4a0a..2be0c91bd 100644
--- a/visualdl/server/app.py
+++ b/visualdl/server/app.py
@@ -162,6 +162,50 @@ def serve_fastdeploy_api(method):
             data, mimetype, headers = fastdeploy_api_call(method, request.args)
         return make_response(
             Response(data, mimetype=mimetype, headers=headers))
+
+    @app.route(api_path + '/fastdeploy/fastdeploy_client', methods=["GET", "POST"])
+    def serve_fastdeploy_create_fastdeploy_client():
+        try:
+            if request.method == 'POST':
+                fastdeploy_api_call('create_fastdeploy_client', request.form)
+            else:
+                fastdeploy_api_call('create_fastdeploy_client', request.args)
+        except Exception as e:
+            error_msg = '{}'.format(e)
+            return make_response(error_msg)
+        return redirect(api_path + "/fastdeploy/fastdeploy_client/app", code=302)
+
+    @app.route(api_path + "/fastdeploy/fastdeploy_client/<path:path>", methods=["GET", "POST"])
+    def request_fastdeploy_create_fastdeploy_client_app(path: str):
+        '''
+        Gradio app server url interface. We route urls for gradio app to gradio server.
+
+        Args:
+            path(str): All resource path from gradio server.
+
+        Returns:
+            Anything from gradio server.
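+
+        Example (illustrative): a GET for
+        {api_path}/fastdeploy/fastdeploy_client/static/app.js is forwarded to
+        http://localhost:{port}/static/app.js, where port is the port of the
+        gradio client app, and the gradio response is passed back unchanged.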
+ ''' + if request.method == 'POST': + port = fastdeploy_api_call('create_fastdeploy_client', request.form) + else: + port = fastdeploy_api_call('create_fastdeploy_client', request.args) + if path == 'app': + proxy_url = request.url.replace(request.host_url.rstrip('/') + api_path + '/fastdeploy/fastdeploy_client/app', + 'http://localhost:{}/'.format(port)) + else: + proxy_url = request.url.replace(request.host_url.rstrip('/') + api_path + '/fastdeploy/fastdeploy_client/', + 'http://localhost:{}/'.format(port)) + resp = requests.request(method=request.method, + url=proxy_url, + headers={key: value + for (key, value) in request.headers if key != 'Host'}, + data=request.get_data(), + cookies=request.cookies, + allow_redirects=False) + headers = [(name, value) for (name, value) in resp.raw.headers.items()] + response = Response(resp.content, resp.status_code, headers) + return response @app.route(check_live_path) def check_live(): From 5f00917f7b5e45bc34614017db70c7f78297e013 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 1 Dec 2022 10:47:42 +0000 Subject: [PATCH 05/48] fix --- visualdl/component/inference/fastdeploy_lib.py | 14 ++++++++++---- visualdl/component/inference/fastdeploy_server.py | 14 +++++++++----- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index d734c31e0..a7a63a981 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -5,6 +5,7 @@ from subprocess import CalledProcessError from subprocess import PIPE from subprocess import Popen +import select import google.protobuf.json_format as json_format import google.protobuf.text_format as text_format @@ -240,8 +241,8 @@ def launch_process(kwargs: dict): cmd = ['fastdeployserver'] for key, value in kwargs.items(): cmd.append('--{}'.format(key)) - cmd.append('{}'.foramt(value)) - p = Popen(cmd, stdout=PIPE, bufsize=1, universal_newlines=True) + cmd.append('{}'.format(value)) + p = Popen(cmd, stdout=PIPE, stderr=PIPE, bufsize=1, universal_newlines=True) return p @@ -249,8 +250,13 @@ def get_process_output(process): ''' Get the standard output of a opened subprocess. 
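+    Lines are yielded as they appear on the child's stdout or stderr, using
+    select to avoid blocking on an empty pipe; the generator stops once the
+    process has exited.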
     '''
-    for line in process.stdout:
-        yield line
+    while process.poll() is None:
+        readlist, _, _ = select.select([process.stdout, process.stderr], [], [])
+        for item in readlist:
+            data = item.readline()
+            if not data:
+                return
+            yield data
 
 
 def kill_process(process):
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
index ec9f99e1f..b4493e4ea 100644
--- a/visualdl/component/inference/fastdeploy_server.py
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -80,27 +80,32 @@ def config_update(self, cur_dir, model_name, config):
 
     @result()
     def start_server(self, configs):
+        configs = json.loads(configs)
         process = launch_process(configs)
         self.opened_servers[process.pid] = process
         return process.pid
 
     @result()
     def stop_server(self, server_id):
+        server_id = int(server_id)
         if server_id not in self.opened_servers:
             return
         kill_process(self.opened_servers[server_id])
         del self.opened_servers[server_id]
 
-    @result('text/plain')
+    @result('application/octet-stream')
     def get_server_output(self, server_id):
-        stdout_generator = get_process_output(server_id)
+        server_id = int(server_id)
+        if server_id not in self.opened_servers:
+            return
+        stdout_generator = get_process_output(self.opened_servers[server_id])
         return stdout_generator
 
     def create_fastdeploy_client(self):
         if self.client_port is None:
             def get_free_tcp_port():
                 tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-                tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
+                # tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
                 tcp.bind(('localhost', 0))
                 addr, port = tcp.getsockname()
                 tcp.close()
@@ -110,7 +115,6 @@ def get_free_tcp_port():
             self.client_port = get_free_tcp_port()
             app = create_gradio_client_app()
-            thread = Process(target=app.launch, kwargs={'server_port': self.client_port})
+            thread = Process(
+                target=app.launch, kwargs={'server_port': self.client_port})
             thread.start()
-
             def check_alive():
                 while True:
                     try:
-                        requests.get('http://localhost:{}/'.format(self.client_port))
+                        requests.get('http://localhost:{}/'.format(
+                            self.client_port))
                         break
                     except Exception:
                         time.sleep(1)
@@ -129,7 +133,7 @@ def create_fastdeploy_api_call():
         'get_directory': (api.get_directory, ['dir']),
         'config_update': (api.config_update, ['dir', 'name', 'config']),
         'get_config': (api.get_config, ['dir']),
-        'start_server': (api.start_server, ['dir', 'args']),
+        'start_server': (api.start_server, ['config']),
         'stop_server': (api.stop_server, ['server_id']),
         'get_server_output': (api.get_server_output, ['server_id']),
         'create_fastdeploy_client': (api.create_fastdeploy_client, [])
     }

From 8af5ab94f54c5f02081c89a0f7adc2f08fc234e7 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Fri, 2 Dec 2022 14:11:17 +0800
Subject: [PATCH 06/48] add fastdeploy server component

---
 .../component/inference/fastdeploy_lib.py     | 502 ++++++++++--------
 .../component/inference/fastdeploy_server.py  |  75 +--
 visualdl/utils/dir.py                         |  11 +-
 3 files changed, 338 insertions(+), 250 deletions(-)

diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py
index a7a63a981..096976b05 100644
--- a/visualdl/component/inference/fastdeploy_lib.py
+++ b/visualdl/component/inference/fastdeploy_lib.py
@@ -1,266 +1,340 @@
-import os
+import copy
 import json
+import os
+import random
 import re
-import copy
-from subprocess import CalledProcessError
-from subprocess import PIPE
+import signal
+import string
 from subprocess import Popen
-import select
+from subprocess import STDOUT
 
 import google.protobuf.json_format as json_format
 import google.protobuf.text_format as text_format
 
 from .proto.model_config.protxt_pb2 import ModelConfig
+from visualdl.utils.dir import FASTDEPLOYSERVER_PATH
 
 
 def pbtxt2json(content: str):
-    '''
+    '''
     Convert protocol messages in text format to json format string.
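+    For example (illustrative), the pbtxt snippet
+        name: "yolov5"
+        max_batch_size: 16
+    is rendered as JSON with lowerCamelCase keys:
+        '{"name": "yolov5", "maxBatchSize": 16}'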
     '''
-    message = text_format.Parse(content, ModelConfig())
-    json_string = json_format.MessageToJson(message)
-    return json_string
+    message = text_format.Parse(content, ModelConfig())
+    json_string = json_format.MessageToJson(message)
+    return json_string
 
 
 def json2pbtxt(content: str):
-    '''
+    '''
     Convert json format string to protocol messages in text format.
     '''
-    message = json_format.Parse(content, ModelConfig())
-    text_proto = text_format.MessageToString(message)
-    return text_proto
+    message = json_format.Parse(content, ModelConfig())
+    text_proto = text_format.MessageToString(message)
+    return text_proto
 
 
-def analyse_config(cur_dir:str):
-    '''
+def analyse_config(cur_dir: str):
+    '''
     Analyse the model config in specified directory.
     Return a json object to describe configuration.
     '''
+    all_model_configs = {}
+    all_model_paths = {}
+    all_model_versions = {}
+    parent_dir, sub_dirs, filenames = os.walk(cur_dir).send(
+        None)  # models can only put directory in model repository,
+    # so we should only search depth 1 directories.
+    for model_dir_name in sub_dirs:
+        model_dir, model_sub_dirs, filenames = os.walk(
+            os.path.join(parent_dir, model_dir_name)).send(None)
+        model_name = os.path.basename(model_dir)
+        for filename in filenames:
+            if 'config.pbtxt' in filename:
+                all_model_paths[model_name] = model_dir  # store model path
+                json_config = json.loads(
+                    pbtxt2json(open(os.path.join(model_dir, filename)).read()))
+                all_model_configs[
+                    model_name] = json_config  # store original config file content in json format
+        for model_sub_dir in model_sub_dirs:
+            if re.match(
+                    r'\d+',
+                    model_sub_dir):  # version directory consists of numbers
+                for version_resource_file in os.listdir(
+                        os.path.join(model_dir, model_sub_dir)):
+                    if model_name not in all_model_versions:
+                        all_model_versions[model_name] = {}
+                    if model_sub_dir not in all_model_versions[model_name]:
+                        all_model_versions[model_name][model_sub_dir] = []
+                    all_model_versions[model_name][model_sub_dir].append(
+                        version_resource_file)
+    return all_model_configs, all_model_versions, all_model_paths
 
 
 def exchange_format_to_original_format(exchange_format):
-    '''
+    '''
     Change config exchange format to original format.
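+    For example (illustrative), {'models': [{'name': 'det', 'backend': 'onnxruntime'}],
+    'ensembles': []} comes back as {'det': {'name': 'det', 'backend': 'onnxruntime'}}.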
''' - ensembles = [] - models = [] - all_models = {} - if 'ensembles' in exchange_format: - ensembles = exchange_format['ensembles'] - if 'models' in exchange_format: - models = exchange_format['models'] - alls = ensembles + models - for model_config in alls: - # 1. add 'execution_accelerators' keyword - if 'optimization' in model_config: - optimization_config = model_config['optimization'] - del model_config['optimization'] - model_config['optimization'] = {} - model_config['optimization']['executionAccelerators'] = optimization_config - # 2. put parameters in 'cpu_execution_accelerator' and 'gpu_execution_accelerator' inside 'parameters' keyword - for accelerator_name, accelerator_items in optimization_config.items(): - reversed_accelerator_items = [] - for accelerator_item in accelerator_items: - transformed_accelerator_item = {} - for key, value in accelerator_item.items(): - if key == 'name': - transformed_accelerator_item[key] = value - else: - if 'parameters' not in transformed_accelerator_item: - transformed_accelerator_item['parameters'] = {} - transformed_accelerator_item['parameters'][key] = value - reversed_accelerator_items.append(transformed_accelerator_item) - del optimization_config[accelerator_name] - optimization_config[accelerator_name] = reversed_accelerator_items - # 3. delete versions information - if 'versions' in model_config: - del model_config['versions'] - if 'platform' in model_config and model_config['platform'] == 'ensemble': # emsemble model - # 4. add 'ensembleScheduling' keyword - if 'step' in model_config: - step_configs = model_config['step'] - if 'ensembleScheduling' not in model_config: - model_config['ensembleScheduling'] = {} - model_config['ensembleScheduling']['step'] = step_configs - del model_config['step'] - # 5. remove two virtual models(feed, fetch), and "modelType", "inputModels", "outputModels", "inputVars", "outputVars" - remove_list = [] - for model_config_in_step in step_configs: - if model_config_in_step['modelName'] == 'feed' or model_config_in_step['modelName'] == 'fetch': - remove_list.append(model_config_in_step) - continue - del model_config_in_step['modelType'] - del model_config_in_step['inputModels'] - del model_config_in_step['outputModels'] - del model_config_in_step['inputVars'] - del model_config_in_step['outputVars'] - for remove_item in remove_list: - step_configs.remove(remove_item) - all_models[model_config['name']] = model_config - return all_models - + ensembles = [] + models = [] + all_models = {} + if 'ensembles' in exchange_format: + ensembles = exchange_format['ensembles'] + if 'models' in exchange_format: + models = exchange_format['models'] + alls = ensembles + models + for model_config in alls: + # 1. add 'executionAccelerators' keyword + if 'optimization' in model_config: + optimization_config = model_config['optimization'] + del model_config['optimization'] + model_config['optimization'] = {} + model_config['optimization'][ + 'executionAccelerators'] = optimization_config + # 2. delete versions information + if 'versions' in model_config: + del model_config['versions'] + if 'platform' in model_config and model_config[ + 'platform'] == 'ensemble': # emsemble model + # 3. add 'ensembleScheduling' keyword + if 'step' in model_config: + step_configs = model_config['step'] + if 'ensembleScheduling' not in model_config: + model_config['ensembleScheduling'] = {} + model_config['ensembleScheduling']['step'] = step_configs + del model_config['step'] + # 4. 
remove two virtual models(feed, fetch), and + # "modelType", "inputModels", "outputModels", "inputVars", "outputVars" + remove_list = [] + for model_config_in_step in step_configs: + if model_config_in_step[ + 'modelName'] == 'feed' or model_config_in_step[ + 'modelName'] == 'fetch': + remove_list.append(model_config_in_step) + continue + del model_config_in_step['modelType'] + del model_config_in_step['inputModels'] + del model_config_in_step['outputModels'] + del model_config_in_step['inputVars'] + del model_config_in_step['outputVars'] + for remove_item in remove_list: + step_configs.remove(remove_item) + all_models[model_config['name']] = model_config + return all_models + def original_format_to_exchange_format(original_format, version_info): - ''' + ''' Change config original format to exchange format. ''' - exchange_format = {} - exchange_format['ensembles'] = [] - exchange_format['models'] = [] - for model_name, model_config in original_format.items(): - # 1. remove 'executionAccelerators' keyword - # 2. put parameters in 'cpuExecutionAccelerator' and 'gpuExecutionAccelerator' outside - transformed_config = copy.deepcopy(model_config) - if 'optimization' in model_config: - if 'executionAccelerators' in model_config['optimization']: - transformed_optimization_config = {} - for accelerator_name, accelerator_items in model_config['optimization']['executionAccelerators'].items(): - transformed_optimization_config[accelerator_name] = [] - for accelerator_item in accelerator_items: - transformed_accelerator_item = {} - for key, value in accelerator_item.items(): - if key == 'parameters': - for parameter_name, parameter_value in value.items(): - transformed_accelerator_item[parameter_name] = parameter_value - else: - transformed_accelerator_item[key] = value - transformed_optimization_config[accelerator_name].append(transformed_accelerator_item) - del transformed_config['optimization'] - transformed_config['optimization'] = transformed_optimization_config - # 3. add versions information - if model_name in version_info: - transformed_config['versions'] = version_info[model_name] - if 'platform' in model_config and model_config['platform'] == 'ensemble': # emsemble model - # 4. remove ensembleScheduling - if 'ensembleScheduling' in model_config: - if 'step' in model_config['ensembleScheduling']: - del transformed_config['ensembleScheduling'] - transformed_config['step'] = model_config['ensembleScheduling']['step'] - # 5. add two virtual models(feed, fetch), and "modelType", "inputModels", "outputModels", "inputVars", "outputVars" - for model_config_in_step in transformed_config['step']: - model_config_in_step['modelType'] = 'normal' - model_config_in_step['inputModels'] = [] - model_config_in_step['outputModels'] = [] - model_config_in_step['inputVars'] = [] - model_config_in_step['outputVars'] = [] + exchange_format = {} + exchange_format['ensembles'] = [] + exchange_format['models'] = [] + # 0. 
transform version info into component format in frontend + for model_name, version_filenames_dict in version_info.items(): + version_info_for_frontend = [] + for version_name, filenames in version_filenames_dict.items(): + version_filenames_dict_for_frontend = {} + version_filenames_dict_for_frontend['title'] = version_name + version_filenames_dict_for_frontend['key'] = version_name + version_filenames_dict_for_frontend['children'] = [] + for filename in filenames: + version_filenames_dict_for_frontend['children'].append({ + 'title': + filename, + 'key': + filename + }) + version_info_for_frontend.append( + version_filenames_dict_for_frontend) + version_info[model_name] = version_info_for_frontend + + for model_name, model_config in original_format.items(): + # 1. remove 'executionAccelerators' keyword + transformed_config = copy.deepcopy(model_config) + if 'optimization' in model_config: + if 'executionAccelerators' in model_config['optimization']: + transformed_optimization_config = model_config['optimization'][ + 'executionAccelerators'] + del transformed_config['optimization'] + transformed_config[ + 'optimization'] = transformed_optimization_config + # 2. add versions information + if model_name in version_info: + transformed_config['versions'] = version_info[model_name] + if 'platform' in model_config and model_config[ + 'platform'] == 'ensemble': # emsemble model + # 3. remove ensembleScheduling + if 'ensembleScheduling' in model_config: + if 'step' in model_config['ensembleScheduling']: + del transformed_config['ensembleScheduling'] + transformed_config['step'] = model_config[ + 'ensembleScheduling']['step'] + # 4. add two virtual models(feed, fetch), and + # "modelType", "inputModels", "outputModels", "inputVars", "outputVars" + for model_config_in_step in transformed_config['step']: + model_config_in_step['modelType'] = 'normal' + model_config_in_step['inputModels'] = [] + model_config_in_step['outputModels'] = [] + model_config_in_step['inputVars'] = [] + model_config_in_step['outputVars'] = [] + + transformed_config['step'].append({ + "modelName": "feed", + "modelType": "virtual", + "inputModels": [], + "outputModels": [], + "inputVars": [], + "outputVars": [] + }) + transformed_config['step'].append({ + "modelName": "fetch", + "modelType": "virtual", + "inputModels": [], + "outputModels": [], + "inputVars": [], + "outputVars": [] + }) + analyse_step_relationships(transformed_config['step'], + transformed_config['input'], + transformed_config['output']) + exchange_format['ensembles'].append(transformed_config) + elif 'backend' in model_config: # single model + exchange_format['models'].append(transformed_config) + return exchange_format - transformed_config['step'].append({ - "modelName": "feed", - "modelType": "virtual", - "inputModels": [], - "outputModels": [], - "inputVars": [], - "outputVars": [] - }) - transformed_config['step'].append({ - "modelName": "fetch", - "modelType": "virtual", - "inputModels": [], - "outputModels": [], - "inputVars": [], - "outputVars": [] - }) - analyse_step_relationships(transformed_config['step'], transformed_config['input'], transformed_config['output']) - exchange_format['ensembles'].append(transformed_config) - elif 'backend' in model_config: # single model - exchange_format['models'].append(transformed_config) - return exchange_format def analyse_step_relationships(step_config, inputs, outputs): - ''' - Analyse model relationships in ensemble step. And fill "inputModels", "outputModels", "inputVars", "outputVars" in step_config. 
+ ''' + Analyse model relationships in ensemble step. And fill \ + "inputModels", "outputModels", "inputVars", "outputVars" in step_config. step_config: step data in ensemble model config. inputs: inputs in ensemble model config. outputs: outputs in ensemble model config. ''' - models_dict = {} - vars_dict = {} - for model_config_in_step in step_config: - models_dict[model_config_in_step['modelName']] = model_config_in_step - if model_config_in_step['modelType'] == 'virtual': - for var in inputs: - if var['name'] not in vars_dict: - vars_dict[var['name']] = {} - vars_dict[var['name']]['from_models'] = [] - vars_dict[var['name']]['to_models'] = [] - vars_dict[var['name']]['from_models'].append('feed') - for var in outputs: - if var['name'] not in vars_dict: - vars_dict[var['name']] = {} - vars_dict[var['name']]['from_models'] = [] - vars_dict[var['name']]['to_models'] = [] - vars_dict[var['name']]['to_models'].append('fetch') - else: - for var_placehold_name, var_name in model_config_in_step['inputMap'].items(): - if var_name not in vars_dict: - vars_dict[var_name] = {} - vars_dict[var_name]['from_models'] = [] - vars_dict[var_name]['to_models'] = [] - vars_dict[var_name]['to_models'].append(model_config_in_step['modelName']) - - for var_placehold_name, var_name in model_config_in_step['outputMap'].items(): - if var_name not in vars_dict: - vars_dict[var_name] = {} - vars_dict[var_name]['from_models'] = [] - vars_dict[var_name]['to_models'] = [] - vars_dict[var_name]['from_models'].append(model_config_in_step['modelName']) - for var_name, relationships in vars_dict.items(): - for from_model in relationships['from_models']: - models_dict[from_model]['outputVars'].append(var_name) - models_dict[from_model]['outputModels'].extend(relationships['to_models']) - for to_model in relationships['to_models']: - models_dict[to_model]['inputVars'].append(var_name) - models_dict[to_model]['inputModels'].extend(relationships['from_models']) + models_dict = {} + vars_dict = {} + for model_config_in_step in step_config: + models_dict[model_config_in_step['modelName']] = model_config_in_step + if model_config_in_step['modelType'] == 'virtual': + for var in inputs: + if var['name'] not in vars_dict: + vars_dict[var['name']] = {} + vars_dict[var['name']]['from_models'] = [] + vars_dict[var['name']]['to_models'] = [] + vars_dict[var['name']]['from_models'].append('feed') + for var in outputs: + if var['name'] not in vars_dict: + vars_dict[var['name']] = {} + vars_dict[var['name']]['from_models'] = [] + vars_dict[var['name']]['to_models'] = [] + vars_dict[var['name']]['to_models'].append('fetch') + else: + for var_placehold_name, var_name in model_config_in_step[ + 'inputMap'].items(): + if var_name not in vars_dict: + vars_dict[var_name] = {} + vars_dict[var_name]['from_models'] = [] + vars_dict[var_name]['to_models'] = [] + vars_dict[var_name]['to_models'].append( + model_config_in_step['modelName']) + for var_placehold_name, var_name in model_config_in_step[ + 'outputMap'].items(): + if var_name not in vars_dict: + vars_dict[var_name] = {} + vars_dict[var_name]['from_models'] = [] + vars_dict[var_name]['to_models'] = [] + vars_dict[var_name]['from_models'].append( + model_config_in_step['modelName']) + for var_name, relationships in vars_dict.items(): + for from_model in relationships['from_models']: + models_dict[from_model]['outputVars'].append(var_name) + models_dict[from_model]['outputModels'].extend( + relationships['to_models']) + for to_model in relationships['to_models']: + 
models_dict[to_model]['inputVars'].append(var_name)
+            models_dict[to_model]['inputModels'].extend(
+                relationships['from_models'])
 
 
 def launch_process(kwargs: dict):
-    '''
+    '''
     Launch a fastdeploy server according to specified arguments.
     '''
-    cmd = ['fastdeployserver']
-    for key, value in kwargs.items():
-        cmd.append('--{}'.format(key))
-        cmd.append('{}'.format(value))
-    p = Popen(cmd, stdout=PIPE, stderr=PIPE, bufsize=1, universal_newlines=True)
-    return p
+    cmd = ['fastdeployserver']
+    for key, value in kwargs.items():
+        cmd.append('--{}'.format(key))
+        cmd.append('{}'.format(value))
+    logfilename = 'logfile-{}'.format(get_random_string(8))
+    while os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, logfilename)):
+        logfilename = 'logfile-{}'.format(get_random_string(8))
+    p = Popen(
+        cmd,
+        stdout=open(
+            os.path.join(FASTDEPLOYSERVER_PATH, logfilename), 'w',
+            buffering=1),
+        stderr=STDOUT,
+        universal_newlines=True)
+    with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(p.pid)),
+              'w') as f:
+        f.write(
+            logfilename
+        )  # file ${p.pid} contains the real log filename ${logfilename}
+    return p
 
 
+def get_random_string(length):
+    # choose from all lowercase letters
+    letters = string.ascii_lowercase
+    result_str = ''.join([random.choice(letters) for i in range(length)])
+    return result_str
+
+
-def get_process_output(process):
-    '''
+def get_process_output(pid, length):
+    '''
     Get the standard output of a opened subprocess.
     '''
-    while process.poll() is None:
-        readlist, _, _ = select.select([process.stdout, process.stderr], [], [])
-        for item in readlist:
-            data = item.readline()
-            if not data:
-                return
-            yield data
+    if os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))):
+        with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)),
+                  'r') as f:
+            logfilename = f.read()
+        # read the log file ${logfilename} if it exists
+        if os.path.exists(
+                os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))):
+            with open(
+                    os.path.join(FASTDEPLOYSERVER_PATH,
+                                 '{}'.format(logfilename)), 'r') as f:
+                f.seek(length)
+                data = f.read()
+                return data
 
 
 def kill_process(process):
-    '''
+    '''
     Stop a opened subprocess.
     '''
-    process.kill()
+    if isinstance(process, int):  # pid, use os.kill to terminate
+        pid = process
+        try:
+            os.kill(process, signal.SIGKILL)
+        except Exception:
+            pass
+    else:
+        pid = process.pid
+        process.kill()
+        try:
+            process.wait(10)
+        except Exception:
+            pass
+    # delete the pid file ${pid} and its log file if they exist
+    if os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))):
+        with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)),
+                  'r') as f:
+            logfilename = f.read()
+        if os.path.exists(
+                os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))):
+            os.remove(
+                os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename)))
+        os.remove(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)))
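A minimal sketch of the pid-to-logfile indirection implemented above: launch_process
builds the command 'fastdeployserver --<key> <value> ...' and points its stdout at the
log file, so output can later be read back by pid. The pid value and filenames below
are hypothetical; FASTDEPLOYSERVER_PATH is the directory introduced in
visualdl/utils/dir.py further down in this patch:

    import os
    from visualdl.utils.dir import FASTDEPLOYSERVER_PATH

    pid = 4242  # pid returned by launch_process for a fastdeployserver run
    # the file named after the pid stores the name of the real log file
    with open(os.path.join(FASTDEPLOYSERVER_PATH, str(pid))) as f:
        logfilename = f.read()  # e.g. 'logfile-abcdefgh'
    # read new output starting from the last offset handed back to the caller
    with open(os.path.join(FASTDEPLOYSERVER_PATH, logfilename)) as f:
        f.seek(0)  # 0 on the first poll; pass the previous end offset later
        print(f.read())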
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
index b4493e4ea..450b36757 100644
--- a/visualdl/component/inference/fastdeploy_server.py
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -12,30 +12,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =======================================================================
-import base64
 import json
 import os
-import tempfile
-from collections import deque
-from pathlib import Path
-from threading import Lock
-import threading
-from multiprocessing import Process
 import socket
 import time
+from multiprocessing import Process
+from pathlib import Path
 
 import requests
-from flask import request
 
+from .fastdeploy_client.client_app import create_gradio_client_app
+from .fastdeploy_lib import analyse_config
+from .fastdeploy_lib import exchange_format_to_original_format
 from .fastdeploy_lib import get_process_output
-from .fastdeploy_lib import json2pbtxt,analyse_config
-from .fastdeploy_lib import exchange_format_to_original_format, original_format_to_exchange_format
+from .fastdeploy_lib import json2pbtxt
 from .fastdeploy_lib import kill_process
 from .fastdeploy_lib import launch_process
-from .fastdeploy_lib import pbtxt2json
+from .fastdeploy_lib import original_format_to_exchange_format
 from visualdl.server.api import gen_result
 from visualdl.server.api import result
-from .fastdeploy_client.client_app import create_gradio_client_app
+from visualdl.utils.dir import FASTDEPLOYSERVER_PATH
 
 
 class FastDeployServerApi(object):
@@ -63,16 +59,19 @@ def get_directory(self, cur_dir):
     @result()
     def get_config(self, cur_dir):
-        all_model_configs, all_model_versions, all_model_paths = analyse_config(cur_dir)
+        all_model_configs, all_model_versions, all_model_paths = analyse_config(
+            cur_dir)
         for name, value in all_model_paths.items():
-            self.model_paths[(Path(os.path.abspath(cur_dir)),name)] = value
-        return original_format_to_exchange_format(all_model_configs, all_model_versions)
+            self.model_paths[(Path(os.path.abspath(cur_dir)), name)] = value
+        return original_format_to_exchange_format(all_model_configs,
+                                                  all_model_versions)
 
     @result()
     def config_update(self, cur_dir, model_name, config):
         config = json.loads(config)
         all_models = exchange_format_to_original_format(config)
-        model_dir = self.model_paths[(Path(os.path.abspath(cur_dir)), model_name)]
+        model_dir = self.model_paths[(Path(os.path.abspath(cur_dir)),
+                                      model_name)]
         text_proto = json2pbtxt(json.dumps(all_models[model_name]))
         with open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f:
             f.write(text_proto)
@@ -88,21 +87,34 @@ def start_server(self, configs):
 
     @result()
     def stop_server(self, server_id):
         server_id = int(server_id)
-        if server_id not in self.opened_servers:
-            return
-        kill_process(self.opened_servers[server_id])
-        del self.opened_servers[server_id]
-
-    @result('application/octet-stream')
-    def get_server_output(self, server_id):
+        if server_id in self.opened_servers:  # check if server_id in self.opened_servers
+            kill_process(self.opened_servers[server_id])
+            del self.opened_servers[server_id]
+        elif str(server_id) in set(
+                os.listdir(FASTDEPLOYSERVER_PATH)):  # check if server_id in
+            # FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn)
+            kill_process(server_id)
+        # reap servers that were killed by another vdl app instance and became zombies
+        for server_id, process in list(self.opened_servers.items()):
+            if process.poll() is not None:
+                del self.opened_servers[server_id]
+
+    @result('text/plain')
+    def get_server_output(self, server_id, length):
         server_id = int(server_id)
-        if server_id not in self.opened_servers:
+        length = int(length)
+        if server_id in self.opened_servers:  # check if server_id in self.opened_servers
+            return get_process_output(server_id, length)
+        elif str(server_id) in set(
os.listdir(FASTDEPLOYSERVER_PATH)): # check if server_id in + # FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn) + return get_process_output(server_id, length) + else: return - stdout_generator = get_process_output(self.opened_servers[server_id]) - return stdout_generator - + def create_fastdeploy_client(self): if self.client_port is None: + def get_free_tcp_port(): tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) @@ -113,12 +125,15 @@ def get_free_tcp_port(): self.client_port = get_free_tcp_port() app = create_gradio_client_app() - thread = Process(target=app.launch, kwargs={'server_port': self.client_port}) + thread = Process( + target=app.launch, kwargs={'server_port': self.client_port}) thread.start() + def check_alive(): while True: try: - requests.get('http://localhost:{}/'.format(self.client_port)) + requests.get('http://localhost:{}/'.format( + self.client_port)) break except Exception: time.sleep(1) @@ -135,7 +150,7 @@ def create_fastdeploy_api_call(): 'get_config': (api.get_config, ['dir']), 'start_server': (api.start_server, ['config']), 'stop_server': (api.stop_server, ['server_id']), - 'get_server_output': (api.get_server_output, ['server_id']), + 'get_server_output': (api.get_server_output, ['server_id', 'length']), 'create_fastdeploy_client': (api.create_fastdeploy_client, []) } diff --git a/visualdl/utils/dir.py b/visualdl/utils/dir.py index 4e9ccd63f..13aada1c4 100644 --- a/visualdl/utils/dir.py +++ b/visualdl/utils/dir.py @@ -12,21 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # ======================================================================= - -import os import json - +import os VDL_SERVER = "https://www.paddlepaddle.org.cn/paddle/visualdl/service/server" -default_vdl_config = { - 'server_url': VDL_SERVER -} +default_vdl_config = {'server_url': VDL_SERVER} USER_HOME = os.path.expanduser('~') VDL_HOME = os.path.join(USER_HOME, '.visualdl') CONF_HOME = os.path.join(VDL_HOME, 'conf') CONFIG_PATH = os.path.join(CONF_HOME, 'config.json') +FASTDEPLOYSERVER_PATH = os.path.join(VDL_HOME, 'fastdeployserver') def init_vdl_config(): @@ -35,3 +32,5 @@ def init_vdl_config(): if not os.path.exists(CONFIG_PATH) or 0 == os.path.getsize(CONFIG_PATH): with open(CONFIG_PATH, 'w') as fp: fp.write(json.dumps(default_vdl_config)) + if not os.path.exists(FASTDEPLOYSERVER_PATH): + os.makedirs(FASTDEPLOYSERVER_PATH, exist_ok=True) From 380b950d97001fde35e695d889c6065ade8542fc Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 2 Dec 2022 14:31:33 +0800 Subject: [PATCH 07/48] add fastdeploy server and client --- requirements.txt | 4 + .../inference/fastdeploy_client/client_app.py | 381 ++++++++++++++++++ .../fastdeploy_client/http_client_manager.py | 92 +++++ .../inference/fastdeploy_client/visualizer.py | 102 +++++ 4 files changed, 579 insertions(+) create mode 100644 visualdl/component/inference/fastdeploy_client/client_app.py create mode 100644 visualdl/component/inference/fastdeploy_client/http_client_manager.py create mode 100644 visualdl/component/inference/fastdeploy_client/visualizer.py diff --git a/requirements.txt b/requirements.txt index 12588e37c..b3a3bd220 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,7 @@ multiprocess packaging x2paddle rarfile +gradio +fastdeploy-python +tritonclient[all] +attrdict diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py 
b/visualdl/component/inference/fastdeploy_client/client_app.py new file mode 100644 index 000000000..c2428e5ac --- /dev/null +++ b/visualdl/component/inference/fastdeploy_client/client_app.py @@ -0,0 +1,381 @@ +import gradio as gr +import numpy as np + +from .http_client_manager import HttpClientManager +from .visualizer import visualize_detection +from .visualizer import visualize_face_detection +from .visualizer import visualize_keypoint_detection +from .visualizer import visualize_matting +from .visualizer import visualize_ocr +from .visualizer import visualize_segmentation + +_http_manager = HttpClientManager() + +supported_tasks = { + 'detection': visualize_detection, + 'facedet': visualize_face_detection, + 'keypointdetection': visualize_keypoint_detection, + 'segmentation': visualize_segmentation, + 'matting': visualize_matting, + 'ocr': visualize_ocr, + 'others(raw data)': lambda x: str(x) +} + + +def create_gradio_client_app(): # noqa:C901 + css = """ + .gradio-container { + font-family: 'IBM Plex Sans', sans-serif; + } + .gr-button { + color: white; + border-color: black; + background: black; + } + input[type='range'] { + accent-color: black; + } + .dark input[type='range'] { + accent-color: #dfdfdf; + } + .container { + max-width: 1200px; + margin: auto; + padding-top: 1.5rem; + } + #gallery { + min-height: 22rem; + margin-bottom: 15px; + margin-left: auto; + margin-right: auto; + border-bottom-right-radius: .5rem !important; + border-bottom-left-radius: .5rem !important; + } + #gallery>div>.h-full { + min-height: 20rem; + } + .details:hover { + text-decoration: underline; + } + .gr-button { + white-space: nowrap; + } + .gr-button:focus { + border-color: rgb(147 197 253 / var(--tw-border-opacity)); + outline: none; + box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000); + --tw-border-opacity: 1; + --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) \ + var(--tw-ring-offset-color); + --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px var(--tw-ring-offset-width)) var(--tw-ring-color); + --tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity)); + --tw-ring-opacity: .5; + } + .footer { + margin-bottom: 45px; + margin-top: 35px; + text-align: center; + border-bottom: 1px solid #e5e5e5; + } + .footer>p { + font-size: .8rem; + display: inline-block; + padding: 0 10px; + transform: translateY(10px); + background: white; + } + .dark .footer { + border-color: #303030; + } + .dark .footer>p { + background: #0b0f19; + } + .prompt h4{ + margin: 1.25em 0 .25em 0; + font-weight: bold; + font-size: 115%; + } + """ + + block = gr.Blocks(css=css) + + with block: + gr.HTML(""" +
+            <div style="text-align: center; max-width: 650px; margin: 0 auto;">
+              <div>
+                <h1 style="font-weight: 900; margin-bottom: 7px;">
+                  FastDeploy Client
+                </h1>
+              </div>
+              <p style="margin-bottom: 10px; font-size: 94%">
+                The client is used for creating requests to fastdeploy server.
+              </p>
+            </div>
+ """) + with gr.Group(): + with gr.Box(): + with gr.Column(): + with gr.Row(): + server_addr_text = gr.Textbox( + label="Server address", + show_label=True, + max_lines=1, + placeholder="localhost:8000", + ) + + model_name_text = gr.Textbox( + label="model name", + show_label=True, + max_lines=1, + placeholder="yolov5", + ) + + model_version_text = gr.Textbox( + label="model version", + show_label=True, + max_lines=1, + placeholder="1", + ) + + check_button = gr.Button("GetInputOutputName") + + with gr.Box(): + gr.Markdown("Inputs") + with gr.Tab("component format"): + gr.Markdown( + "Fill inputs according to your need, choose either image or text for each input." + ) + with gr.Column(): + with gr.Accordion("input 1"): + input_name_1_text = gr.Textbox( + label="input name", interactive=False) + input_1_image = gr.Image(type='numpy') + input_1_text = gr.Textbox( + label="contents", max_lines=1000) + with gr.Accordion("input 2", open=False): + input_name_2_text = gr.Textbox( + label="input name", interactive=False) + input_2_image = gr.Image(type='numpy') + input_2_text = gr.Textbox( + label="contents", max_lines=1000) + + with gr.Accordion("input 3", open=False): + input_name_3_text = gr.Textbox( + label="input name", interactive=False) + input_3_image = gr.Image(type='numpy') + input_3_text = gr.Textbox( + label="contents", max_lines=1000) + with gr.Box(): + gr.Markdown("Outputs") + with gr.Column(): + with gr.Accordion("output 1"): + output_name_1_text = gr.Textbox( + label="output name", interactive=False) + task_select_items1 = gr.Dropdown( + choices=list(supported_tasks.keys()), + value='others(raw data)', + label='task type') + output_1_text = gr.Textbox( + label="raw data", + interactive=False, + show_label=True) + output_1_image = gr.Image(interactive=False) + with gr.Accordion("output 2", open=False): + output_name_2_text = gr.Textbox( + label="output name", interactive=False) + task_select_items2 = gr.Dropdown( + choices=list(supported_tasks.keys()), + value='others(raw data)', + label='task type') + output_2_text = gr.Textbox( + label="raw data", + interactive=False, + show_label=True, + ) + output_2_image = gr.Image(interactive=False) + + with gr.Accordion("output 3", open=False): + output_name_3_text = gr.Textbox( + label="output name", interactive=False) + task_select_items3 = gr.Dropdown( + choices=list(supported_tasks.keys()), + value='others(raw data)', + label='task type') + output_3_text = gr.Textbox( + label="raw data", + interactive=False, + show_label=True) + output_3_image = gr.Image(interactive=False) + component_submit_button = gr.Button("submit") + with gr.Tab("raw format"): + raw_payload_text = gr.Textbox( + label="request payload", max_lines=10000) + with gr.Box(): + gr.Markdown("Outputs") + with gr.Column(): + output_raw_text = gr.Textbox( + label="raw data", interactive=False) + raw_submit_button = gr.Button("submit") + + status_text = gr.Textbox( + label="status", + show_label=True, + max_lines=1, + interactive=False) + all_input_output_components = [ + input_name_1_text, input_name_2_text, input_name_3_text, + input_1_image, input_2_image, input_3_image, input_1_text, + input_2_text, input_3_text, output_name_1_text, output_name_2_text, + output_name_3_text, output_1_text, output_2_text, output_3_text, + output_1_image, output_2_image, output_3_image, task_select_items1, + task_select_items2, task_select_items3 + ] + + def get_input_output_name(server_addr, model_name, model_version): + try: + input_metas, output_metas = _http_manager.get_model_meta( + 
server_addr, model_name, model_version) + except Exception as e: + return {status_text: str(e)} + input_name_texts = [ + input_name_1_text, input_name_2_text, input_name_3_text + ] + output_name_texts = [ + output_name_1_text, output_name_2_text, output_name_3_text + ] + results = { + component: None + for component in all_input_output_components + } + results[task_select_items1] = 'others(raw data)' + results[task_select_items2] = 'others(raw data)' + results[task_select_items3] = 'others(raw data)' + results[status_text] = 'GetInputOutputName Successful' + for i, input_meta in enumerate(input_metas): + results[input_name_texts[i]] = input_meta['name'] + for i, output_meta in enumerate(output_metas): + results[output_name_texts[i]] = output_meta['name'] + return results + + def component_inference(*args): + server_addr = args[0] + model_name = args[1] + model_version = args[2] + input_name_1 = args[3] + input_1_image_data = args[4] + input_1_text_data = args[5] + input_name_2 = args[6] + input_2_image_data = args[7] + input_2_text_data = args[8] + input_name_3 = args[9] + input_3_image_data = args[10] + input_3_text_data = args[11] + task_select_items1_data = args[12] + task_select_items2_data = args[13] + task_select_items3_data = args[14] + if server_addr and model_name and model_version: + inputs = {} + if input_name_1: + if input_1_image_data is not None: + inputs[input_name_1] = np.array([input_1_image_data]) + if input_1_text_data: + inputs[input_name_1] = np.array( + [[input_1_text_data.encode('utf-8')]], + dtype=np.object_) + if input_name_2: + if input_2_image_data is not None: + inputs[input_name_2] = np.array([input_2_image_data]) + if input_2_text_data: + inputs[input_name_2] = np.array( + [[input_2_text_data.encode('utf-8')]], + dtype=np.object_) + if input_name_3: + if input_3_image_data is not None: + inputs[input_name_3] = np.array([input_3_image_data]) + if input_3_text_data: + inputs[input_name_3] = np.array( + [[input_3_text_data.encode('utf-8')]], + dtype=np.object_) + try: + infer_results = _http_manager.infer( + server_addr, model_name, model_version, inputs) + results = {status_text: 'Inference Successful'} + output_name_texts = [ + output_name_1_text, output_name_2_text, + output_name_3_text + ] + output_texts = [ + output_1_text, output_2_text, output_3_text + ] + output_images = [ + output_1_image, output_2_image, output_3_image + ] + output_task_types = [ + task_select_items1_data, task_select_items2_data, + task_select_items3_data + ] + for i, (output_name, + data) in enumerate(infer_results.items()): + results[output_name_texts[i]] = output_name + results[output_texts[i]] = str(data) + if output_task_types[i] != 'others(raw data)': + results[output_images[i]] = supported_tasks[ + output_task_types[i]](input_1_image_data, data) + return results + except Exception as e: + return {status_text: 'Error: {}'.format(e)} + else: + return { + status_text: + 'Please input server addr, model name and model version.' 
+ } + + def raw_inference(*args): + server_addr = args[0] + model_name = args[1] + model_version = args[2] + payload_text = args[3] + try: + result = _http_manager.raw_infer(server_addr, model_name, + model_version, payload_text) + results = { + status_text: 'Get response from server', + output_raw_text: result + } + return results + except Exception as e: + return {status_text: 'Error: {}'.format(e)} + + check_button.click( + fn=get_input_output_name, + inputs=[server_addr_text, model_name_text, model_version_text], + outputs=[*all_input_output_components, status_text]) + component_submit_button.click( + fn=component_inference, + inputs=[ + server_addr_text, model_name_text, model_version_text, + input_name_1_text, input_1_image, input_1_text, + input_name_2_text, input_2_image, input_2_text, + input_name_3_text, input_3_image, input_3_text, + task_select_items1, task_select_items2, task_select_items3 + ], + outputs=[ + output_name_1_text, output_name_2_text, output_name_3_text, + output_1_text, output_2_text, output_3_text, output_1_image, + output_2_image, output_3_image, status_text + ]) + raw_submit_button.click( + fn=raw_inference, + inputs=[ + server_addr_text, model_name_text, model_version_text, + raw_payload_text + ], + outputs=[output_raw_text, status_text]) + return block diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py new file mode 100644 index 000000000..e9a6ef4d0 --- /dev/null +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -0,0 +1,92 @@ +import json + +import requests +import tritonclient.http as httpclient +from attrdict import AttrDict +from tritonclient.utils import InferenceServerException + + +def convert_http_metadata_config(metadata): + metadata = AttrDict(metadata) + + return metadata + + +def prepare_request(inputs_meta, inputs_data, outputs_meta): + ''' + inputs_meta: inputs meta information from model. name: info + inputs_data: users input data. 
name: data + ''' + # Set the input data + inputs = [] + for input_dict in inputs_meta: + input_name = input_dict['name'] + if input_name not in inputs_data: + raise RuntimeError( + 'Error: input name {} required for model not existed.'.format( + input_name)) + infer_input = httpclient.InferInput( + input_name, inputs_data[input_name].shape, input_dict['datatype']) + infer_input.set_data_from_numpy(inputs_data[input_name]) + inputs.append(infer_input) + outputs = [] + for output_dict in outputs_meta: + infer_output = httpclient.InferRequestedOutput(output_dict.name) + outputs.append(infer_output) + return inputs, outputs + + +class HttpClientManager: + def __init__(self): + self.clients = {} # server url: httpclient + + def _create_client(self, server_url): + if server_url in self.clients: + return self.clients[server_url] + try: + fastdeploy_client = httpclient.InferenceServerClient(server_url) + self.clients[server_url] = fastdeploy_client + return fastdeploy_client + except Exception: + raise RuntimeError( + 'Can not connect to server {}, please check your \ + server address'.format(server_url)) + + def infer(self, server_url, model_name, model_version, inputs): + fastdeploy_client = self._create_client(server_url) + input_metadata, output_metadata = self.get_model_meta( + server_url, model_name, model_version) + inputs, outputs = prepare_request(input_metadata, inputs, + output_metadata) + response = fastdeploy_client.infer( + model_name, inputs, model_version=model_version, outputs=outputs) + results = {} + for output in output_metadata: + result = response.as_numpy(output.name) # datatype: numpy + if output.datatype == 'BYTES': + result = result[0][0] # datatype: bytes + result = json.loads(result) # datatype: json + else: + result = result[0] + results[output.name] = result + return results + + def raw_infer(self, server_url, model_name, model_version, raw_input): + url = 'http://{}/v2/models/{}/versions/{}/infer'.format( + server_url, model_name, model_version) + res = requests.post(url, data=json.dumps(json.loads(raw_input))) + return json.dumps(res.json()) + + def get_model_meta(self, server_url, model_name, model_version): + fastdeploy_client = self._create_client(server_url) + try: + model_metadata = fastdeploy_client.get_model_metadata( + model_name=model_name, model_version=model_version) + except InferenceServerException as e: + raise RuntimeError("Failed to retrieve the metadata: " + str(e)) + + model_metadata = convert_http_metadata_config(model_metadata) + + input_metadata = model_metadata.inputs + output_metadata = model_metadata.outputs + return input_metadata, output_metadata diff --git a/visualdl/component/inference/fastdeploy_client/visualizer.py b/visualdl/component/inference/fastdeploy_client/visualizer.py new file mode 100644 index 000000000..5a255f6ab --- /dev/null +++ b/visualdl/component/inference/fastdeploy_client/visualizer.py @@ -0,0 +1,102 @@ +import fastdeploy as fd +import numpy as np + +__all__ = [ + 'visualize_detection', 'visualize_keypoint_detection', + 'visualize_face_detection', 'visualize_segmentation', 'visualize_matting', + 'visualize_ocr' +] + + +def visualize_detection(image, data): + boxes = np.array(data['boxes']) + scores = np.array(data['scores']) + label_ids = np.array(data['label_ids']) + masks = np.array(data['masks']) + contain_masks = data['contain_masks'] + detection_result = fd.C.vision.DetectionResult() + detection_result.boxes = boxes + detection_result.scores = scores + detection_result.label_ids = label_ids + 
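+    # contain_masks flags whether masks actually carries instance-segmentation
+    # data; both fields arrive in the same JSON payload decoded by the client.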
detection_result.masks = masks + detection_result.contain_masks = contain_masks + result = fd.vision.vis_detection(image, detection_result) + return result + + +def visualize_keypoint_detection(image, data): + keypoints = np.array(data['keypoints']) + scores = np.array(data['scores']) + num_joints = np.array(data['num_joints']) + + detection_result = fd.C.vision.KeyPointDetectionResult() + detection_result.keypoints = keypoints + detection_result.scores = scores + detection_result.num_joints = num_joints + + result = fd.vision.vis_keypoint_detection(image, detection_result) + return result + + +def visualize_face_detection(image, data): + data = np.array(data['data']) + scores = np.array(data['scores']) + landmarks = np.array(data['landmarks']) + landmarks_per_face = data['landmarks_per_face'] + + detection_result = fd.C.vision.FaceDetectionResult() + detection_result.data = data + detection_result.scores = scores + detection_result.landmarks = landmarks + detection_result.landmarks_per_face = landmarks_per_face + + result = fd.vision.vis_face_detection(image, detection_result) + return result + + +def visualize_segmentation(image, data): + label_ids = np.array(data['label_ids']) + score_map = np.array(data['score_map']) + shape = np.array(data['shape']) + + segmentation_result = fd.C.vision.SegmentationResult() + segmentation_result.shape = shape + segmentation_result.score_map = score_map + segmentation_result.label_ids = label_ids + + result = fd.vision.vis_segmentation(image, segmentation_result) + return result + + +def visualize_matting(image, data): + alpha = np.array(data['alpha']) + foreground = np.array(data['foreground']) + contain_foreground = data['contain_foreground'] + shape = np.array(data['shape']) + + matting_result = fd.C.vision.MattingResult() + matting_result.alpha = alpha + matting_result.foreground = foreground + matting_result.contain_foreground = contain_foreground + matting_result.shape = shape + + result = fd.vision.vis_matting(image, matting_result) + return result + + +def visualize_ocr(image, data): + boxes = np.array(data['boxes']) + text = np.array(data['text']) + rec_scores = np.array(data['rec_scores']) + cls_scores = np.array(data['cls_scores']) + cls_labels = data['cls_labels'] + + ocr_result = fd.C.vision.OCRResult() + ocr_result.boxes = boxes + ocr_result.text = text + ocr_result.rec_scores = rec_scores + ocr_result.cls_scores = cls_scores + ocr_result.cls_labels = cls_labels + + result = fd.vision.vis_ppocr(image, ocr_result) + return result From 16e55fd1f35213471a0fb91f9b63362fde95129f Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 7 Dec 2022 11:17:37 +0800 Subject: [PATCH 08/48] add exception description --- visualdl/component/inference/fastdeploy_server.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 450b36757..7801bf281 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -81,6 +81,10 @@ def config_update(self, cur_dir, model_name, config): def start_server(self, configs): configs = json.loads(configs) process = launch_process(configs) + if process.poll() is not None: + raise RuntimeError( + "Launch fastdeploy server failed, please check your launching arguments" + ) self.opened_servers[process.pid] = process return process.pid From ef08cf23466902e9299fa99c3701a93ce9f74f74 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 7 Dec 2022 14:50:24 +0800 Subject: 
[PATCH 09/48] fix --- .../component/inference/fastdeploy_lib.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 096976b05..a5b5ccd47 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -219,32 +219,32 @@ def analyse_step_relationships(step_config, inputs, outputs): for var in inputs: if var['name'] not in vars_dict: vars_dict[var['name']] = {} - vars_dict[var['name']]['from_models'] = [] - vars_dict[var['name']]['to_models'] = [] - vars_dict[var['name']]['from_models'].append('feed') + vars_dict[var['name']]['from_models'] = set() + vars_dict[var['name']]['to_models'] = set() + vars_dict[var['name']]['from_models'].add('feed') for var in outputs: if var['name'] not in vars_dict: vars_dict[var['name']] = {} - vars_dict[var['name']]['from_models'] = [] - vars_dict[var['name']]['to_models'] = [] - vars_dict[var['name']]['to_models'].append('fetch') + vars_dict[var['name']]['from_models'] = set() + vars_dict[var['name']]['to_models'] = set() + vars_dict[var['name']]['to_models'].add('fetch') else: for var_placehold_name, var_name in model_config_in_step[ 'inputMap'].items(): if var_name not in vars_dict: vars_dict[var_name] = {} - vars_dict[var_name]['from_models'] = [] - vars_dict[var_name]['to_models'] = [] - vars_dict[var_name]['to_models'].append( + vars_dict[var_name]['from_models'] = set() + vars_dict[var_name]['to_models'] = set() + vars_dict[var_name]['to_models'].add( model_config_in_step['modelName']) for var_placehold_name, var_name in model_config_in_step[ 'outputMap'].items(): if var_name not in vars_dict: vars_dict[var_name] = {} - vars_dict[var_name]['from_models'] = [] - vars_dict[var_name]['to_models'] = [] - vars_dict[var_name]['from_models'].append( + vars_dict[var_name]['from_models'] = set() + vars_dict[var_name]['to_models'] = set() + vars_dict[var_name]['from_models'].add( model_config_in_step['modelName']) for var_name, relationships in vars_dict.items(): for from_model in relationships['from_models']: From 50f080270953d56ed19864e63871a34b103a2b1c Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 7 Dec 2022 17:15:00 +0800 Subject: [PATCH 10/48] add model repository judgement --- visualdl/component/inference/fastdeploy_lib.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index a5b5ccd47..89df30e3c 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -67,6 +67,9 @@ def analyse_config(cur_dir: str): all_model_versions[model_name][model_sub_dir] = [] all_model_versions[model_name][model_sub_dir].append( version_resource_file) + if not all_model_configs: + raise Exception( + 'Not a valid model repository, please choose the right path') return all_model_configs, all_model_versions, all_model_paths From 66064d26ad5c231fa3f6f2251e57f5c149863e82 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 8 Dec 2022 10:56:16 +0800 Subject: [PATCH 11/48] add component tab for fastdeploy client --- visualdl/server/api.py | 5 ++++- visualdl/server/args.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/visualdl/server/api.py b/visualdl/server/api.py index 502bf48f0..0ef7b6dc1 100644 --- a/visualdl/server/api.py +++ b/visualdl/server/api.py @@ -417,7 +417,10 @@ def get_component_tabs(*apis, vdl_args, 
request_args): all_tabs.update(api('component_tabs', request_args)) all_tabs.add('static_graph') else: - return ['static_graph', 'x2paddle', 'fastdeploy_server'] + return [ + 'static_graph', 'x2paddle', 'fastdeploy_server', + 'fastdeploy_client' + ] return list(all_tabs) diff --git a/visualdl/server/args.py b/visualdl/server/args.py index cb42422c7..71f97afb1 100644 --- a/visualdl/server/args.py +++ b/visualdl/server/args.py @@ -78,7 +78,8 @@ def validate_args(args): supported_tabs = [ 'scalar', 'image', 'text', 'embeddings', 'audio', 'histogram', 'hyper_parameters', 'static_graph', 'dynamic_graph', 'pr_curve', - 'roc_curve', 'profiler', 'x2paddle', 'fastdeploy_server' + 'roc_curve', 'profiler', 'x2paddle', 'fastdeploy_server', + 'fastdeploy_client' ] if args.component_tabs is not None: for component_tab in args.component_tabs: From 690f55d65546f8907016bb7ea9dceda0708e317f Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 8 Dec 2022 14:36:50 +0800 Subject: [PATCH 12/48] update more tasks in fastdeploy client --- .../inference/fastdeploy_client/client_app.py | 18 +++++++++ .../fastdeploy_client/http_client_manager.py | 14 +++++++ .../inference/fastdeploy_client/visualizer.py | 39 ++++++++++++++++++- .../component/inference/fastdeploy_lib.py | 14 +++++++ 4 files changed, 83 insertions(+), 2 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py index c2428e5ac..cc1925bb9 100644 --- a/visualdl/component/inference/fastdeploy_client/client_app.py +++ b/visualdl/component/inference/fastdeploy_client/client_app.py @@ -1,9 +1,25 @@ +# Copyright (c) 2022 VisualDL Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ======================================================================= import gradio as gr import numpy as np from .http_client_manager import HttpClientManager from .visualizer import visualize_detection +from .visualizer import visualize_face_alignment from .visualizer import visualize_face_detection +from .visualizer import visualize_headpose from .visualizer import visualize_keypoint_detection from .visualizer import visualize_matting from .visualizer import visualize_ocr @@ -18,6 +34,8 @@ 'segmentation': visualize_segmentation, 'matting': visualize_matting, 'ocr': visualize_ocr, + 'facealignment': visualize_face_alignment, + 'headpose': visualize_headpose, 'others(raw data)': lambda x: str(x) } diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index e9a6ef4d0..0fb300023 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -1,3 +1,17 @@ +# Copyright (c) 2022 VisualDL Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ======================================================================= import json import requests diff --git a/visualdl/component/inference/fastdeploy_client/visualizer.py b/visualdl/component/inference/fastdeploy_client/visualizer.py index 5a255f6ab..5abe570e0 100644 --- a/visualdl/component/inference/fastdeploy_client/visualizer.py +++ b/visualdl/component/inference/fastdeploy_client/visualizer.py @@ -1,10 +1,25 @@ +# Copyright (c) 2022 VisualDL Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ======================================================================= import fastdeploy as fd import numpy as np __all__ = [ 'visualize_detection', 'visualize_keypoint_detection', - 'visualize_face_detection', 'visualize_segmentation', 'visualize_matting', - 'visualize_ocr' + 'visualize_face_detection', 'visualize_face_alignment', + 'visualize_segmentation', 'visualize_matting', 'visualize_ocr', + 'visualize_headpose' ] @@ -54,6 +69,16 @@ def visualize_face_detection(image, data): return result +def visualize_face_alignment(image, data): + landmarks = np.array(data['landmarks']) + + facealignment_result = fd.C.vision.FaceAlignmentResult() + facealignment_result.landmarks = landmarks + + result = fd.vision.vis_face_alignment(image, facealignment_result) + return result + + def visualize_segmentation(image, data): label_ids = np.array(data['label_ids']) score_map = np.array(data['score_map']) @@ -100,3 +125,13 @@ def visualize_ocr(image, data): result = fd.vision.vis_ppocr(image, ocr_result) return result + + +def visualize_headpose(image, data): + euler_angles = np.array(data['euler_angles']) + + headpose_result = fd.C.vision.HeadPoseResult() + headpose_result.euler_angles = euler_angles + + result = fd.vision.vis_headpose(image, headpose_result) + return result diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 89df30e3c..a9faec3e3 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -1,3 +1,17 @@ +# Copyright (c) 2022 VisualDL Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ======================================================================= import copy import json import os From 78933331f290834b17862db08c26781171a0687e Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 8 Dec 2022 14:52:34 +0800 Subject: [PATCH 13/48] sort filenames --- visualdl/component/inference/fastdeploy_server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 7801bf281..deaf2a69c 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -49,6 +49,7 @@ def get_directory(self, cur_dir): cur_dir, sub_dirs, filenames = os.walk(cur_dir).send(None) if Path(self.root_dir) != Path(os.path.abspath(cur_dir)): sub_dirs.append('..') + sub_dirs = sorted(sub_dirs) directorys = { 'parent_dir': os.path.relpath(Path(os.path.abspath(cur_dir)), self.root_dir), From e5e41b084a12be484dc1726322a879f00a2d47ee Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 8 Dec 2022 15:10:20 +0800 Subject: [PATCH 14/48] backup config --- visualdl/component/inference/fastdeploy_server.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index deaf2a69c..545357e7d 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ======================================================================= +import datetime import json import os +import shutil import socket import time from multiprocessing import Process @@ -74,6 +76,12 @@ def config_update(self, cur_dir, model_name, config): model_dir = self.model_paths[(Path(os.path.abspath(cur_dir)), model_name)] text_proto = json2pbtxt(json.dumps(all_models[model_name])) + # backup user's config.pbtxt first, when data corrupted by front-end, we still can recovery data + shutil.copy( + os.path.join(model_dir, 'config.pbtxt'), + os.path.join( + model_dir, 'config_vdlbackup_{}.pbtxt'.format( + datetime.datetime.now().isoformat()))) with open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f: f.write(text_proto) return From 2d857ae63b05c3a50b48797ecb3cea872f299464 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 8 Dec 2022 15:16:24 +0800 Subject: [PATCH 15/48] noqa for autogenerated file --- visualdl/component/inference/proto/model_config/protxt_pb2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/visualdl/component/inference/proto/model_config/protxt_pb2.py b/visualdl/component/inference/proto/model_config/protxt_pb2.py index 4d5b645d4..70bf7b906 100644 --- a/visualdl/component/inference/proto/model_config/protxt_pb2.py +++ b/visualdl/component/inference/proto/model_config/protxt_pb2.py @@ -1,3 +1,4 @@ +# flake8: noqa # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: model_config.protxt From 72052095c2898c19e01d68f18518e0fe78a93e98 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 9 Dec 2022 11:37:31 +0800 Subject: [PATCH 16/48] add data validation --- .../component/inference/fastdeploy_lib.py | 31 ++++++++++++++++--- .../component/inference/fastdeploy_server.py | 4 ++- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index a9faec3e3..37a5158c2 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -47,6 +47,22 @@ def json2pbtxt(content: str): return text_proto +def validate_data(model_config): + ''' + Validate data in model config, we should check empty value recieved from front end. + The easiest way to handle it is to drop empty value. + Args: + model_config: model config to be saved in config file + Return: + model config after filtering. + ''' + model_config_filtered = {} + for key, value in model_config.items(): + if value: + model_config_filtered[key] = value + return model_config_filtered + + def analyse_config(cur_dir: str): ''' Analyse the model config in specified directory. @@ -69,6 +85,8 @@ def analyse_config(cur_dir: str): pbtxt2json(open(os.path.join(model_dir, filename)).read())) all_model_configs[ model_name] = json_config # store original config file content in json format + if 'name' not in json_config: + json_config['name'] = model_name for model_sub_dir in model_sub_dirs: if re.match( r'\d+', @@ -220,7 +238,7 @@ def original_format_to_exchange_format(original_format, version_info): return exchange_format -def analyse_step_relationships(step_config, inputs, outputs): +def analyse_step_relationships(step_config, inputs, outputs): # noqa: C901 ''' Analyse model relationships in ensemble step. And fill \ "inputModels", "outputModels", "inputVars", "outputVars" in step_config. 
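Context for the hunk below: `from_models`/`to_models` are sets (so duplicate
producer/consumer edges collapse), while `inputModels`/`outputModels` stay plain
lists, hence each append is guarded by a membership test instead of
`list.extend()` — a set would not survive JSON serialization, and a list keeps
first-seen order. A minimal sketch of the pattern (the helper name is
illustrative, not part of the patch):

    def append_unique(seq, items):
        # keep first-seen order; list(set(items)) would not guarantee it
        for item in items:
            if item not in seq:
                seq.append(item)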
@@ -266,12 +284,15 @@ def analyse_step_relationships(step_config, inputs, outputs): for var_name, relationships in vars_dict.items(): for from_model in relationships['from_models']: models_dict[from_model]['outputVars'].append(var_name) - models_dict[from_model]['outputModels'].extend( - relationships['to_models']) + for var_to_model in relationships['to_models']: + if var_to_model not in models_dict[from_model]['outputModels']: + models_dict[from_model]['outputModels'].append( + var_to_model) for to_model in relationships['to_models']: models_dict[to_model]['inputVars'].append(var_name) - models_dict[to_model]['inputModels'].extend( - relationships['from_models']) + for var_from_model in relationships['from_models']: + if var_from_model not in models_dict[to_model]['inputModels']: + models_dict[to_model]['inputModels'].append(var_from_model) def launch_process(kwargs: dict): diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 545357e7d..ccb34d715 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -31,6 +31,7 @@ from .fastdeploy_lib import kill_process from .fastdeploy_lib import launch_process from .fastdeploy_lib import original_format_to_exchange_format +from .fastdeploy_lib import validate_data from visualdl.server.api import gen_result from visualdl.server.api import result from visualdl.utils.dir import FASTDEPLOYSERVER_PATH @@ -75,7 +76,8 @@ def config_update(self, cur_dir, model_name, config): all_models = exchange_format_to_original_format(config) model_dir = self.model_paths[(Path(os.path.abspath(cur_dir)), model_name)] - text_proto = json2pbtxt(json.dumps(all_models[model_name])) + filtered_config = validate_data(all_models[model_name]) + text_proto = json2pbtxt(json.dumps(filtered_config)) # backup user's config.pbtxt first, when data corrupted by front-end, we still can recovery data shutil.copy( os.path.join(model_dir, 'config.pbtxt'), From 6ac4e5e91cf608e4cde684a6dbf4addc8bb5e6b1 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 9 Dec 2022 17:24:40 +0800 Subject: [PATCH 17/48] add __init__ for package --- visualdl/component/inference/fastdeploy_client/__init__.py | 0 visualdl/component/inference/proto/model_config/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 visualdl/component/inference/fastdeploy_client/__init__.py create mode 100644 visualdl/component/inference/proto/model_config/__init__.py diff --git a/visualdl/component/inference/fastdeploy_client/__init__.py b/visualdl/component/inference/fastdeploy_client/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/visualdl/component/inference/proto/model_config/__init__.py b/visualdl/component/inference/proto/model_config/__init__.py new file mode 100644 index 000000000..e69de29bb From a4407b8fa3edc8fa492ffb0096c12adf213bc438 Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 12 Dec 2022 11:40:51 +0800 Subject: [PATCH 18/48] add calculating layout for frontend --- .../component/inference/fastdeploy_lib.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 37a5158c2..78ffd5dbb 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -19,6 +19,7 @@ import re import signal import string +from collections import defaultdict from subprocess import Popen 
from subprocess import STDOUT @@ -293,6 +294,52 @@ def analyse_step_relationships(step_config, inputs, outputs): # noqa: C901 for var_from_model in relationships['from_models']: if var_from_model not in models_dict[to_model]['inputModels']: models_dict[to_model]['inputModels'].append(var_from_model) + calculate_layout_for_frontend(models_dict) + + +def calculate_layout_for_frontend(model_config_in_step): + ''' + Analyse model topology connections and prepare the positions for each model in layout. + Dynamic program algorithm: + depth(cur_node) = max([depth(prev_node) for prev_node in cur_node['inputModels']]) + Args: + model_config_in_step(dict): model config in ensemble models' step, indexed by model name. + Returns: + None. Results calculated will be saved in place. + ''' + path_depth = defaultdict(int) + + def depth_recursive(model): + if model['modelName'] == 'feed': + path_depth[model['modelName']] = 0 + return 0 + if path_depth[model['modelName']] != 0: + return path_depth[model['modelName']] + path_depth[model['modelName']] = max([ + depth_recursive(model_config_in_step[model_name]) for model_name in + model_config_in_step[model['modelName']]['inputModels'] + ]) + 1 + return path_depth[model['modelName']] + + depth_recursive(model_config_in_step['fetch']) + path_depth_tuple = [ + (k, v) + for k, v in sorted(path_depth.items(), key=lambda item: item[1]) + ] + cur_x = 0 + last_depth = -1 + for model_name, depth in path_depth_tuple: + if depth == last_depth: + model_config_in_step[model_name]['pos_y'] = depth + model_config_in_step[model_name]['pos_x'] = cur_x + cur_x += 1 + else: + cur_x = 0 + model_config_in_step[model_name]['pos_y'] = depth + model_config_in_step[model_name]['pos_x'] = cur_x + cur_x += 1 + last_depth = depth + return def launch_process(kwargs: dict): From cf65c71e7072fae4bf94344bb7443f257d18a765 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 14 Dec 2022 16:41:48 +0800 Subject: [PATCH 19/48] add alive server detection and optimize client --- .../inference/fastdeploy_client/client_app.py | 252 +++++++----------- .../component/inference/fastdeploy_lib.py | 137 +++++++++- .../component/inference/fastdeploy_server.py | 18 +- visualdl/server/app.py | 52 +++- 4 files changed, 303 insertions(+), 156 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py index cc1925bb9..fcc064b62 100644 --- a/visualdl/component/inference/fastdeploy_client/client_app.py +++ b/visualdl/component/inference/fastdeploy_client/client_app.py @@ -36,7 +36,7 @@ 'ocr': visualize_ocr, 'facealignment': visualize_face_alignment, 'headpose': visualize_headpose, - 'others(raw data)': lambda x: str(x) + 'unspecified': lambda x: str(x) } @@ -56,11 +56,6 @@ def create_gradio_client_app(): # noqa:C901 .dark input[type='range'] { accent-color: #dfdfdf; } - .container { - max-width: 1200px; - margin: auto; - padding-top: 1.5rem; - } #gallery { min-height: 22rem; margin-bottom: 15px; @@ -162,84 +157,76 @@ def create_gradio_client_app(): # noqa:C901 placeholder="1", ) - check_button = gr.Button("GetInputOutputName") - with gr.Box(): - gr.Markdown("Inputs") with gr.Tab("component format"): - gr.Markdown( - "Fill inputs according to your need, choose either image or text for each input." 
- ) - with gr.Column(): - with gr.Accordion("input 1"): - input_name_1_text = gr.Textbox( - label="input name", interactive=False) - input_1_image = gr.Image(type='numpy') - input_1_text = gr.Textbox( - label="contents", max_lines=1000) - with gr.Accordion("input 2", open=False): - input_name_2_text = gr.Textbox( - label="input name", interactive=False) - input_2_image = gr.Image(type='numpy') - input_2_text = gr.Textbox( - label="contents", max_lines=1000) - - with gr.Accordion("input 3", open=False): - input_name_3_text = gr.Textbox( - label="input name", interactive=False) - input_3_image = gr.Image(type='numpy') - input_3_text = gr.Textbox( - label="contents", max_lines=1000) - with gr.Box(): - gr.Markdown("Outputs") - with gr.Column(): - with gr.Accordion("output 1"): - output_name_1_text = gr.Textbox( - label="output name", interactive=False) - task_select_items1 = gr.Dropdown( - choices=list(supported_tasks.keys()), - value='others(raw data)', - label='task type') - output_1_text = gr.Textbox( - label="raw data", - interactive=False, - show_label=True) - output_1_image = gr.Image(interactive=False) - with gr.Accordion("output 2", open=False): - output_name_2_text = gr.Textbox( - label="output name", interactive=False) - task_select_items2 = gr.Dropdown( - choices=list(supported_tasks.keys()), - value='others(raw data)', - label='task type') - output_2_text = gr.Textbox( - label="raw data", - interactive=False, - show_label=True, - ) - output_2_image = gr.Image(interactive=False) + check_button = gr.Button("GetInputOutputName") + component_format_column = gr.Column(visible=False) + with component_format_column: + task_radio = gr.Radio( + choices=list(supported_tasks.keys()), + value='unspecified', + label='task type', + visible=True) + gr.Markdown( + "Fill inputs according to your need, choose either image or text for each input." 
+ ) + with gr.Row(): + with gr.Column(): + gr.Markdown("Inputs") + input_accordions = [] + input_name_texts = [] + input_images = [] + input_texts = [] + for i in range(6): + accordion = gr.Accordion( + "input {}".format(i), + open=True, + visible=False) + with accordion: + input_name_text = gr.Textbox( + label="input name", + interactive=False) + input_image = gr.Image(type='numpy') + input_text = gr.Textbox( + label="contents", max_lines=1000) + input_accordions.append(accordion) + input_name_texts.append(input_name_text) + input_images.append(input_image) + input_texts.append(input_text) - with gr.Accordion("output 3", open=False): - output_name_3_text = gr.Textbox( - label="output name", interactive=False) - task_select_items3 = gr.Dropdown( - choices=list(supported_tasks.keys()), - value='others(raw data)', - label='task type') - output_3_text = gr.Textbox( - label="raw data", - interactive=False, - show_label=True) - output_3_image = gr.Image(interactive=False) - component_submit_button = gr.Button("submit") + with gr.Column(): + gr.Markdown("Outputs") + output_accordions = [] + output_name_texts = [] + output_images = [] + output_texts = [] + for i in range(6): + accordion = gr.Accordion( + "output {}".format(i), + open=True, + visible=False) + with accordion: + output_name_text = gr.Textbox( + label="output name", + interactive=False) + output_text = gr.Textbox( + label="raw data", + interactive=False, + show_label=True) + output_image = gr.Image( + interactive=False) + output_accordions.append(accordion) + output_name_texts.append(output_name_text) + output_images.append(output_image) + output_texts.append(output_text) + component_submit_button = gr.Button("submit") with gr.Tab("raw format"): raw_payload_text = gr.Textbox( label="request payload", max_lines=10000) - with gr.Box(): + with gr.Column(): gr.Markdown("Outputs") - with gr.Column(): - output_raw_text = gr.Textbox( - label="raw data", interactive=False) + output_raw_text = gr.Textbox( + label="raw data", interactive=False) raw_submit_button = gr.Button("submit") status_text = gr.Textbox( @@ -247,14 +234,8 @@ def create_gradio_client_app(): # noqa:C901 show_label=True, max_lines=1, interactive=False) - all_input_output_components = [ - input_name_1_text, input_name_2_text, input_name_3_text, - input_1_image, input_2_image, input_3_image, input_1_text, - input_2_text, input_3_text, output_name_1_text, output_name_2_text, - output_name_3_text, output_1_text, output_2_text, output_3_text, - output_1_image, output_2_image, output_3_image, task_select_items1, - task_select_items2, task_select_items3 - ] + all_input_output_components = input_accordions + input_name_texts + input_images + \ + input_texts + output_accordions + output_name_texts + output_images + output_texts def get_input_output_name(server_addr, model_name, model_version): try: @@ -262,23 +243,22 @@ def get_input_output_name(server_addr, model_name, model_version): server_addr, model_name, model_version) except Exception as e: return {status_text: str(e)} - input_name_texts = [ - input_name_1_text, input_name_2_text, input_name_3_text - ] - output_name_texts = [ - output_name_1_text, output_name_2_text, output_name_3_text - ] results = { component: None for component in all_input_output_components } - results[task_select_items1] = 'others(raw data)' - results[task_select_items2] = 'others(raw data)' - results[task_select_items3] = 'others(raw data)' + results[component_format_column] = gr.update(visible=True) + results[check_button] = gr.update(visible=False) + 
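+            # hide every input/output accordion first; the loops below re-enable
+            # one accordion per input/output reported by the model metadata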
for input_accordio in input_accordions: + results[input_accordio] = gr.update(visible=False) + for output_accordio in output_accordions: + results[output_accordio] = gr.update(visible=False) results[status_text] = 'GetInputOutputName Successful' for i, input_meta in enumerate(input_metas): + results[input_accordions[i]] = gr.update(visible=True) results[input_name_texts[i]] = input_meta['name'] for i, output_meta in enumerate(output_metas): + results[output_accordions[i]] = gr.update(visible=True) results[output_name_texts[i]] = output_meta['name'] return results @@ -286,66 +266,36 @@ def component_inference(*args): server_addr = args[0] model_name = args[1] model_version = args[2] - input_name_1 = args[3] - input_1_image_data = args[4] - input_1_text_data = args[5] - input_name_2 = args[6] - input_2_image_data = args[7] - input_2_text_data = args[8] - input_name_3 = args[9] - input_3_image_data = args[10] - input_3_text_data = args[11] - task_select_items1_data = args[12] - task_select_items2_data = args[13] - task_select_items3_data = args[14] + names = args[3:3 + len(input_name_texts)] + images = args[3 + len(input_name_texts):3 + len(input_name_texts) + + len(input_images)] + texts = args[3 + len(input_name_texts) + len(input_images):3 + + len(input_name_texts) + len(input_images) + + len(input_texts)] + task_type = args[-1] if server_addr and model_name and model_version: inputs = {} - if input_name_1: - if input_1_image_data is not None: - inputs[input_name_1] = np.array([input_1_image_data]) - if input_1_text_data: - inputs[input_name_1] = np.array( - [[input_1_text_data.encode('utf-8')]], - dtype=np.object_) - if input_name_2: - if input_2_image_data is not None: - inputs[input_name_2] = np.array([input_2_image_data]) - if input_2_text_data: - inputs[input_name_2] = np.array( - [[input_2_text_data.encode('utf-8')]], - dtype=np.object_) - if input_name_3: - if input_3_image_data is not None: - inputs[input_name_3] = np.array([input_3_image_data]) - if input_3_text_data: - inputs[input_name_3] = np.array( - [[input_3_text_data.encode('utf-8')]], - dtype=np.object_) + for i, input_name in enumerate(names): + if input_name: + if images[i] is not None: + inputs[input_name] = np.array([images[i]]) + if texts[i]: + inputs[input_name] = np.array( + [[texts[i].encode('utf-8')]], dtype=np.object_) try: infer_results = _http_manager.infer( server_addr, model_name, model_version, inputs) results = {status_text: 'Inference Successful'} - output_name_texts = [ - output_name_1_text, output_name_2_text, - output_name_3_text - ] - output_texts = [ - output_1_text, output_2_text, output_3_text - ] - output_images = [ - output_1_image, output_2_image, output_3_image - ] - output_task_types = [ - task_select_items1_data, task_select_items2_data, - task_select_items3_data - ] for i, (output_name, data) in enumerate(infer_results.items()): results[output_name_texts[i]] = output_name results[output_texts[i]] = str(data) - if output_task_types[i] != 'others(raw data)': - results[output_images[i]] = supported_tasks[ - output_task_types[i]](input_1_image_data, data) + if task_type != 'unspecified': + try: + results[output_images[i]] = supported_tasks[ + task_type](images[0], data) + except Exception: + results[output_images[i]] = None return results except Exception as e: return {status_text: 'Error: {}'.format(e)} @@ -374,20 +324,18 @@ def raw_inference(*args): check_button.click( fn=get_input_output_name, inputs=[server_addr_text, model_name_text, model_version_text], - 
outputs=[*all_input_output_components, status_text]) + outputs=[ + *all_input_output_components, check_button, + component_format_column, status_text + ]) component_submit_button.click( fn=component_inference, inputs=[ server_addr_text, model_name_text, model_version_text, - input_name_1_text, input_1_image, input_1_text, - input_name_2_text, input_2_image, input_2_text, - input_name_3_text, input_3_image, input_3_text, - task_select_items1, task_select_items2, task_select_items3 + *input_name_texts, *input_images, *input_texts, task_radio ], outputs=[ - output_name_1_text, output_name_2_text, output_name_3_text, - output_1_text, output_2_text, output_3_text, output_1_image, - output_2_image, output_3_image, status_text + *output_name_texts, *output_images, *output_texts, status_text ]) raw_submit_button.click( fn=raw_inference, diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 78ffd5dbb..80bc170ba 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -25,6 +25,7 @@ import google.protobuf.json_format as json_format import google.protobuf.text_format as text_format +import requests from .proto.model_config.protxt_pb2 import ModelConfig from visualdl.utils.dir import FASTDEPLOYSERVER_PATH @@ -347,9 +348,14 @@ def launch_process(kwargs: dict): Launch a fastdeploy server according to specified arguments. ''' cmd = ['fastdeployserver'] + start_args = {} for key, value in kwargs.items(): + if key == 'default_model_name': # Used to fill client model_name automatically + start_args[key] = value + pass cmd.append('--{}'.format(key)) cmd.append('{}'.format(value)) + start_args[key] = value logfilename = 'logfile-{}'.format(get_random_string(8)) while os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, logfilename)): @@ -364,8 +370,8 @@ def launch_process(kwargs: dict): with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(p.pid)), 'w') as f: f.write( - logfilename - ) # filename ${p.pid} contain the real log filename ${logfilename} + logfilename + '\n' + json.dumps(start_args) + ) # filename ${p.pid} contain the real log filename ${logfilename}, and start arguments return p @@ -376,6 +382,25 @@ def get_random_string(length): return result_str +def get_start_arguments(server_id): + ''' + Get the start arguments for fastdeployserver process. + Args: + server_id(int): fastdeployserver process id + Returns: + args(dict): launch arguments when start fastdeployserver process. + ''' + args = {} + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))): + with open( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)), + 'r') as f: + f.readline() + args = json.loads(f.read()) + return args + + def get_process_output(pid, length): ''' Get the standard output of a opened subprocess. @@ -423,3 +448,111 @@ def kill_process(process): os.remove( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))) os.remove(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))) + + +def get_alive_fastdeploy_servers(): + ''' + Search pids in `FASTDEPLOYSERVER_PATH`, if process is dead and log still exists due to \ + some unexpectable reasons, delete log file. 
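+    Returns:
+        pids(list): directory entries under FASTDEPLOYSERVER_PATH whose
+        fastdeployserver process is still alive.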
+ ''' + pids = [ + name for name in os.listdir(FASTDEPLOYSERVER_PATH) + if 'logfile' not in name + ] + should_delete_pids = [] + for pid in pids: + if check_process_alive(pid) is False: + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))): + with open( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), + 'r') as f: + logfilename = f.read() + # delete file ${logfilename} if exists + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, + '{}'.format(logfilename))): + os.remove( + os.path.join(FASTDEPLOYSERVER_PATH, + '{}'.format(logfilename))) + os.remove( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))) + should_delete_pids.append(pid) + for pid in should_delete_pids: + pids.remove(pid) + return pids + + +def check_process_alive(pid): + ''' + Given a pid, check whether the process is alive or not. + Args: + pid(int): process id + Return: + status(bool): True if process is still alive. + ''' + try: + os.kill(pid, 0) + except OSError: + return False + else: + return True + + +_metric_column_name = { + "Model": { + "nv_inference_request_success", "nv_inference_request_failure", + "nv_inference_count", "nv_inference_exec_count", + "nv_inference_request_duration_us", "nv_inference_queue_duration_us", + "nv_inference_compute_input_duration_us", + "nv_inference_compute_infer_duration_us", + "nv_inference_compute_output_duration_us" + }, + "GPU": { + "nv_gpu_power_usage", "nv_gpu_power_limit", "nv_energy_consumption", + "nv_gpu_utilization", "nv_gpu_memory_total_bytes", + "nv_gpu_memory_used_bytes" + }, + "CPU": { + "nv_cpu_utilization", "nv_cpu_memory_total_bytes", + "nv_cpu_memory_used_bytes" + } +} + + +def generate_metric_table(server_addr, server_port): + model_table = {} + gpu_table = {} + + res = requests.get("http://{}:{}/metrics") + metric_content = res.text + for content in metric_content.split('\n'): + if content.startwith('#'): + continue + else: + res = re.match(r'(\w+)({.*}) (\w+)', + content) # match output by server metrics interface + metric_name = res.group(1) + model = res.group(2) + value = res.group(3) + infos = {} + for info in model.split(','): + k, v = info.split('=') + v = v.strip('"') + infos[k] = v + for key, metric_names in _metric_column_name.items(): + if metric_name in metric_names: + if key == 'Model': + model_name = infos['model'] + if model_name not in model_table: + model_table[model_name] = {} + model_table[model_name][metric_name] = value + elif key == 'GPU': + gpu_name = infos['gpu_uuid'] + gpu_table[gpu_name][metric_name] = value + elif key == 'CPU': + pass + results = {} + results['Model'] = model_table + results['GPU'] = gpu_table + return results diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index ccb34d715..4b02f829a 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -26,7 +26,10 @@ from .fastdeploy_client.client_app import create_gradio_client_app from .fastdeploy_lib import analyse_config from .fastdeploy_lib import exchange_format_to_original_format +from .fastdeploy_lib import generate_metric_table +from .fastdeploy_lib import get_alive_fastdeploy_servers from .fastdeploy_lib import get_process_output +from .fastdeploy_lib import get_start_arguments from .fastdeploy_lib import json2pbtxt from .fastdeploy_lib import kill_process from .fastdeploy_lib import launch_process @@ -127,6 +130,17 @@ def get_server_output(self, server_id, length): else: return + @result() + def 
get_server_metric(self, server_id): + args = get_start_arguments(server_id) + host = 'localhost' + port = args.get('metrics-port', 8002) + return generate_metric_table(host, port) + + @result() + def get_server_list(self): + return get_alive_fastdeploy_servers() + def create_fastdeploy_client(self): if self.client_port is None: @@ -166,7 +180,9 @@ def create_fastdeploy_api_call(): 'start_server': (api.start_server, ['config']), 'stop_server': (api.stop_server, ['server_id']), 'get_server_output': (api.get_server_output, ['server_id', 'length']), - 'create_fastdeploy_client': (api.create_fastdeploy_client, []) + 'create_fastdeploy_client': (api.create_fastdeploy_client, []), + 'get_server_list': (api.get_server_list, []), + 'get_server_metric': (api.get_server_metric, ['server_id']) } def call(path: str, args): diff --git a/visualdl/server/app.py b/visualdl/server/app.py index fc67738e6..8cf1dea42 100644 --- a/visualdl/server/app.py +++ b/visualdl/server/app.py @@ -19,6 +19,7 @@ import sys import threading import time +import urllib import webbrowser import requests @@ -32,6 +33,7 @@ import visualdl.server from visualdl import __version__ +from visualdl.component.inference.fastdeploy_lib import get_start_arguments from visualdl.component.inference.fastdeploy_server import create_fastdeploy_api_call from visualdl.component.inference.model_convert_server import create_model_convert_api_call from visualdl.component.profiler.profiler_server import create_profiler_api_call @@ -170,11 +172,18 @@ def serve_fastdeploy_create_fastdeploy_client(): try: if request.method == 'POST': fastdeploy_api_call('create_fastdeploy_client', request.form) + request_args = request.form else: fastdeploy_api_call('create_fastdeploy_client', request.args) + request_args = request.args except Exception as e: error_msg = '{}'.format(e) return make_response(error_msg) + args = urllib.parse.urlencode(request_args) + if args: + return redirect( + api_path + "/fastdeploy/fastdeploy_client/app?{}".format(args), + code=302) return redirect( api_path + "/fastdeploy/fastdeploy_client/app", code=302) @@ -194,9 +203,11 @@ def request_fastdeploy_create_fastdeploy_client_app(path: str): if request.method == 'POST': port = fastdeploy_api_call('create_fastdeploy_client', request.form) + request_args = request.form else: port = fastdeploy_api_call('create_fastdeploy_client', request.args) + request_args = request.args if path == 'app': proxy_url = request.url.replace( request.host_url.rstrip('/') + api_path + @@ -217,8 +228,47 @@ def request_fastdeploy_create_fastdeploy_client_app(path: str): data=request.get_data(), cookies=request.cookies, allow_redirects=False) + if path == 'app': + content = resp.content + if request_args and 'server_id' in request_args: + server_id = request_args.get('server_id') + start_args = get_start_arguments(server_id) + http_port = start_args.get('http-port', '') + model_name = start_args.get('default_model_name', '') + content = content.decode() + try: + default_server_addr = re.search( + r'"label": "Server address".*?"value": "".*?}', + content).group(0) + cur_server_addr = default_server_addr.replace( + '"value": ""', + '"value": "localhost:{}"'.format(http_port)) + default_model_name = re.search( + r'"label": "model name".*?"value": "".*?}', + content).group(0) + cur_model_name = default_model_name.replace( + '"value": ""', '"value": "{}"'.format(model_name)) + default_model_version = re.search( + r'"label": "model version".*?"value": "".*?}', + content).group(0) + cur_model_version = 
default_model_version.replace( + '"value": ""', '"value": "{}"'.format('1')) + if http_port: + content = content.replace(default_server_addr, + cur_server_addr) + if model_name: + content = content.replace(default_model_name, + cur_model_name) + content = content.replace(default_model_version, + cur_model_version) + except Exception: + pass + finally: + content = content.encode() + else: + content = resp.content headers = [(name, value) for (name, value) in resp.raw.headers.items()] - response = Response(resp.content, resp.status_code, headers) + response = Response(content, resp.status_code, headers) return response @app.route(api_path + '/component_tabs') From fed60faf2036122233d7a883d20c53780bd0442f Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 14 Dec 2022 16:51:04 +0800 Subject: [PATCH 20/48] add alive server detection and optimize client --- visualdl/component/inference/fastdeploy_lib.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 80bc170ba..dcf36fe59 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -408,7 +408,7 @@ def get_process_output(pid, length): if os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))): with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), 'r') as f: - logfilename = f.read() + logfilename = f.readline().strip('\n') # delete file ${logfilename} if exists if os.path.exists( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))): @@ -441,7 +441,7 @@ def kill_process(process): if os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))): with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), 'r') as f: - logfilename = f.read() + logfilename = f.readline().strip('\n') # delete file ${logfilename} if exists if os.path.exists( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))): @@ -467,7 +467,7 @@ def get_alive_fastdeploy_servers(): with open( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), 'r') as f: - logfilename = f.read() + logfilename = f.readline().strip('\n') # delete file ${logfilename} if exists if os.path.exists( os.path.join(FASTDEPLOYSERVER_PATH, From b9f0d078676249c65f454a0df6e7229fa761a410 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 14 Dec 2022 17:12:35 +0800 Subject: [PATCH 21/48] add alive server detection and optimize client --- visualdl/component/inference/fastdeploy_lib.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index dcf36fe59..80ff1800a 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -491,6 +491,7 @@ def check_process_alive(pid): Return: status(bool): True if process is still alive. 
''' + pid = int(pid) try: os.kill(pid, 0) except OSError: @@ -524,14 +525,16 @@ def generate_metric_table(server_addr, server_port): model_table = {} gpu_table = {} - res = requests.get("http://{}:{}/metrics") + res = requests.get("http://{}:{}/metrics".format(server_addr, server_port)) metric_content = res.text for content in metric_content.split('\n'): - if content.startwith('#'): + if content.startswith('#'): continue else: - res = re.match(r'(\w+)({.*}) (\w+)', + res = re.match(r'(\w+){(.*)} (\w+)', content) # match output by server metrics interface + if not res: + continue metric_name = res.group(1) model = res.group(2) value = res.group(3) @@ -549,6 +552,8 @@ def generate_metric_table(server_addr, server_port): model_table[model_name][metric_name] = value elif key == 'GPU': gpu_name = infos['gpu_uuid'] + if gpu_name not in gpu_table: + gpu_table[gpu_name] = {} gpu_table[gpu_name][metric_name] = value elif key == 'CPU': pass From 99af968776feee1995b021097d89a032595302ac Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 15 Dec 2022 18:03:37 +0800 Subject: [PATCH 22/48] add metrics in gradio client --- .../inference/fastdeploy_client/client_app.py | 150 ++++++++++++----- .../fastdeploy_client/http_client_manager.py | 159 ++++++++++++++++++ .../component/inference/fastdeploy_server.py | 5 +- visualdl/server/app.py | 34 ++-- 4 files changed, 293 insertions(+), 55 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py index fcc064b62..a6af465ff 100644 --- a/visualdl/component/inference/fastdeploy_client/client_app.py +++ b/visualdl/component/inference/fastdeploy_client/client_app.py @@ -15,7 +15,9 @@ import gradio as gr import numpy as np +from .http_client_manager import get_metric_data from .http_client_manager import HttpClientManager +from .http_client_manager import metrics_table_head from .visualizer import visualize_detection from .visualizer import visualize_face_alignment from .visualizer import visualize_face_detection @@ -137,80 +139,89 @@ def create_gradio_client_app(): # noqa:C901 with gr.Column(): with gr.Row(): server_addr_text = gr.Textbox( - label="Server address", + label="服务ip", show_label=True, max_lines=1, - placeholder="localhost:8000", + placeholder="localhost", ) + server_http_port_text = gr.Textbox( + label="推理服务端口", + show_label=True, + max_lines=1, + placeholder="8000", + ) + + server_metric_port_text = gr.Textbox( + label="性能服务端口", + show_label=True, + max_lines=1, + placeholder="8002", + ) + with gr.Row(): model_name_text = gr.Textbox( - label="model name", + label="模型名称", show_label=True, max_lines=1, placeholder="yolov5", ) - model_version_text = gr.Textbox( - label="model version", + label="模型版本", show_label=True, max_lines=1, placeholder="1", ) with gr.Box(): - with gr.Tab("component format"): - check_button = gr.Button("GetInputOutputName") + with gr.Tab("组件形式"): + check_button = gr.Button("获取模型输入输出") component_format_column = gr.Column(visible=False) with component_format_column: task_radio = gr.Radio( choices=list(supported_tasks.keys()), value='unspecified', - label='task type', + label='任务类型', visible=True) - gr.Markdown( - "Fill inputs according to your need, choose either image or text for each input." 
- ) + gr.Markdown("根据模型需要,挑选文本框或者图像框进行输入") with gr.Row(): with gr.Column(): - gr.Markdown("Inputs") + gr.Markdown("模型输入") input_accordions = [] input_name_texts = [] input_images = [] input_texts = [] for i in range(6): accordion = gr.Accordion( - "input {}".format(i), + "输入变量 {}".format(i), open=True, visible=False) with accordion: input_name_text = gr.Textbox( - label="input name", - interactive=False) + label="变量名", interactive=False) input_image = gr.Image(type='numpy') input_text = gr.Textbox( - label="contents", max_lines=1000) + label="文本框", max_lines=1000) input_accordions.append(accordion) input_name_texts.append(input_name_text) input_images.append(input_image) input_texts.append(input_text) with gr.Column(): - gr.Markdown("Outputs") + gr.Markdown("模型输出") output_accordions = [] output_name_texts = [] output_images = [] output_texts = [] for i in range(6): accordion = gr.Accordion( - "output {}".format(i), + "输出变量 {}".format(i), open=True, visible=False) with accordion: output_name_text = gr.Textbox( - label="output name", - interactive=False) + label="变量名", interactive=False) output_text = gr.Textbox( - label="raw data", + label="服务返回的原数据", interactive=False, show_label=True) output_image = gr.Image( @@ -219,26 +230,40 @@ def create_gradio_client_app(): # noqa:C901 output_name_texts.append(output_name_text) output_images.append(output_image) output_texts.append(output_text) - component_submit_button = gr.Button("submit") - with gr.Tab("raw format"): + component_submit_button = gr.Button("提交请求") + with gr.Tab("源格式"): + gr.Markdown("模型输入") raw_payload_text = gr.Textbox( - label="request payload", max_lines=10000) + label="负载数据", max_lines=10000) with gr.Column(): - gr.Markdown("Outputs") + gr.Markdown("输出") output_raw_text = gr.Textbox( - label="raw data", interactive=False) - raw_submit_button = gr.Button("submit") + label="服务返回的原数据", interactive=False) + raw_submit_button = gr.Button("提交请求") + + with gr.Box(): + with gr.Column(): + gr.Markdown("服务性能统计(每次提交请求会自动更新数据,您也可以手动点击更新)") + update_metric_button = gr.Button("更新数据") + output_html_table = gr.Textbox( + label="metrics", + interactive=False, + show_label=False, + value=metrics_table_head.format('', '')) status_text = gr.Textbox( label="status", show_label=True, max_lines=1, interactive=False) + all_input_output_components = input_accordions + input_name_texts + input_images + \ input_texts + output_accordions + output_name_texts + output_images + output_texts - def get_input_output_name(server_addr, model_name, model_version): + def get_input_output_name(server_ip, server_port, model_name, + model_version): try: + server_addr = server_ip + ':' + server_port input_metas, output_metas = _http_manager.get_model_meta( server_addr, model_name, model_version) except Exception as e: @@ -263,17 +288,20 @@ def get_input_output_name(server_addr, model_name, model_version): return results def component_inference(*args): - server_addr = args[0] - model_name = args[1] - model_version = args[2] - names = args[3:3 + len(input_name_texts)] - images = args[3 + len(input_name_texts):3 + len(input_name_texts) + + server_ip = args[0] + http_port = args[1] + metric_port = args[2] + model_name = args[3] + model_version = args[4] + names = args[5:5 + len(input_name_texts)] + images = args[5 + len(input_name_texts):5 + len(input_name_texts) + len(input_images)] - texts = args[3 + len(input_name_texts) + len(input_images):3 + + texts = args[5 + len(input_name_texts) + len(input_images):5 + len(input_name_texts) + len(input_images) + len(input_texts)] 
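# Layout of the flattened positional arguments gradio passes to this
# callback, following the inputs wired up in component_submit_button.click():
#   args[0] server_ip, args[1] http_port, args[2] metric_port,
#   args[3] model_name, args[4] model_version, then the input-name
#   textboxes, the image components and the text components (sliced above),
#   and finally the task_radio value in args[-1].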
task_type = args[-1] - if server_addr and model_name and model_version: + server_addr = server_ip + ':' + http_port + if server_ip and http_port and model_name and model_version: inputs = {} for i, input_name in enumerate(names): if input_name: @@ -296,6 +324,9 @@ def component_inference(*args): task_type](images[0], data) except Exception: results[output_images[i]] = None + if metric_port: + html_table = get_metric_data(server_ip, metric_port) + results[output_html_table] = html_table return results except Exception as e: return {status_text: 'Error: {}'.format(e)} @@ -306,10 +337,13 @@ def component_inference(*args): } def raw_inference(*args): - server_addr = args[0] - model_name = args[1] - model_version = args[2] - payload_text = args[3] + server_ip = args[0] + http_port = args[1] + metric_port = args[2] + model_name = args[3] + model_version = args[4] + payload_text = args[5] + server_addr = server_ip + ':' + http_port try: result = _http_manager.raw_infer(server_addr, model_name, model_version, payload_text) @@ -317,13 +351,34 @@ def raw_inference(*args): status_text: 'Get response from server', output_raw_text: result } + if server_ip and metric_port: + html_table = get_metric_data(server_ip, metric_port) + results[output_html_table] = html_table return results except Exception as e: return {status_text: 'Error: {}'.format(e)} + def update_metric(server_ip, metrics_port): + if server_ip and metrics_port: + try: + html_table = get_metric_data(server_ip, metrics_port) + return { + output_html_table: html_table, + status_text: "Successfully update metrics." + } + except Exception as e: + return {status_text: 'Error: {}'.format(e)} + else: + return { + status_text: 'Please input server ip and metrics_port.' + } + check_button.click( fn=get_input_output_name, - inputs=[server_addr_text, model_name_text, model_version_text], + inputs=[ + server_addr_text, server_http_port_text, model_name_text, + model_version_text + ], outputs=[ *all_input_output_components, check_button, component_format_column, status_text @@ -331,17 +386,24 @@ def raw_inference(*args): component_submit_button.click( fn=component_inference, inputs=[ - server_addr_text, model_name_text, model_version_text, + server_addr_text, server_http_port_text, + server_metric_port_text, model_name_text, model_version_text, *input_name_texts, *input_images, *input_texts, task_radio ], outputs=[ - *output_name_texts, *output_images, *output_texts, status_text + *output_name_texts, *output_images, *output_texts, status_text, + output_html_table ]) raw_submit_button.click( fn=raw_inference, inputs=[ - server_addr_text, model_name_text, model_version_text, + server_addr_text, server_http_port_text, + server_metric_port_text, model_name_text, model_version_text, raw_payload_text ], - outputs=[output_raw_text, status_text]) + outputs=[output_raw_text, status_text, output_html_table]) + update_metric_button.click( + fn=update_metric, + inputs=[server_addr_text, server_metric_port_text], + outputs=[output_html_table, status_text]) return block diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index 0fb300023..cf40a1194 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -13,6 +13,7 @@ # limitations under the License. 
# ======================================================================= import json +import re import requests import tritonclient.http as httpclient @@ -50,6 +51,164 @@ def prepare_request(inputs_meta, inputs_data, outputs_meta): return inputs, outputs +metrics_table_head = """ + + +
+<table border="1" style="width:100%;text-align:center">
+  <tr>
+    <th rowspan="2">模型名称</th>
+    <th colspan="4">执行统计</th>
+    <th colspan="5">延迟统计</th>
+  </tr>
+  <tr>
+    <th>请求处理成功数</th>
+    <th>请求处理失败数</th>
+    <th>推理batch数</th>
+    <th>推理样本数</th>
+    <th>请求处理时间</th>
+    <th>任务队列等待时间</th>
+    <th>输入处理时间</th>
+    <th>模型推理时间</th>
+    <th>输出处理时间</th>
+  </tr>
+  {}
+</table>
+<br>
+<table border="1" style="width:100%;text-align:center">
+  <tr>
+    <th rowspan="2">GPU</th>
+    <th colspan="4">性能指标</th>
+    <th colspan="2">显存</th>
+  </tr>
+  <tr>
+    <th>利用率</th>
+    <th>功率</th>
+    <th>功率限制</th>
+    <th>耗电量</th>
+    <th>总量</th>
+    <th>已使用</th>
+  </tr>
+  {}
+</table>
+""" + + +def get_metric_data(server_addr, metric_port): # noqa:C901 + ''' + Get metrics data from fastdeploy server, and transform it into html table. + Args: + server_addr(str): fastdeployserver ip address + metric_port(int): fastdeployserver metrics port + Returns: + htmltable(str): html table to show metrics data + ''' + model_table = {} + gpu_table = {} + metric_column_name = { + "Model": { + "nv_inference_request_success", "nv_inference_request_failure", + "nv_inference_count", "nv_inference_exec_count", + "nv_inference_request_duration_us", + "nv_inference_queue_duration_us", + "nv_inference_compute_input_duration_us", + "nv_inference_compute_infer_duration_us", + "nv_inference_compute_output_duration_us" + }, + "GPU": { + "nv_gpu_power_usage", "nv_gpu_power_limit", + "nv_energy_consumption", "nv_gpu_utilization", + "nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes" + }, + "CPU": { + "nv_cpu_utilization", "nv_cpu_memory_total_bytes", + "nv_cpu_memory_used_bytes" + } + } + res = requests.get("http://{}:{}/metrics".format(server_addr, metric_port)) + metric_content = res.text + for content in metric_content.split('\n'): + if content.startswith('#'): + continue + else: + res = re.match(r'(\w+){(.*)} (\w+)', + content) # match output by server metrics interface + if not res: + continue + metric_name = res.group(1) + model = res.group(2) + value = res.group(3) + infos = {} + for info in model.split(','): + k, v = info.split('=') + v = v.strip('"') + infos[k] = v + for key, metric_names in metric_column_name.items(): + if metric_name in metric_names: + if key == 'Model': + model_name = infos['model'] + if model_name not in model_table: + model_table[model_name] = {} + model_table[model_name][metric_name] = value + elif key == 'GPU': + gpu_name = infos['gpu_uuid'] + if gpu_name not in gpu_table: + gpu_table[gpu_name] = {} + gpu_table[gpu_name][metric_name] = value + elif key == 'CPU': + pass + model_data_list = [] + gpu_data_list = [] + model_data_metric_names = [ + "nv_inference_request_success", "nv_inference_request_failure", + "nv_inference_exec_count", "nv_inference_count", + "nv_inference_request_duration_us", "nv_inference_queue_duration_us", + "nv_inference_compute_input_duration_us", + "nv_inference_compute_infer_duration_us", + "nv_inference_compute_output_duration_us" + ] + gpu_data_metric_names = [ + "nv_gpu_utilization", "nv_gpu_power_usage", "nv_gpu_power_limit", + "nv_energy_consumption", "nv_gpu_memory_total_bytes", + "nv_gpu_memory_used_bytes" + ] + for k, v in model_table.items(): + data = [] + data.append(k) + for data_metric in model_data_metric_names: + data.append(v[data_metric]) + model_data_list.append(data) + for k, v in gpu_table.items(): + data = [] + data.append(k) + for data_metric in gpu_data_metric_names: + data.append(v[data_metric]) + gpu_data_list.append(data) + model_data = '\n'.join([ + "" + '\n'.join(["" + item + "" + for item in data]) + "" + for data in model_data_list + ]) + gpu_data = '\n'.join([ + "" + '\n'.join(["" + item + "" + for item in data]) + "" + for data in gpu_data_list + ]) + return metrics_table_head.format(model_data, gpu_data) + + class HttpClientManager: def __init__(self): self.clients = {} # server url: httpclient diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 4b02f829a..47981dac4 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -113,9 +113,12 @@ def stop_server(self, server_id): # 
FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn) kill_process(server_id) # check if there are servers killed by other vdl app instance and become zoombie + should_delete = [] for server_id, process in self.opened_servers.items(): if process.poll() is not None: - del self.opened_servers[server_id] + should_delete.append(server_id) + for server_id in should_delete: + del self.opened_servers[server_id] @result('text/plain') def get_server_output(self, server_id, length): diff --git a/visualdl/server/app.py b/visualdl/server/app.py index 8cf1dea42..94492fd78 100644 --- a/visualdl/server/app.py +++ b/visualdl/server/app.py @@ -234,31 +234,45 @@ def request_fastdeploy_create_fastdeploy_client_app(path: str): server_id = request_args.get('server_id') start_args = get_start_arguments(server_id) http_port = start_args.get('http-port', '') + metrics_port = start_args.get('metrics-port', '') model_name = start_args.get('default_model_name', '') content = content.decode() try: default_server_addr = re.search( - r'"label": "Server address".*?"value": "".*?}', - content).group(0) + r'"label": "服务ip".*?"value": "".*?}', content).group(0) cur_server_addr = default_server_addr.replace( - '"value": ""', - '"value": "localhost:{}"'.format(http_port)) - default_model_name = re.search( - r'"label": "model name".*?"value": "".*?}', + '"value": ""', '"value": "localhost"') + default_http_port = re.search( + r'"label": "推理服务端口".*?"value": "".*?}', content).group(0) + cur_http_port = default_http_port.replace( + '"value": ""', '"value": "{}"'.format(http_port)) + default_metrics_port = re.search( + r'"label": "性能服务端口".*?"value": "".*?}', + content).group(0) + cur_metrics_port = default_metrics_port.replace( + '"value": ""', '"value": "{}"'.format(metrics_port)) + default_model_name = re.search( + r'"label": "模型名称".*?"value": "".*?}', content).group(0) cur_model_name = default_model_name.replace( '"value": ""', '"value": "{}"'.format(model_name)) default_model_version = re.search( - r'"label": "model version".*?"value": "".*?}', - content).group(0) + r'"label": "模型版本".*?"value": "".*?}', content).group(0) cur_model_version = default_model_version.replace( '"value": ""', '"value": "{}"'.format('1')) + + content = content.replace(default_server_addr, + cur_server_addr) if http_port: - content = content.replace(default_server_addr, - cur_server_addr) + content = content.replace(default_http_port, + cur_http_port) + if metrics_port: + content = content.replace(default_metrics_port, + cur_metrics_port) if model_name: content = content.replace(default_model_name, cur_model_name) + content = content.replace(default_model_version, cur_model_version) except Exception: From 1eb15fc7233a9e08f9c61c176b74d03ceede048f Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 16 Dec 2022 10:14:11 +0800 Subject: [PATCH 23/48] update presentation --- .../inference/fastdeploy_client/client_app.py | 8 +++---- .../fastdeploy_client/http_client_manager.py | 10 +++++--- .../component/inference/fastdeploy_lib.py | 9 ++++--- visualdl/server/app.py | 24 ++++++++++++------- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py index a6af465ff..e918775fe 100644 --- a/visualdl/component/inference/fastdeploy_client/client_app.py +++ b/visualdl/component/inference/fastdeploy_client/client_app.py @@ -231,25 +231,25 @@ def create_gradio_client_app(): # noqa:C901 
output_images.append(output_image) output_texts.append(output_text) component_submit_button = gr.Button("提交请求") - with gr.Tab("源格式"): + with gr.Tab("原始形式"): gr.Markdown("模型输入") raw_payload_text = gr.Textbox( label="负载数据", max_lines=10000) with gr.Column(): gr.Markdown("输出") output_raw_text = gr.Textbox( - label="服务返回的原数据", interactive=False) + label="服务返回的原始数据", interactive=False) raw_submit_button = gr.Button("提交请求") with gr.Box(): with gr.Column(): gr.Markdown("服务性能统计(每次提交请求会自动更新数据,您也可以手动点击更新)") - update_metric_button = gr.Button("更新数据") - output_html_table = gr.Textbox( + output_html_table = gr.HTML( label="metrics", interactive=False, show_label=False, value=metrics_table_head.format('', '')) + update_metric_button = gr.Button("更新统计数据") status_text = gr.Textbox( label="status", diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index cf40a1194..4c29bd102 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -53,9 +53,9 @@ def prepare_request(inputs_meta, inputs_data, outputs_meta): metrics_table_head = """
@@ -137,7 +137,11 @@ def get_metric_data(server_addr, metric_port): # noqa:C901 "nv_cpu_memory_used_bytes" } } - res = requests.get("http://{}:{}/metrics".format(server_addr, metric_port)) + try: + res = requests.get("http://{}:{}/metrics".format( + server_addr, metric_port)) + except Exception: + return metrics_table_head.format('', '') metric_content = res.text for content in metric_content.split('\n'): if content.startswith('#'): diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 80ff1800a..69d7be5d2 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -352,7 +352,7 @@ def launch_process(kwargs: dict): for key, value in kwargs.items(): if key == 'default_model_name': # Used to fill client model_name automatically start_args[key] = value - pass + continue cmd.append('--{}'.format(key)) cmd.append('{}'.format(value)) start_args[key] = value @@ -524,8 +524,11 @@ def check_process_alive(pid): def generate_metric_table(server_addr, server_port): model_table = {} gpu_table = {} - - res = requests.get("http://{}:{}/metrics".format(server_addr, server_port)) + try: + res = requests.get("http://{}:{}/metrics".format( + server_addr, server_port)) + except Exception: + return {} metric_content = res.text for content in metric_content.split('\n'): if content.startswith('#'): diff --git a/visualdl/server/app.py b/visualdl/server/app.py index 94492fd78..e451c4e21 100644 --- a/visualdl/server/app.py +++ b/visualdl/server/app.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ======================================================================= +import json import multiprocessing import os import re @@ -239,28 +240,35 @@ def request_fastdeploy_create_fastdeploy_client_app(path: str): content = content.decode() try: default_server_addr = re.search( - r'"label": "服务ip".*?"value": "".*?}', content).group(0) + '"label": {}.*?"value": "".*?}}'.format( + json.dumps("服务ip", ensure_ascii=True).replace( + '\\', '\\\\')), content).group(0) cur_server_addr = default_server_addr.replace( '"value": ""', '"value": "localhost"') default_http_port = re.search( - r'"label": "推理服务端口".*?"value": "".*?}', - content).group(0) + '"label": {}.*?"value": "".*?}}'.format( + json.dumps("推理服务端口", ensure_ascii=True).replace( + '\\', '\\\\')), content).group(0) cur_http_port = default_http_port.replace( '"value": ""', '"value": "{}"'.format(http_port)) default_metrics_port = re.search( - r'"label": "性能服务端口".*?"value": "".*?}', - content).group(0) + '"label": {}.*?"value": "".*?}}'.format( + json.dumps("性能服务端口", ensure_ascii=True).replace( + '\\', '\\\\')), content).group(0) cur_metrics_port = default_metrics_port.replace( '"value": ""', '"value": "{}"'.format(metrics_port)) default_model_name = re.search( - r'"label": "模型名称".*?"value": "".*?}', content).group(0) + '"label": {}.*?"value": "".*?}}'.format( + json.dumps("模型名称", ensure_ascii=True).replace( + '\\', '\\\\')), content).group(0) cur_model_name = default_model_name.replace( '"value": ""', '"value": "{}"'.format(model_name)) default_model_version = re.search( - r'"label": "模型版本".*?"value": "".*?}', content).group(0) + '"label": {}.*?"value": "".*?}}'.format( + json.dumps("模型版本", ensure_ascii=True).replace( + '\\', '\\\\')), content).group(0) cur_model_version = default_model_version.replace( '"value": ""', '"value": "{}"'.format('1')) - content = 
content.replace(default_server_addr, cur_server_addr) if http_port: From d6abc5ab9d5cdf576a845669f03a34126c15ec1a Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 16 Dec 2022 11:00:10 +0800 Subject: [PATCH 24/48] Change return value to None for frontend performance data when server not ready --- visualdl/component/inference/fastdeploy_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 69d7be5d2..6816be342 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -528,7 +528,7 @@ def generate_metric_table(server_addr, server_port): res = requests.get("http://{}:{}/metrics".format( server_addr, server_port)) except Exception: - return {} + return None metric_content = res.text for content in metric_content.split('\n'): if content.startswith('#'): From 41f5dfd5ea6890316e22b97bf60854174c60fd93 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 23 Dec 2022 16:42:33 +0800 Subject: [PATCH 25/48] add get_server_config and download_pretrain_model api --- .../inference/fastdeploy_client/client_app.py | 3 +- .../fastdeploy_client/http_client_manager.py | 7 +- .../component/inference/fastdeploy_lib.py | 194 ++++++++++++------ .../component/inference/fastdeploy_server.py | 43 ++-- 4 files changed, 171 insertions(+), 76 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py index e918775fe..7b28bdbe3 100644 --- a/visualdl/component/inference/fastdeploy_client/client_app.py +++ b/visualdl/component/inference/fastdeploy_client/client_app.py @@ -273,7 +273,7 @@ def get_input_output_name(server_ip, server_port, model_name, for component in all_input_output_components } results[component_format_column] = gr.update(visible=True) - results[check_button] = gr.update(visible=False) + # results[check_button] = gr.update(visible=False) for input_accordio in input_accordions: results[input_accordio] = gr.update(visible=False) for output_accordio in output_accordions: @@ -313,6 +313,7 @@ def component_inference(*args): try: infer_results = _http_manager.infer( server_addr, model_name, model_version, inputs) + print('infer_results', infer_results) results = {status_text: 'Inference Successful'} for i, (output_name, data) in enumerate(infer_results.items()): diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index 4c29bd102..0e796c8a1 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -241,8 +241,11 @@ def infer(self, server_url, model_name, model_version, inputs): for output in output_metadata: result = response.as_numpy(output.name) # datatype: numpy if output.datatype == 'BYTES': - result = result[0][0] # datatype: bytes - result = json.loads(result) # datatype: json + try: # maybe not vison tasks, normal text + value = result[0][0] # datatype: bytes + result = json.loads(value) # datatype: json + except Exception: + pass else: result = result[0] results[output.name] = result diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 6816be342..b7f971994 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -71,7 
+71,6 @@ def analyse_config(cur_dir: str): Return a json object to describe configuration. ''' all_model_configs = {} - all_model_paths = {} all_model_versions = {} parent_dir, sub_dirs, filenames = os.walk(cur_dir).send( None) # models can only put directory in model repository, @@ -82,7 +81,6 @@ def analyse_config(cur_dir: str): model_name = os.path.basename(model_dir) for filename in filenames: if 'config.pbtxt' in filename: - all_model_paths[model_name] = model_dir # store model path json_config = json.loads( pbtxt2json(open(os.path.join(model_dir, filename)).read())) all_model_configs[ @@ -104,7 +102,7 @@ def analyse_config(cur_dir: str): if not all_model_configs: raise Exception( 'Not a valid model repository, please choose the right path') - return all_model_configs, all_model_versions, all_model_paths + return all_model_configs, all_model_versions def exchange_format_to_original_format(exchange_format): @@ -353,10 +351,18 @@ def launch_process(kwargs: dict): if key == 'default_model_name': # Used to fill client model_name automatically start_args[key] = value continue + if key == 'server-name' or key == 'ensemble-img': # extra information + start_args[key] = value + continue cmd.append('--{}'.format(key)) cmd.append('{}'.format(value)) start_args[key] = value + all_model_configs, all_model_versions, _ = analyse_config( + start_args['model-repository']) + model_repo_config = original_format_to_exchange_format( + all_model_configs, all_model_versions) + model_repo_config['ensemble-img'] = start_args['ensemble-img'] logfilename = 'logfile-{}'.format(get_random_string(8)) while os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, logfilename)): logfilename = 'logfile-{}'.format(get_random_string(8)) @@ -367,11 +373,18 @@ def launch_process(kwargs: dict): buffering=1), stderr=STDOUT, universal_newlines=True) - with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(p.pid)), - 'w') as f: - f.write( - logfilename + '\n' + json.dumps(start_args) - ) # filename ${p.pid} contain the real log filename ${logfilename}, and start arguments + server_name = start_args['server-name'] if start_args[ + 'server-name'] else p.pid + with open( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_name)), + 'w') as f: + # filename ${server_name} contain 4 lines: + # line1 : the real log filename ${logfilename} + # line2 : pid + # line3 : launch arguments + # line4 : model-repository configuration + f.write(logfilename + '\n' + str(p.pid) + '\n' + + json.dumps(start_args) + '\n' + json.dumps(model_repo_config)) return p @@ -386,7 +399,7 @@ def get_start_arguments(server_id): ''' Get the start arguments for fastdeployserver process. Args: - server_id(int): fastdeployserver process id + server_id(str): fastdeployserver process name Returns: args(dict): launch arguments when start fastdeployserver process. ''' @@ -396,19 +409,73 @@ def get_start_arguments(server_id): with open( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)), 'r') as f: - f.readline() - args = json.loads(f.read()) + arguments_json = f.read().split('\n')[2] + args = json.loads(arguments_json) return args -def get_process_output(pid, length): +def get_process_pid(server_id): + ''' + Get the process id for fastdeployserver process. + Args: + server_id(str): fastdeployserver process name + Returns: + pid(int): process id. 
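    Note: the tracking file parsed here is the one written by launch_process,
    i.e. four lines holding the log filename, the pid, the launch arguments
    as JSON and the model-repository configuration as JSON. An illustrative
    file body, with made-up values:

        logfile-a1b2c3d4
        23817
        {"model-repository": "./my_repo", "http-port": "8000"}
        {"models": [], "ensembles": []}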
+ ''' + pid = None + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))): + with open( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)), + 'r') as f: + pid = int(f.read().split('\n')[1]) + return pid + + +def get_process_logfile_name(server_id): + ''' + Get the process logfile name for fastdeployserver process. + Args: + server_id(str): fastdeployserver process name + Returns: + logfile(str): logfile name. + ''' + filename = None + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))): + with open( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)), + 'r') as f: + filename = int(f.read().split('\n')[0]) + return filename + + +def get_process_model_configuration(server_id): + ''' + Get the model repository configuration for fastdeployserver process. + Args: + server_id(str): fastdeployserver process name + Returns: + configuration(dict): model repository configuration + ''' + conf = {} + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))): + with open( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)), + 'r') as f: + conf_json = f.read().split('\n')[3] + conf = json.loads(conf_json) + return conf + + +def get_process_output(server_id, length): ''' Get the standard output of a opened subprocess. ''' - if os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))): - with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), - 'r') as f: - logfilename = f.readline().strip('\n') + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))): + logfilename = get_process_logfile_name(server_id) # delete file ${logfilename} if exists if os.path.exists( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))): @@ -420,14 +487,31 @@ def get_process_output(pid, length): return data +def delete_files_for_process(server_id): + ''' + Delete logfile for fastdeployserver process. + Args: + server_id(str): fastdeployserver process name + ''' + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))): + logfilename = get_process_logfile_name(server_id) + # delete file ${logfilename} if exists + if os.path.exists( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))): + os.remove( + os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))) + os.remove(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))) + + def kill_process(process): ''' Stop a opened subprocess. 
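    Note: process is either a server name (a str, resolved to a pid through
    get_process_pid and sent SIGKILL) or the Popen handle returned by
    launch_process (killed directly and reaped with wait()), so both
    kill_process(p) and kill_process('my_server') are valid calls, the
    server name 'my_server' being a made-up example.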
''' - if type(process) == int: # pid, use os.kill to terminate - pid = process + if type(process) == str: # server_id, use os.kill to terminate + pid = get_process_pid(process) try: - os.kill(process, signal.SIGKILL) + os.kill(pid, signal.SIGKILL) # delete file ${pid} if exists except Exception: pass @@ -438,60 +522,38 @@ def kill_process(process): process.wait(10) except Exception: pass - if os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))): - with open(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), - 'r') as f: - logfilename = f.readline().strip('\n') - # delete file ${logfilename} if exists - if os.path.exists( - os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))): - os.remove( - os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(logfilename))) - os.remove(os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))) def get_alive_fastdeploy_servers(): ''' - Search pids in `FASTDEPLOYSERVER_PATH`, if process is dead and log still exists due to \ + Search server names in `FASTDEPLOYSERVER_PATH`, if process is dead and log still exists due to \ some unexpectable reasons, delete log file. ''' - pids = [ + server_names = [ name for name in os.listdir(FASTDEPLOYSERVER_PATH) if 'logfile' not in name ] - should_delete_pids = [] - for pid in pids: - if check_process_alive(pid) is False: - if os.path.exists( - os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))): - with open( - os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid)), - 'r') as f: - logfilename = f.readline().strip('\n') - # delete file ${logfilename} if exists - if os.path.exists( - os.path.join(FASTDEPLOYSERVER_PATH, - '{}'.format(logfilename))): - os.remove( - os.path.join(FASTDEPLOYSERVER_PATH, - '{}'.format(logfilename))) - os.remove( - os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(pid))) - should_delete_pids.append(pid) - for pid in should_delete_pids: - pids.remove(pid) - return pids - - -def check_process_alive(pid): + should_delete_servers = [] + for server_name in server_names: + if check_process_alive(server_name) is False: + delete_files_for_process(server_name) + should_delete_servers.append(server_name) + for server_name in should_delete_servers: + server_names.remove(server_name) + return server_names + + +def check_process_alive(server_id): ''' - Given a pid, check whether the process is alive or not. + Given a server id, check whether the process is alive or not. Args: - pid(int): process id + server_id(str): fastdeployserver process name Return: status(bool): True if process is still alive. 
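    Note: the check relies on the conventional signal-0 probe, i.e.
    os.kill(pid, 0) delivers no signal but still performs the pid lookup,
    raising OSError when no such process exists.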
''' - pid = int(pid) + pid = get_process_pid(server_id) + if pid is None: + return False try: os.kill(pid, 0) except OSError: @@ -521,7 +583,7 @@ def check_process_alive(pid): } -def generate_metric_table(server_addr, server_port): +def generate_metric_table(server_addr, server_port): # noqa:C901 model_table = {} gpu_table = {} try: @@ -546,6 +608,18 @@ def generate_metric_table(server_addr, server_port): k, v = info.split('=') v = v.strip('"') infos[k] = v + if metric_name in [ + "nv_inference_request_duration_us", + "nv_inference_queue_duration_us", + "nv_inference_compute_input_duration_us", + "nv_inference_compute_infer_duration_us", + "nv_inference_compute_output_duration_us" + ]: + value = float(value) / 1000 + elif metric_name in [ + "nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes" + ]: + value = float(value) / 1024 / 1024 / 1024 for key, metric_names in _metric_column_name.items(): if metric_name in metric_names: if key == 'Model': diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 47981dac4..e52eb6ec0 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -21,13 +21,16 @@ from multiprocessing import Process from pathlib import Path +import fastdeploy as fd import requests from .fastdeploy_client.client_app import create_gradio_client_app from .fastdeploy_lib import analyse_config +from .fastdeploy_lib import delete_files_for_process from .fastdeploy_lib import exchange_format_to_original_format from .fastdeploy_lib import generate_metric_table from .fastdeploy_lib import get_alive_fastdeploy_servers +from .fastdeploy_lib import get_process_model_configuration from .fastdeploy_lib import get_process_output from .fastdeploy_lib import get_start_arguments from .fastdeploy_lib import json2pbtxt @@ -46,7 +49,6 @@ def __init__(self): self.opened_servers = { } # Use to store the opened server process pid and process itself self.client_port = None - self.model_paths = {} @result() def get_directory(self, cur_dir): @@ -66,10 +68,7 @@ def get_directory(self, cur_dir): @result() def get_config(self, cur_dir): - all_model_configs, all_model_versions, all_model_paths = analyse_config( - cur_dir) - for name, value in all_model_paths.items(): - self.model_paths[(Path(os.path.abspath(cur_dir)), name)] = value + all_model_configs, all_model_versions = analyse_config(cur_dir) return original_format_to_exchange_format(all_model_configs, all_model_versions) @@ -77,8 +76,7 @@ def get_config(self, cur_dir): def config_update(self, cur_dir, model_name, config): config = json.loads(config) all_models = exchange_format_to_original_format(config) - model_dir = self.model_paths[(Path(os.path.abspath(cur_dir)), - model_name)] + model_dir = os.path.join(os.path.abspath(cur_dir), model_name) filtered_config = validate_data(all_models[model_name]) text_proto = json2pbtxt(json.dumps(filtered_config)) # backup user's config.pbtxt first, when data corrupted by front-end, we still can recovery data @@ -99,19 +97,21 @@ def start_server(self, configs): raise RuntimeError( "Launch fastdeploy server failed, please check your launching arguments" ) - self.opened_servers[process.pid] = process - return process.pid + server_name = configs['server-name'] if configs[ + 'server-name'] else process.pid + self.opened_servers[server_name] = process + return server_name @result() def stop_server(self, server_id): - server_id = int(server_id) if server_id in self.opened_servers: # check 
if server_id in self.opened_servers kill_process(self.opened_servers[server_id]) del self.opened_servers[server_id] - elif str(server_id) in set( + elif server_id in set( os.listdir(FASTDEPLOYSERVER_PATH)): # check if server_id in # FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn) kill_process(server_id) + delete_files_for_process(server_id) # check if there are servers killed by other vdl app instance and become zoombie should_delete = [] for server_id, process in self.opened_servers.items(): @@ -122,7 +122,6 @@ def stop_server(self, server_id): @result('text/plain') def get_server_output(self, server_id, length): - server_id = int(server_id) length = int(length) if server_id in self.opened_servers: # check if server_id in self.opened_servers return get_process_output(server_id, length) @@ -144,6 +143,20 @@ def get_server_metric(self, server_id): def get_server_list(self): return get_alive_fastdeploy_servers() + @result() + def get_server_config(self, server_id): + return get_process_model_configuration(server_id) + + @result() + def download_pretrain_model(self, cur_dir, model_name, version, + pretrain_model_name): + version_resource_dir = os.path.join( + os.path.abspath(cur_dir), model_name, version) + fd.download_model(name=pretrain_model_name, path=version_resource_dir) + os.system('mv {}/{}/* {} && rm -r {}/{}'.format( + version_resource_dir, pretrain_model_name, version_resource_dir, + version_resource_dir, pretrain_model_name)) + def create_fastdeploy_client(self): if self.client_port is None: @@ -185,7 +198,11 @@ def create_fastdeploy_api_call(): 'get_server_output': (api.get_server_output, ['server_id', 'length']), 'create_fastdeploy_client': (api.create_fastdeploy_client, []), 'get_server_list': (api.get_server_list, []), - 'get_server_metric': (api.get_server_metric, ['server_id']) + 'get_server_metric': (api.get_server_metric, ['server_id']), + 'get_server_config': (api.get_server_config, ['server_id']), + 'download_pretrain_model': + (api.download_pretrain_model, + ['dir', 'name', 'version', 'pretrain_model_name']), } def call(path: str, args): From 042189248915e63e52977088b659148b0650137e Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 23 Dec 2022 17:07:34 +0800 Subject: [PATCH 26/48] add get_server_config and download_pretrain_model api --- visualdl/component/inference/fastdeploy_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index b7f971994..617850ce9 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -358,7 +358,7 @@ def launch_process(kwargs: dict): cmd.append('{}'.format(value)) start_args[key] = value - all_model_configs, all_model_versions, _ = analyse_config( + all_model_configs, all_model_versions = analyse_config( start_args['model-repository']) model_repo_config = original_format_to_exchange_format( all_model_configs, all_model_versions) From e7c9e5323aa82b07c8a7642106eed237ec542249 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 27 Dec 2022 17:46:39 +0800 Subject: [PATCH 27/48] add unit for metric table --- .../fastdeploy_client/http_client_manager.py | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index 0e796c8a1..cf0b8a680 100644 --- 
a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -71,11 +71,11 @@ def prepare_request(inputs_meta, inputs_data, outputs_meta): 请求处理失败数 推理batch数 推理样本数 - 请求处理时间 - 任务队列等待时间 - 输入处理时间 - 模型推理时间 - 输出处理时间 + 请求处理时间(ms) + 任务队列等待时间(ms) + 输入处理时间(ms) + 模型推理时间(ms) + 输出处理时间(ms) {} @@ -93,12 +93,12 @@ def prepare_request(inputs_meta, inputs_data, outputs_meta): 显存 - 利用率 - 功率 - 功率限制 - 耗电量 - 总量 - 已使用 + 利用率(%) + 功率(W) + 功率限制(W) + 耗电量(W) + 总量(GB) + 已使用(GB) {} @@ -159,6 +159,18 @@ def get_metric_data(server_addr, metric_port): # noqa:C901 k, v = info.split('=') v = v.strip('"') infos[k] = v + if metric_name in [ + "nv_inference_request_duration_us", + "nv_inference_queue_duration_us", + "nv_inference_compute_input_duration_us", + "nv_inference_compute_infer_duration_us", + "nv_inference_compute_output_duration_us" + ]: + value = float(value) / 1000 + elif metric_name in [ + "nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes" + ]: + value = float(value) / 1024 / 1024 / 1024 for key, metric_names in metric_column_name.items(): if metric_name in metric_names: if key == 'Model': From 4c67a0229897030aa30cee0b16476188dc0f4f96 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 27 Dec 2022 18:07:34 +0800 Subject: [PATCH 28/48] add unit for metric table --- .../inference/fastdeploy_client/http_client_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index cf0b8a680..47872e9b1 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -166,11 +166,11 @@ def get_metric_data(server_addr, metric_port): # noqa:C901 "nv_inference_compute_infer_duration_us", "nv_inference_compute_output_duration_us" ]: - value = float(value) / 1000 + value = str(float(value) / 1000) elif metric_name in [ "nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes" ]: - value = float(value) / 1024 / 1024 / 1024 + value = str(float(value) / 1024 / 1024 / 1024) for key, metric_names in metric_column_name.items(): if metric_name in metric_names: if key == 'Model': From 15c23405e017ab5d3b5f80af8d772a9e0c8bf56a Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 28 Dec 2022 12:05:16 +0800 Subject: [PATCH 29/48] fix a bug --- visualdl/component/inference/fastdeploy_lib.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 617850ce9..3163c9a66 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -346,6 +346,7 @@ def launch_process(kwargs: dict): Launch a fastdeploy server according to specified arguments. 
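    Note: the keys 'default_model_name', 'server-name' and 'ensemble-img' are
    recorded for bookkeeping only, 'gpus' (when non-empty) is exported as
    CUDA_VISIBLE_DEVICES for the child process, and every other entry is
    passed through as a --key value flag. With made-up arguments,

        launch_process({'model-repository': './my_repo', 'http-port': '8000',
                        'gpus': '0', 'server-name': 'my_server',
                        'ensemble-img': '', 'default_model_name': ''})

    spawns fastdeployserver --model-repository ./my_repo --http-port 8000
    with CUDA_VISIBLE_DEVICES=0 in its environment.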
''' cmd = ['fastdeployserver'] + launch_env = os.environ.copy() start_args = {} for key, value in kwargs.items(): if key == 'default_model_name': # Used to fill client model_name automatically @@ -354,6 +355,10 @@ def launch_process(kwargs: dict): if key == 'server-name' or key == 'ensemble-img': # extra information start_args[key] = value continue + if key == 'gpus': + launch_env['CUDA_VISIBLE_DEVICES'] = value + start_args[key] = value + continue cmd.append('--{}'.format(key)) cmd.append('{}'.format(value)) start_args[key] = value @@ -372,7 +377,8 @@ def launch_process(kwargs: dict): os.path.join(FASTDEPLOYSERVER_PATH, logfilename), 'w', buffering=1), stderr=STDOUT, - universal_newlines=True) + universal_newlines=True, + env=launch_env) server_name = start_args['server-name'] if start_args[ 'server-name'] else p.pid with open( @@ -446,7 +452,7 @@ def get_process_logfile_name(server_id): with open( os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)), 'r') as f: - filename = int(f.read().split('\n')[0]) + filename = f.read().split('\n')[0] return filename From f4e6a95c1967748f7c714b5433985451d564a6fb Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 28 Dec 2022 15:16:16 +0800 Subject: [PATCH 30/48] add judgement pretrained model download --- visualdl/component/inference/fastdeploy_server.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index e52eb6ec0..cd4d6309c 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -152,10 +152,17 @@ def download_pretrain_model(self, cur_dir, model_name, version, pretrain_model_name): version_resource_dir = os.path.join( os.path.abspath(cur_dir), model_name, version) - fd.download_model(name=pretrain_model_name, path=version_resource_dir) - os.system('mv {}/{}/* {} && rm -r {}/{}'.format( - version_resource_dir, pretrain_model_name, version_resource_dir, - version_resource_dir, pretrain_model_name)) + model_path = fd.download_model( + name=pretrain_model_name, path=version_resource_dir) + if model_path: + os.system('mv {}/{}/* {} && rm -r {}/{}'.format( + version_resource_dir, pretrain_model_name, + version_resource_dir, version_resource_dir, + pretrain_model_name)) + else: + raise RuntimeError( + "No pretrained model named {} can be downloaded".format( + pretrain_model_name)) def create_fastdeploy_client(self): if self.client_port is None: From a31c40ccde3209f6e7b484b57ddcbcebec1f632e Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 28 Dec 2022 15:46:14 +0800 Subject: [PATCH 31/48] add judgement pretrained model download --- visualdl/component/inference/fastdeploy_lib.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 3163c9a66..0e35fffc8 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -356,8 +356,9 @@ def launch_process(kwargs: dict): start_args[key] = value continue if key == 'gpus': - launch_env['CUDA_VISIBLE_DEVICES'] = value - start_args[key] = value + if value: + launch_env['CUDA_VISIBLE_DEVICES'] = value + start_args[key] = value continue cmd.append('--{}'.format(key)) cmd.append('{}'.format(value)) From 8d36f91204dd95dbeba2bc1e958c1f89e823ee3d Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 28 Dec 2022 17:45:39 +0800 Subject: [PATCH 32/48] 
add version info for frontend --- .../component/inference/fastdeploy_server.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index cd4d6309c..44ff6023d 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -15,6 +15,7 @@ import datetime import json import os +import re import shutil import socket import time @@ -159,6 +160,25 @@ def download_pretrain_model(self, cur_dir, model_name, version, version_resource_dir, pretrain_model_name, version_resource_dir, version_resource_dir, pretrain_model_name)) + version_info_for_frontend = [] + for version_name in os.listdir(os.path.join(cur_dir, model_name)): + if re.match( + r'\d+', + version_name): # version directory consists of numbers + version_filenames_dict_for_frontend = {} + version_filenames_dict_for_frontend['title'] = version_name + version_filenames_dict_for_frontend['key'] = version_name + version_filenames_dict_for_frontend['children'] = [] + for filename in os.listdir( + os.path.join(cur_dir, model_name, version_name)): + version_filenames_dict_for_frontend['children'].append( + { + 'title': filename, + 'key': filename + }) + version_info_for_frontend.append( + version_filenames_dict_for_frontend) + return version_info_for_frontend else: raise RuntimeError( "No pretrained model named {} can be downloaded".format( From cefa9f754105066b1e5c8aa5c55ee349864f3180 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 29 Dec 2022 11:38:59 +0800 Subject: [PATCH 33/48] rename download model --- visualdl/component/inference/fastdeploy_server.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 44ff6023d..a1aee8047 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -156,10 +156,15 @@ def download_pretrain_model(self, cur_dir, model_name, version, model_path = fd.download_model( name=pretrain_model_name, path=version_resource_dir) if model_path: - os.system('mv {}/{}/* {} && rm -r {}/{}'.format( - version_resource_dir, pretrain_model_name, - version_resource_dir, version_resource_dir, - pretrain_model_name)) + if '.onnx' in model_path: + os.system('mv {} {}/{}'.format(model_path, + os.path.dirname(model_path), + 'model.onnx')) + else: + os.system('mv {}/{}/* {} && rm -r {}/{}'.format( + version_resource_dir, pretrain_model_name, + version_resource_dir, version_resource_dir, + pretrain_model_name)) version_info_for_frontend = [] for version_name in os.listdir(os.path.join(cur_dir, model_name)): if re.match( From 86caf85ed72c889e7bd6afe268ed7662818983eb Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 29 Dec 2022 16:25:57 +0800 Subject: [PATCH 34/48] fix a bug --- visualdl/component/inference/fastdeploy_lib.py | 8 ++++---- .../component/inference/fastdeploy_server.py | 18 +++++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 0e35fffc8..c8470502a 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -91,12 +91,12 @@ def analyse_config(cur_dir: str): if re.match( r'\d+', model_sub_dir): # version directory consists of numbers + if model_name not in 
all_model_versions: + all_model_versions[model_name] = {} + if model_sub_dir not in all_model_versions[model_name]: + all_model_versions[model_name][model_sub_dir] = [] for version_resource_file in os.listdir( os.path.join(model_dir, model_sub_dir)): - if model_name not in all_model_versions: - all_model_versions[model_name] = {} - if model_sub_dir not in all_model_versions[model_name]: - all_model_versions[model_name][model_sub_dir] = [] all_model_versions[model_name][model_sub_dir].append( version_resource_file) if not all_model_configs: diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index a1aee8047..85fb58d7b 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -157,14 +157,18 @@ def download_pretrain_model(self, cur_dir, model_name, version, name=pretrain_model_name, path=version_resource_dir) if model_path: if '.onnx' in model_path: - os.system('mv {} {}/{}'.format(model_path, - os.path.dirname(model_path), - 'model.onnx')) + shutil.move( + model_path, + os.path.join(os.path.dirname(model_path), 'model.onnx')) else: - os.system('mv {}/{}/* {} && rm -r {}/{}'.format( - version_resource_dir, pretrain_model_name, - version_resource_dir, version_resource_dir, - pretrain_model_name)) + for filename in os.listdir(model_path): + if '.pdmodel' in filename or '.pdiparams' in filename: + shutil.move( + os.path.join(model_path, filename), + os.path.join( + os.path.dirname(model_path), 'model{}'.format( + os.path.splitext(filename)[1]))) + shutil.rmtree(model_path) version_info_for_frontend = [] for version_name in os.listdir(os.path.join(cur_dir, model_name)): if re.match( From 8c50447b15bb83a707eafd476895404f74f35ac8 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 29 Dec 2022 20:41:14 +0800 Subject: [PATCH 35/48] add fastdeploy model list --- .../component/inference/fastdeploy_server.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 85fb58d7b..989656f67 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -148,6 +148,39 @@ def get_server_list(self): def get_server_config(self, server_id): return get_process_model_configuration(server_id) + @result() + def get_pretrain_model_list(self): + ''' + Get all available fastdeploy models from hub server. 
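    The hub endpoint is expected to answer with JSON of the form
    {"status": 0, "data": {category: [{"name": ...}, ...], ...}}; model names
    are de-duplicated per category, sorted, and reshaped into cascader
    options for the frontend, e.g. (illustrative values):

        [{"value": "detection", "label": "detection",
          "children": [{"value": "yolov5", "label": "yolov5"}]}]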
+ ''' + res = requests.get( + 'http://paddlepaddle.org.cn/paddlehub/fastdeploy_listmodels') + result = res.json() + if result['status'] != 0: + raise RuntimeError("Can not get model list from hub model server.") + else: + data = result['data'] + model_list = {} + for category, models in data.items(): + if category not in model_list: + model_list[category] = set() + for model in models: + model_list[category].add(model['name']) + # adapt data format for frontend + models_info = [] + for category, model_names in model_list.items(): + models_info.append({ + "value": category, + "label": category, + "children": [] + }) + for model_name in sorted(model_names): + models_info[-1]["children"].append({ + "value": model_name, + "label": model_name + }) + return models_info + @result() def download_pretrain_model(self, cur_dir, model_name, version, pretrain_model_name): @@ -236,6 +269,7 @@ def create_fastdeploy_api_call(): 'get_server_list': (api.get_server_list, []), 'get_server_metric': (api.get_server_metric, ['server_id']), 'get_server_config': (api.get_server_config, ['server_id']), + 'get_pretrain_model_list': (api.get_pretrain_model_list, []), 'download_pretrain_model': (api.download_pretrain_model, ['dir', 'name', 'version', 'pretrain_model_name']), From db7ba0bcd7994a86dc5d2b6fe60f2b3b07ec7d35 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 3 Jan 2023 16:53:06 +0800 Subject: [PATCH 36/48] optimize for choose configuration files --- .../component/inference/fastdeploy_lib.py | 82 +++++++++++++++++-- .../component/inference/fastdeploy_server.py | 60 +++++++++++--- 2 files changed, 120 insertions(+), 22 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index c8470502a..dda957da1 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -17,6 +17,7 @@ import os import random import re +import shutil import signal import string from collections import defaultdict @@ -79,14 +80,38 @@ def analyse_config(cur_dir: str): model_dir, model_sub_dirs, filenames = os.walk( os.path.join(parent_dir, model_dir_name)).send(None) model_name = os.path.basename(model_dir) + config_filenames = [] for filename in filenames: - if 'config.pbtxt' in filename: - json_config = json.loads( - pbtxt2json(open(os.path.join(model_dir, filename)).read())) - all_model_configs[ - model_name] = json_config # store original config file content in json format - if 'name' not in json_config: - json_config['name'] = model_name + if '.pbtxt' in filename: + config_filenames.append( + filename + ) # filenames with extension .pbtxt are all config files + if config_filenames: + default_config_filename = config_filenames[0] + if 'config.pbtxt' in config_filenames: + default_config_filename = 'config.pbtxt' + config_filenames.remove(default_config_filename) + config_filenames.insert(0, default_config_filename) + else: + # if no config.pbtxt, we choose the first file in config_filenames list to create config.pbtxt + shutil.copy( + os.path.join(model_dir, default_config_filename), + os.path.join(model_dir, 'config.pbtxt')) + default_config_filename = 'config.pbtxt' + config_filenames.insert(0, default_config_filename) + json_config = json.loads( + pbtxt2json( + open(os.path.join(model_dir, + default_config_filename)).read())) + json_config[ + "config_filenames"] = config_filenames # add config_filenames to config data + all_model_configs[ + model_name] = json_config # store original config file content in json 
format + json_config[ + 'name'] = model_name # because name in config data may be different from model_name, + # model_name is model directory name actually, we should conform name with model_name. + else: + continue for model_sub_dir in model_sub_dirs: if re.match( r'\d+', @@ -100,8 +125,7 @@ def analyse_config(cur_dir: str): all_model_versions[model_name][model_sub_dir].append( version_resource_file) if not all_model_configs: - raise Exception( - 'Not a valid model repository, please choose the right path') + raise Exception('所选择的路径不是一个有效的模型库,请选择正确的路径') return all_model_configs, all_model_versions @@ -128,6 +152,8 @@ def exchange_format_to_original_format(exchange_format): # 2. delete versions information if 'versions' in model_config: del model_config['versions'] + if 'config_filenames' in model_config: + del model_config['config_filenames'] if 'platform' in model_config and model_config[ 'platform'] == 'ensemble': # emsemble model # 3. add 'ensembleScheduling' keyword @@ -296,6 +322,44 @@ def analyse_step_relationships(step_config, inputs, outputs): # noqa: C901 calculate_layout_for_frontend(models_dict) +def get_config_filenames_for_one_model(cur_dir, name): + _, _, filenames = os.walk(os.path.join(cur_dir, name)).send(None) + config_filenames = [] + for filename in filenames: + if '.pbtxt' in filename: + config_filenames.append( + filename + ) # filenames with extension .pbtxt are all config files + return config_filenames + + +def get_config_for_one_model(cur_dir, name, config_filename): + all_model_configs = {} + all_model_versions = {} + filename = os.path.join(cur_dir, name, config_filename) + json_config = json.loads(pbtxt2json(open(filename).read())) + if 'name' not in json_config: + json_config['name'] = name + all_model_configs[ + name] = json_config # store original config file content in json format + all_model_versions[name] = {} + for model_sub_dir in os.listdir(os.path.join(cur_dir, name)): + if re.match(r'\d+', + model_sub_dir): # version directory consists of numbers + if model_sub_dir not in all_model_versions[name]: + all_model_versions[name][model_sub_dir] = [] + for version_resource_file in os.listdir( + os.path.join(cur_dir, name, model_sub_dir)): + all_model_versions[name][model_sub_dir].append( + version_resource_file) + model_config = original_format_to_exchange_format(all_model_configs, + all_model_versions) + if model_config['ensembles']: + return model_config['ensembles'][0] + elif model_config['models']: + return model_config['models'][0] + + def calculate_layout_for_frontend(model_config_in_step): ''' Analyse model topology connections and prepare the positions for each model in layout. 
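The default-config rule applied in analyse_config above: every *.pbtxt file in a model directory is a candidate configuration; an existing config.pbtxt always wins and is listed first; otherwise the first candidate is copied to config.pbtxt so the server always finds a default. A minimal standalone sketch of that rule (the helper name and the endswith() matching are illustrative; the patch itself matches filenames by substring):

import os
import shutil


def pick_default_config(model_dir):
    # Every *.pbtxt file in the model directory is a candidate config.
    candidates = sorted(
        name for name in os.listdir(model_dir) if name.endswith('.pbtxt'))
    if not candidates:
        raise FileNotFoundError(
            'no .pbtxt config found in {}'.format(model_dir))
    if 'config.pbtxt' in candidates:
        # An existing config.pbtxt always wins and is listed first.
        candidates.remove('config.pbtxt')
    else:
        # Otherwise promote the first candidate so fastdeployserver
        # always finds a default configuration file.
        shutil.copy(
            os.path.join(model_dir, candidates[0]),
            os.path.join(model_dir, 'config.pbtxt'))
    candidates.insert(0, 'config.pbtxt')
    return candidates

For a directory holding only my_config.pbtxt, this returns ['config.pbtxt', 'my_config.pbtxt'] and leaves a promoted copy behind as config.pbtxt.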
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
index 989656f67..5e0dc143b 100644
--- a/visualdl/component/inference/fastdeploy_server.py
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -31,6 +31,8 @@
 from .fastdeploy_lib import exchange_format_to_original_format
 from .fastdeploy_lib import generate_metric_table
 from .fastdeploy_lib import get_alive_fastdeploy_servers
+from .fastdeploy_lib import get_config_filenames_for_one_model
+from .fastdeploy_lib import get_config_for_one_model
 from .fastdeploy_lib import get_process_model_configuration
 from .fastdeploy_lib import get_process_output
 from .fastdeploy_lib import get_start_arguments
@@ -74,19 +76,25 @@ def get_config(self, cur_dir):
                                                   all_model_versions)
 
     @result()
-    def config_update(self, cur_dir, model_name, config):
+    def config_update(self, cur_dir, model_name, config, config_filename):
         config = json.loads(config)
         all_models = exchange_format_to_original_format(config)
         model_dir = os.path.join(os.path.abspath(cur_dir), model_name)
         filtered_config = validate_data(all_models[model_name])
         text_proto = json2pbtxt(json.dumps(filtered_config))
-        # backup user's config.pbtxt first, when data corrupted by front-end, we still can recovery data
+        # back up the user's config data first; if the data gets corrupted by the front-end, we can still recover it
+        # backup config filename: {original_name}_vdlbackup_{datetime}.pbtxt
+        # a backup config can only be used to restore config.pbtxt
+        if 'vdlbackup' in config_filename:
+            raise RuntimeError("备份的配置文件不允许修改")
+        basename = os.path.splitext(config_filename)[0]
         shutil.copy(
-            os.path.join(model_dir, 'config.pbtxt'),
+            os.path.join(model_dir, config_filename),
             os.path.join(
-                model_dir, 'config_vdlbackup_{}.pbtxt'.format(
+                model_dir, '{}_vdlbackup_{}.pbtxt'.format(
+                    basename,
                     datetime.datetime.now().isoformat())))
-        with open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f:
+        with open(os.path.join(model_dir, config_filename), 'w') as f:
             f.write(text_proto)
         return
 
@@ -95,9 +103,7 @@ def start_server(self, configs):
         configs = json.loads(configs)
         process = launch_process(configs)
         if process.poll() is not None:
-            raise RuntimeError(
-                "Launch fastdeploy server failed, please check your launching arguments"
-            )
+            raise RuntimeError("启动fastdeployserver服务器失败,请检查启动参数")
         server_name = configs['server-name'] if configs[
             'server-name'] else process.pid
         self.opened_servers[server_name] = process
@@ -157,7 +163,7 @@ def get_pretrain_model_list(self):
             'http://paddlepaddle.org.cn/paddlehub/fastdeploy_listmodels')
         result = res.json()
         if result['status'] != 0:
-            raise RuntimeError("Can not get model list from hub model server.")
+            raise RuntimeError("从hub的模型服务器请求模型列表失败")
         else:
             data = result['data']
             model_list = {}
@@ -222,9 +228,30 @@ def download_pretrain_model(self, cur_dir, model_name, version,
                     version_filenames_dict_for_frontend)
             return version_info_for_frontend
         else:
-            raise RuntimeError(
-                "No pretrained model named {} can be downloaded".format(
-                    pretrain_model_name))
+            raise RuntimeError("预训练模型{}下载失败".format(pretrain_model_name))
+
+    @result()
+    def get_config_for_model(self, cur_dir, name, config_filename):
+        return get_config_for_one_model(cur_dir, name, config_filename)
+
+    @result()
+    def get_config_filenames_for_model(self, cur_dir, name):
+        return get_config_filenames_for_one_model(cur_dir, name)
+
+    @result()
+    def set_default_config_for_model(self, cur_dir, name, config_filename):
+        model_dir = os.path.join(os.path.abspath(cur_dir), name)
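+        # Example of the flow below (illustrative filename and timestamp):
+        # making 'my_config.pbtxt' the default first backs up the current
+        # config.pbtxt as 'config_vdlbackup_2023-01-03T16:53:06.pbtxt', then
+        # copies my_config.pbtxt over config.pbtxt.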
+ # backup config.pbtxt to config_vdlbackup_{datetime}.pbtxt + if os.path.exists(os.path.join(model_dir, 'config.pbtxt')): + shutil.copy( + os.path.join(model_dir, 'config.pbtxt'), + os.path.join( + model_dir, 'config_vdlbackup_{}.pbtxt'.format( + datetime.datetime.now().isoformat()))) + shutil.copy( + os.path.join(model_dir, config_filename), + os.path.join(model_dir, 'config.pbtxt')) + return def create_fastdeploy_client(self): if self.client_port is None: @@ -260,8 +287,15 @@ def create_fastdeploy_api_call(): api = FastDeployServerApi() routes = { 'get_directory': (api.get_directory, ['dir']), - 'config_update': (api.config_update, ['dir', 'name', 'config']), + 'config_update': (api.config_update, + ['dir', 'name', 'config', 'config_filename']), 'get_config': (api.get_config, ['dir']), + 'get_config_filenames_for_model': (api.get_config_filenames_for_model, + ['dir', 'name']), + 'get_config_for_model': (api.get_config_for_model, + ['dir', 'name', 'config_filename']), + 'set_default_config_for_model': (api.set_default_config_for_model, + ['dir', 'name', 'config_filename']), 'start_server': (api.start_server, ['config']), 'stop_server': (api.stop_server, ['server_id']), 'get_server_output': (api.get_server_output, ['server_id', 'length']), From 6ac3b4b8753ab60ed1e4b039db7b4b4ea5efe0fd Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 4 Jan 2023 20:50:15 +0800 Subject: [PATCH 37/48] modify according to frontend need --- visualdl/component/inference/fastdeploy_lib.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index dda957da1..b711c92cc 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -103,8 +103,9 @@ def analyse_config(cur_dir: str): pbtxt2json( open(os.path.join(model_dir, default_config_filename)).read())) - json_config[ - "config_filenames"] = config_filenames # add config_filenames to config data + json_config["config_filenames"] = config_filenames[ + 0] # add config_filenames to config data (frontend developer said he only wanted one filename, + # and to request config_filenames by get_config_filenames_for_one_model later) all_model_configs[ model_name] = json_config # store original config file content in json format json_config[ @@ -340,6 +341,7 @@ def get_config_for_one_model(cur_dir, name, config_filename): json_config = json.loads(pbtxt2json(open(filename).read())) if 'name' not in json_config: json_config['name'] = name + json_config["config_filenames"] = config_filename all_model_configs[ name] = json_config # store original config file content in json format all_model_versions[name] = {} From 1067387887ef79821f019dd9ce704dd4f61b83ec Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 5 Jan 2023 15:54:19 +0800 Subject: [PATCH 38/48] fix name in config to model name --- visualdl/component/inference/fastdeploy_lib.py | 5 +++-- visualdl/component/inference/fastdeploy_server.py | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index b711c92cc..c8e7889d3 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -339,8 +339,9 @@ def get_config_for_one_model(cur_dir, name, config_filename): all_model_versions = {} filename = os.path.join(cur_dir, name, config_filename) json_config = 
json.loads(pbtxt2json(open(filename).read())) - if 'name' not in json_config: - json_config['name'] = name + json_config[ + 'name'] = name # because name in config data may be different from model_name, + # model_name is model directory name actually, we should conform name with model_name. json_config["config_filenames"] = config_filename all_model_configs[ name] = json_config # store original config file content in json format diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 5e0dc143b..fe852d8c8 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -248,9 +248,10 @@ def set_default_config_for_model(self, cur_dir, name, config_filename): os.path.join( model_dir, 'config_vdlbackup_{}.pbtxt'.format( datetime.datetime.now().isoformat()))) - shutil.copy( - os.path.join(model_dir, config_filename), - os.path.join(model_dir, 'config.pbtxt')) + if config_filename != 'config.pbtxt': + shutil.copy( + os.path.join(model_dir, config_filename), + os.path.join(model_dir, 'config.pbtxt')) return def create_fastdeploy_client(self): From c37fe8e31c55f079da24a65fcd29ebdebd02cdf5 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 6 Jan 2023 13:26:19 +0800 Subject: [PATCH 39/48] optimize for server list and alive judgement --- visualdl/component/inference/fastdeploy_lib.py | 11 +++++++++-- visualdl/component/inference/fastdeploy_server.py | 13 ++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index c8e7889d3..72196ee5b 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -326,11 +326,18 @@ def analyse_step_relationships(step_config, inputs, outputs): # noqa: C901 def get_config_filenames_for_one_model(cur_dir, name): _, _, filenames = os.walk(os.path.join(cur_dir, name)).send(None) config_filenames = [] + backup_config_filenames = [] for filename in filenames: - if '.pbtxt' in filename: + if '.pbtxt' in filename and 'vdlbackup' not in filename: config_filenames.append( filename - ) # filenames with extension .pbtxt are all config files + ) # filenames with extension .pbtxt and not contain 'vdlbackup' are normal config files + elif '.pbtxt' in filename and 'vdlbackup' in filename: + backup_config_filenames.append( + filename + ) # filenames with extension .pbtxt and contain 'vdlbackup' are backup config files + config_filenames = sorted(config_filenames) + sorted( + backup_config_filenames) return config_filenames diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index fe852d8c8..0249fdbf5 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -27,6 +27,7 @@ from .fastdeploy_client.client_app import create_gradio_client_app from .fastdeploy_lib import analyse_config +from .fastdeploy_lib import check_process_alive from .fastdeploy_lib import delete_files_for_process from .fastdeploy_lib import exchange_format_to_original_format from .fastdeploy_lib import generate_metric_table @@ -103,7 +104,8 @@ def start_server(self, configs): configs = json.loads(configs) process = launch_process(configs) if process.poll() is not None: - raise RuntimeError("启动fastdeployserver服务器失败,请检查启动参数") + raise RuntimeError( + "启动fastdeployserver服务器失败,请检查环境中是否存在fastdeployserver程序") 
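+        # With no explicit server-name argument, the child pid doubles as the
+        # server identifier (it is the key used later to look the server up).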
server_name = configs['server-name'] if configs[ 'server-name'] else process.pid self.opened_servers[server_name] = process @@ -150,6 +152,14 @@ def get_server_metric(self, server_id): def get_server_list(self): return get_alive_fastdeploy_servers() + @result() + def check_server_alive(self, server_id): + if check_process_alive(server_id) is False: + delete_files_for_process(server_id) + raise RuntimeError( + "服务{}由于发生异常而退出,通常是由于启动参数设置不当或者环境配置有问题,请检查服务日志查看原因,然后手动关闭该服务项") + return + @result() def get_server_config(self, server_id): return get_process_model_configuration(server_id) @@ -305,6 +315,7 @@ def create_fastdeploy_api_call(): 'get_server_metric': (api.get_server_metric, ['server_id']), 'get_server_config': (api.get_server_config, ['server_id']), 'get_pretrain_model_list': (api.get_pretrain_model_list, []), + 'check_server_alive': (api.check_server_alive, ['server_id']), 'download_pretrain_model': (api.download_pretrain_model, ['dir', 'name', 'version', 'pretrain_model_name']), From 1acff82eabf9e8dd124539d57643ceaff91ce18a Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 6 Jan 2023 17:03:17 +0800 Subject: [PATCH 40/48] keep server name as string type --- visualdl/component/inference/fastdeploy_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 0249fdbf5..6498b54f8 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -107,7 +107,7 @@ def start_server(self, configs): raise RuntimeError( "启动fastdeployserver服务器失败,请检查环境中是否存在fastdeployserver程序") server_name = configs['server-name'] if configs[ - 'server-name'] else process.pid + 'server-name'] else str(process.pid) self.opened_servers[server_name] = process return server_name From 29c72e8ad56ad1de02a7cebc8198f29abdc0e76f Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 6 Jan 2023 19:57:04 +0800 Subject: [PATCH 41/48] optimize process judgement logic --- requirements.txt | 1 + .../fastdeploy_client/http_client_manager.py | 2 +- .../component/inference/fastdeploy_lib.py | 58 ++++++++++++++++++- .../component/inference/fastdeploy_server.py | 29 ++++++---- 4 files changed, 75 insertions(+), 15 deletions(-) diff --git a/requirements.txt b/requirements.txt index b3a3bd220..943f7c4eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ gradio fastdeploy-python tritonclient[all] attrdict +psutil diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index 47872e9b1..53aaca0e9 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -239,7 +239,7 @@ def _create_client(self, server_url): except Exception: raise RuntimeError( 'Can not connect to server {}, please check your \ - server address'.format(server_url)) + server address'.format(server_url)) def infer(self, server_url, model_name, model_version, inputs): fastdeploy_client = self._create_client(server_url) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 72196ee5b..8527c798f 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -26,6 +26,7 @@ import google.protobuf.json_format as json_format import google.protobuf.text_format as text_format 
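+# psutil is used to verify that a pid recorded in a server logfile still
+# belongs to a fastdeployserver process, since pids can be recycled by the OS.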
+import psutil
 import requests
 
 from .proto.model_config.protxt_pb2 import ModelConfig
@@ -437,7 +438,10 @@ def launch_process(kwargs: dict):
         cmd.append('--{}'.format(key))
         cmd.append('{}'.format(value))
         start_args[key] = value
-
+    if start_args['server-name'] and start_args['server-name'] in os.listdir(
+            FASTDEPLOYSERVER_PATH):
+        raise RuntimeError("启动服务失败,服务名称{}已经被使用,请重新填写服务名称".format(
+            start_args['server-name']))
     all_model_configs, all_model_versions = analyse_config(
         start_args['model-repository'])
     model_repo_config = original_format_to_exchange_format(
@@ -568,6 +572,29 @@ def get_process_output(server_id, length):
     return data
 
 
+def mark_pid_for_dead_process(server_id):
+    '''
+    Resource files for a dead server are only deleted when the user closes the server in the frontend.
+    When the user closes the server, the pid recorded in the logfile will be killed.
+    In case a dead process id is reassigned to a new process, we should mark the pid recorded in the logfile as outdated.
+    Here, we choose to replace the pid with -1 in the logfile to denote the zombie process \
+        which has been polled and is already dead.
+    Args:
+        server_id(str): fastdeployserver process name
+    '''
+    if os.path.exists(
+            os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))):
+        with open(
+                os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
+                'r') as f:
+            contents = f.read().split('\n')
+        contents[1] = '-1'  # we replace the pid with -1
+        with open(
+                os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id)),
+                'w') as f:
+            f.write('\n'.join(contents))
+
+
 def delete_files_for_process(server_id):
     '''
     Delete logfile for fastdeployserver process.
@@ -591,9 +618,10 @@ def kill_process(process):
     '''
     if type(process) == str:  # server_id, use os.kill to terminate
         pid = get_process_pid(process)
+        if pid == -1:  # we use -1 to mark a dead process
+            return
         try:
             os.kill(pid, signal.SIGKILL)
-            # delete file ${pid} if exists
         except Exception:
             pass
     else:
@@ -624,6 +652,21 @@ def get_alive_fastdeploy_servers():
     return server_names
 
 
+def check_process_zombie(server_id):
+    '''
+    Given a server id, check whether the process has become a zombie (its recorded pid was marked as -1).
+    Args:
+        server_id(str): fastdeployserver process name
+    Return:
+        status(bool): True if the process has become a zombie.
+    '''
+    pid = get_process_pid(server_id)
+    if pid == -1:
+        return True
+    else:
+        return False
+
+
 def check_process_alive(server_id):
     '''
     Given a server id, check whether the process is alive or not.
@@ -635,12 +678,21 @@ def check_process_alive(server_id):
     pid = get_process_pid(server_id)
     if pid is None:
         return False
+    if pid == -1:  # We use -1 to mark a zombie process which is already dead.
+        # Since the user may want to know why the process died from an exception,
+        # we return True so the frontend can still fetch the dead process's log.
+        return True
     try:
         os.kill(pid, 0)
    except OSError:
         return False
     else:
-        return True
+        if 'fastdeployserve' not in psutil.Process(pid).name(
+        ):  # make sure the pid really is a fastdeployserver process, in case the pid has been reassigned.
+            # Note: psutil reports 'fastdeployserve' rather than 'fastdeployserver' because Linux truncates process names (/proc/<pid>/comm) to 15 characters.
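+            # A name mismatch means the pid has been recycled for an unrelated
+            # process, so the original server is gone.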
+            return False
+        else:
+            return True
 
 
 _metric_column_name = {
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
index 6498b54f8..b5d8af42c 100644
--- a/visualdl/component/inference/fastdeploy_server.py
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -27,7 +27,7 @@
 from .fastdeploy_client.client_app import create_gradio_client_app
 from .fastdeploy_lib import analyse_config
-from .fastdeploy_lib import check_process_alive
+from .fastdeploy_lib import check_process_zombie
 from .fastdeploy_lib import delete_files_for_process
 from .fastdeploy_lib import exchange_format_to_original_format
 from .fastdeploy_lib import generate_metric_table
@@ -40,6 +40,7 @@
 from .fastdeploy_lib import json2pbtxt
 from .fastdeploy_lib import kill_process
 from .fastdeploy_lib import launch_process
+from .fastdeploy_lib import mark_pid_for_dead_process
 from .fastdeploy_lib import original_format_to_exchange_format
 from .fastdeploy_lib import validate_data
 from visualdl.server.api import gen_result
@@ -121,13 +122,7 @@ def stop_server(self, server_id):
         # FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn)
         kill_process(server_id)
         delete_files_for_process(server_id)
-        # check if there are servers killed by other vdl app instance and become zoombie
-        should_delete = []
-        for server_id, process in self.opened_servers.items():
-            if process.poll() is not None:
-                should_delete.append(server_id)
-        for server_id in should_delete:
-            del self.opened_servers[server_id]
+        self._poll_zombie_process()
 
     @result('text/plain')
     def get_server_output(self, server_id, length):
@@ -154,10 +149,11 @@ def get_server_list(self):
 
     @result()
     def check_server_alive(self, server_id):
-        if check_process_alive(server_id) is False:
-            delete_files_for_process(server_id)
+        self._poll_zombie_process()
+        if check_process_zombie(server_id) is True:
             raise RuntimeError(
-                "服务{}由于发生异常而退出,通常是由于启动参数设置不当或者环境配置有问题,请检查服务日志查看原因,然后手动关闭该服务项")
+                "服务{}由于发生异常或者被kill而退出,通常是由于启动参数设置不当或者环境配置有问题,请检查服务日志查看原因,然后手动关闭该服务项"
+                .format(server_id))
         return
 
     @result()
@@ -293,6 +289,17 @@ def check_alive():
         check_alive()
         return self.client_port
 
+    def _poll_zombie_process(self):
+        # check if there are servers killed by other vdl app instances that have become zombies
+        should_delete = []
+        for server_id, process in self.opened_servers.items():
+            if process.poll() is not None:
+                mark_pid_for_dead_process(server_id)
+                should_delete.append(server_id)
+
+        for server_id in should_delete:
+            del self.opened_servers[server_id]
+
 
 def create_fastdeploy_api_call():
     api = FastDeployServerApi()
From 92005ce1a3445037bcec103ffd35964896df7702 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Sun, 8 Jan 2023 18:44:59 +0800
Subject: [PATCH 42/48] optimize for deleting resource files

---
 .../inference/fastdeploy_client/client_app.py |  1 -
 .../fastdeploy_client/http_client_manager.py  | 12 +++--
 .../component/inference/fastdeploy_lib.py     |  6 +++
 .../component/inference/fastdeploy_server.py  | 51 +++++++++++++++++++
 4 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py
index 7b28bdbe3..397b8255a 100644
--- a/visualdl/component/inference/fastdeploy_client/client_app.py
+++ b/visualdl/component/inference/fastdeploy_client/client_app.py
@@ -313,7 +313,6 @@ def component_inference(*args):
         try:
             infer_results = _http_manager.infer(
                 server_addr, model_name, model_version, inputs)
-
print('infer_results', infer_results) results = {status_text: 'Inference Successful'} for i, (output_name, data) in enumerate(infer_results.items()): diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index 53aaca0e9..572a837a7 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -252,9 +252,15 @@ def infer(self, server_url, model_name, model_version, inputs): results = {} for output in output_metadata: result = response.as_numpy(output.name) # datatype: numpy - if output.datatype == 'BYTES': - try: # maybe not vison tasks, normal text - value = result[0][0] # datatype: bytes + if output.datatype == 'BYTES': # datatype: bytes + try: + value = result + if len(result.shape) == 1: + value = result[0] + elif len(result.shape) == 2: + value = result[0][0] + elif len(result.shape) == 3: + value = result[0][0][0] result = json.loads(value) # datatype: json except Exception: pass diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index 8527c798f..e99cfa16e 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -126,6 +126,12 @@ def analyse_config(cur_dir: str): os.path.join(model_dir, model_sub_dir)): all_model_versions[model_name][model_sub_dir].append( version_resource_file) + if model_name not in all_model_versions: # if a model has config but no version directory, + # to convenient users, we create one + all_model_versions[model_name] = {} + os.mkdir(os.path.join(model_dir, '1')) + all_model_versions[model_name]['1'] = [] + if not all_model_configs: raise Exception('所选择的路径不是一个有效的模型库,请选择正确的路径') return all_model_configs, all_model_versions diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index b5d8af42c..c2bd25b34 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -213,6 +213,11 @@ def download_pretrain_model(self, cur_dir, model_name, version, os.path.join( os.path.dirname(model_path), 'model{}'.format( os.path.splitext(filename)[1]))) + else: + shutil.move( + os.path.join(model_path, filename), + os.path.join( + os.path.dirname(model_path), filename)) shutil.rmtree(model_path) version_info_for_frontend = [] for version_name in os.listdir(os.path.join(cur_dir, model_name)): @@ -244,6 +249,16 @@ def get_config_for_model(self, cur_dir, name, config_filename): def get_config_filenames_for_model(self, cur_dir, name): return get_config_filenames_for_one_model(cur_dir, name) + @result() + def delete_config_for_model(self, cur_dir, name, config_filename): + if self.root_dir not in Path( + os.path.abspath(cur_dir) + ).parents: # should prevent user remove files outside model-repository + raise RuntimeError('所删除的文件路径有误') + if os.path.exists(os.path.join(cur_dir, name, config_filename)): + os.remove(os.path.join(cur_dir, name, config_filename)) + return get_config_filenames_for_one_model(cur_dir, name) + @result() def set_default_config_for_model(self, cur_dir, name, config_filename): model_dir = os.path.join(os.path.abspath(cur_dir), name) @@ -260,6 +275,37 @@ def set_default_config_for_model(self, cur_dir, name, config_filename): os.path.join(model_dir, 'config.pbtxt')) return + @result() + def delete_resource_for_model(self, cur_dir, 
model_name, version, + resource_filename): + if self.root_dir not in Path( + os.path.abspath(cur_dir) + ).parents: # should prevent user remove files outside model-repository + raise RuntimeError('所删除的文件路径有误') + resource_path = os.path.join( + os.path.abspath(cur_dir), model_name, version, resource_filename) + if os.path.exists(resource_path): + os.remove(resource_path) + version_info_for_frontend = [] + for version_name in os.listdir(os.path.join(cur_dir, model_name)): + if re.match(r'\d+', + version_name): # version directory consists of numbers + version_filenames_dict_for_frontend = {} + version_filenames_dict_for_frontend['title'] = version_name + version_filenames_dict_for_frontend['key'] = version_name + version_filenames_dict_for_frontend['children'] = [] + for filename in os.listdir( + os.path.join(cur_dir, model_name, version_name)): + version_filenames_dict_for_frontend['children'].append({ + 'title': + filename, + 'key': + filename + }) + version_info_for_frontend.append( + version_filenames_dict_for_frontend) + return version_info_for_frontend + def create_fastdeploy_client(self): if self.client_port is None: @@ -314,6 +360,8 @@ def create_fastdeploy_api_call(): ['dir', 'name', 'config_filename']), 'set_default_config_for_model': (api.set_default_config_for_model, ['dir', 'name', 'config_filename']), + 'delete_config_for_model': (api.delete_config_for_model, + ['dir', 'name', 'config_filename']), 'start_server': (api.start_server, ['config']), 'stop_server': (api.stop_server, ['server_id']), 'get_server_output': (api.get_server_output, ['server_id', 'length']), @@ -326,6 +374,9 @@ def create_fastdeploy_api_call(): 'download_pretrain_model': (api.download_pretrain_model, ['dir', 'name', 'version', 'pretrain_model_name']), + 'delete_resource_for_model': + (api.delete_resource_for_model, + ['dir', 'name', 'version', 'resource_filename']) } def call(path: str, args): From aeb2c9ba351e547fc132666577cbc2671eecbf5d Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 9 Jan 2023 11:51:20 +0800 Subject: [PATCH 43/48] add rename resource file --- visualdl/component/inference/fastdeploy_lib.py | 16 ++++++++++++---- .../component/inference/fastdeploy_server.py | 18 +++++++++++------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index e99cfa16e..a6b31ac21 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -17,7 +17,6 @@ import os import random import re -import shutil import signal import string from collections import defaultdict @@ -95,9 +94,8 @@ def analyse_config(cur_dir: str): config_filenames.insert(0, default_config_filename) else: # if no config.pbtxt, we choose the first file in config_filenames list to create config.pbtxt - shutil.copy( - os.path.join(model_dir, default_config_filename), - os.path.join(model_dir, 'config.pbtxt')) + copy_config_file_to_default_config(model_dir, + default_config_filename) default_config_filename = 'config.pbtxt' config_filenames.insert(0, default_config_filename) json_config = json.loads( @@ -191,6 +189,16 @@ def exchange_format_to_original_format(exchange_format): return all_models +def copy_config_file_to_default_config(model_dir, config_name): + json_config = json.loads( + pbtxt2json(open(os.path.join(model_dir, config_name)).read())) + model_name = os.path.basename(model_dir) + json_config['name'] = model_name + text_proto = json2pbtxt(json.dumps(json_config)) + with 
open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f: + f.write(text_proto) + + def original_format_to_exchange_format(original_format, version_info): ''' Change config original format to exchange format. diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index c2bd25b34..58eba20c9 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -28,6 +28,7 @@ from .fastdeploy_client.client_app import create_gradio_client_app from .fastdeploy_lib import analyse_config from .fastdeploy_lib import check_process_zombie +from .fastdeploy_lib import copy_config_file_to_default_config from .fastdeploy_lib import delete_files_for_process from .fastdeploy_lib import exchange_format_to_original_format from .fastdeploy_lib import generate_metric_table @@ -270,22 +271,22 @@ def set_default_config_for_model(self, cur_dir, name, config_filename): model_dir, 'config_vdlbackup_{}.pbtxt'.format( datetime.datetime.now().isoformat()))) if config_filename != 'config.pbtxt': - shutil.copy( - os.path.join(model_dir, config_filename), - os.path.join(model_dir, 'config.pbtxt')) + copy_config_file_to_default_config(model_dir, config_filename) return @result() def delete_resource_for_model(self, cur_dir, model_name, version, - resource_filename): + resource_filename, new_filename): if self.root_dir not in Path( os.path.abspath(cur_dir) ).parents: # should prevent user remove files outside model-repository - raise RuntimeError('所删除的文件路径有误') + raise RuntimeError('所重命名的文件路径有误') resource_path = os.path.join( os.path.abspath(cur_dir), model_name, version, resource_filename) + new_file_path = os.path.join( + os.path.abspath(cur_dir), model_name, version, new_filename) if os.path.exists(resource_path): - os.remove(resource_path) + shutil.move(resource_path, new_file_path) version_info_for_frontend = [] for version_name in os.listdir(os.path.join(cur_dir, model_name)): if re.match(r'\d+', @@ -376,7 +377,10 @@ def create_fastdeploy_api_call(): ['dir', 'name', 'version', 'pretrain_model_name']), 'delete_resource_for_model': (api.delete_resource_for_model, - ['dir', 'name', 'version', 'resource_filename']) + ['dir', 'name', 'version', 'resource_filename']), + 'rename_resource_for_model': (api.rename_resource_for_model, [ + 'dir', 'name', 'version', 'resource_filename', 'new_filename' + ]) } def call(path: str, args): From 23a6c69de81f2881725fca3a1df0198924ce428b Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 9 Jan 2023 12:00:46 +0800 Subject: [PATCH 44/48] fix --- .../component/inference/fastdeploy_server.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py index 58eba20c9..6397c33e5 100644 --- a/visualdl/component/inference/fastdeploy_server.py +++ b/visualdl/component/inference/fastdeploy_server.py @@ -276,6 +276,37 @@ def set_default_config_for_model(self, cur_dir, name, config_filename): @result() def delete_resource_for_model(self, cur_dir, model_name, version, + resource_filename): + if self.root_dir not in Path( + os.path.abspath(cur_dir) + ).parents: # should prevent user remove files outside model-repository + raise RuntimeError('所删除的文件路径有误') + resource_path = os.path.join( + os.path.abspath(cur_dir), model_name, version, resource_filename) + if os.path.exists(resource_path): + os.remove(resource_path) + version_info_for_frontend = [] + for version_name in 
os.listdir(os.path.join(cur_dir, model_name)): + if re.match(r'\d+', + version_name): # version directory consists of numbers + version_filenames_dict_for_frontend = {} + version_filenames_dict_for_frontend['title'] = version_name + version_filenames_dict_for_frontend['key'] = version_name + version_filenames_dict_for_frontend['children'] = [] + for filename in os.listdir( + os.path.join(cur_dir, model_name, version_name)): + version_filenames_dict_for_frontend['children'].append({ + 'title': + filename, + 'key': + filename + }) + version_info_for_frontend.append( + version_filenames_dict_for_frontend) + return version_info_for_frontend + + @result() + def rename_resource_for_model(self, cur_dir, model_name, version, resource_filename, new_filename): if self.root_dir not in Path( os.path.abspath(cur_dir) From 00566df408a86e9226554ba98625d8423a44fed6 Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 9 Jan 2023 14:27:58 +0800 Subject: [PATCH 45/48] fix a bug --- .../fastdeploy_client/http_client_manager.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py index 572a837a7..691594152 100644 --- a/visualdl/component/inference/fastdeploy_client/http_client_manager.py +++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py @@ -15,6 +15,7 @@ import json import re +import numpy as np import requests import tritonclient.http as httpclient from attrdict import AttrDict @@ -40,6 +41,18 @@ def prepare_request(inputs_meta, inputs_data, outputs_meta): raise RuntimeError( 'Error: input name {} required for model not existed.'.format( input_name)) + if input_dict['datatype'] == 'FP32': + inputs_data[input_name] = inputs_data[input_name].astype( + np.float32 + ) / 255 # image data returned by gradio is uint8, convert to fp32 + if len(input_dict['shape'] + ) == 3 and input_dict['shape'][0] == 3: # NCHW + inputs_data[input_name] = inputs_data[input_name][0].transpose( + 2, 0, 1) + elif len(input_dict['shape'] + ) == 4 and input_dict['shape'][1] == 3: # NCHW + inputs_data[input_name] = inputs_data[input_name].transpose( + 0, 3, 1, 2) infer_input = httpclient.InferInput( input_name, inputs_data[input_name].shape, input_dict['datatype']) infer_input.set_data_from_numpy(inputs_data[input_name]) @@ -249,6 +262,7 @@ def infer(self, server_url, model_name, model_version, inputs): output_metadata) response = fastdeploy_client.infer( model_name, inputs, model_version=model_version, outputs=outputs) + results = {} for output in output_metadata: result = response.as_numpy(output.name) # datatype: numpy From 86c73cd3ff44bfee117fdba255f0a0a69cc16ff4 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 10 Jan 2023 15:47:24 +0800 Subject: [PATCH 46/48] optimize code structure --- .../inference/fastdeploy_client/__init__.py | 14 + .../component/inference/fastdeploy_lib.py | 2 +- .../inference/proto/model_config/__init__.py | 0 .../proto/model_config/protxt_pb2.py | 856 ------------------ 4 files changed, 15 insertions(+), 857 deletions(-) delete mode 100644 visualdl/component/inference/proto/model_config/__init__.py delete mode 100644 visualdl/component/inference/proto/model_config/protxt_pb2.py diff --git a/visualdl/component/inference/fastdeploy_client/__init__.py b/visualdl/component/inference/fastdeploy_client/__init__.py index e69de29bb..9c19f7b87 100644 --- a/visualdl/component/inference/fastdeploy_client/__init__.py +++ 
b/visualdl/component/inference/fastdeploy_client/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022 VisualDL Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ======================================================================= diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py index a6b31ac21..3c0138f93 100644 --- a/visualdl/component/inference/fastdeploy_lib.py +++ b/visualdl/component/inference/fastdeploy_lib.py @@ -28,7 +28,7 @@ import psutil import requests -from .proto.model_config.protxt_pb2 import ModelConfig +from .proto.model_config_pb2 import ModelConfig from visualdl.utils.dir import FASTDEPLOYSERVER_PATH diff --git a/visualdl/component/inference/proto/model_config/__init__.py b/visualdl/component/inference/proto/model_config/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/visualdl/component/inference/proto/model_config/protxt_pb2.py b/visualdl/component/inference/proto/model_config/protxt_pb2.py deleted file mode 100644 index 70bf7b906..000000000 --- a/visualdl/component/inference/proto/model_config/protxt_pb2.py +++ /dev/null @@ -1,856 +0,0 @@ -# flake8: noqa -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: model_config.protxt -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import enum_type_wrapper -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x13model_config.protxt\x12\tinference\"\x96\x01\n\x10ModelRateLimiter\x12\x37\n\tresources\x18\x01 \x03(\x0b\x32$.inference.ModelRateLimiter.Resource\x12\x10\n\x08priority\x18\x02 \x01(\r\x1a\x37\n\x08Resource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06global\x18\x02 \x01(\x08\x12\r\n\x05\x63ount\x18\x03 \x01(\r\"\x87\x04\n\x12ModelInstanceGroup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x30\n\x04kind\x18\x04 \x01(\x0e\x32\".inference.ModelInstanceGroup.Kind\x12\r\n\x05\x63ount\x18\x02 \x01(\x05\x12\x31\n\x0crate_limiter\x18\x06 \x01(\x0b\x32\x1b.inference.ModelRateLimiter\x12\x0c\n\x04gpus\x18\x03 \x03(\x05\x12H\n\x11secondary_devices\x18\x08 \x03(\x0b\x32-.inference.ModelInstanceGroup.SecondaryDevice\x12\x0f\n\x07profile\x18\x05 \x03(\t\x12\x0f\n\x07passive\x18\x07 \x01(\x08\x12\x13\n\x0bhost_policy\x18\t \x01(\t\x1a\x9c\x01\n\x0fSecondaryDevice\x12O\n\x04kind\x18\x01 \x01(\x0e\x32\x41.inference.ModelInstanceGroup.SecondaryDevice.SecondaryDeviceKind\x12\x11\n\tdevice_id\x18\x02 \x01(\x03\"%\n\x13SecondaryDeviceKind\x12\x0e\n\nKIND_NVDLA\x10\x00\"A\n\x04Kind\x12\r\n\tKIND_AUTO\x10\x00\x12\x0c\n\x08KIND_GPU\x10\x01\x12\x0c\n\x08KIND_CPU\x10\x02\x12\x0e\n\nKIND_MODEL\x10\x03\"#\n\x12ModelTensorReshape\x12\r\n\x05shape\x18\x01 \x03(\x03\"\xb2\x02\n\nModelInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12,\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1c.inference.ModelInput.Format\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\x12\x1a\n\x12\x61llow_ragged_batch\x18\x07 \x01(\x08\x12\x10\n\x08optional\x18\x08 \x01(\x08\";\n\x06\x46ormat\x12\x0f\n\x0b\x46ORMAT_NONE\x10\x00\x12\x0f\n\x0b\x46ORMAT_NHWC\x10\x01\x12\x0f\n\x0b\x46ORMAT_NCHW\x10\x02\"\xb2\x01\n\x0bModelOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x03 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x16\n\x0elabel_filename\x18\x04 \x01(\t\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\"\xd9\x02\n\nBatchInput\x12(\n\x04kind\x18\x01 \x01(\x0e\x32\x1a.inference.BatchInput.Kind\x12\x13\n\x0btarget_name\x18\x02 \x03(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x14\n\x0csource_input\x18\x04 \x03(\t\"\xcd\x01\n\x04Kind\x12\x17\n\x13\x42\x41TCH_ELEMENT_COUNT\x10\x00\x12#\n\x1f\x42\x41TCH_ACCUMULATED_ELEMENT_COUNT\x10\x01\x12-\n)BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO\x10\x02\x12$\n BATCH_MAX_ELEMENT_COUNT_AS_SHAPE\x10\x03\x12\x14\n\x10\x42\x41TCH_ITEM_SHAPE\x10\x04\x12\x1c\n\x18\x42\x41TCH_ITEM_SHAPE_FLATTEN\x10\x05\"\x8f\x01\n\x0b\x42\x61tchOutput\x12\x13\n\x0btarget_name\x18\x01 \x03(\t\x12)\n\x04kind\x18\x02 \x01(\x0e\x32\x1b.inference.BatchOutput.Kind\x12\x14\n\x0csource_input\x18\x03 
\x03(\t\"*\n\x04Kind\x12\"\n\x1e\x42\x41TCH_SCATTER_WITH_INPUT_SHAPE\x10\x00\"\x90\x02\n\x12ModelVersionPolicy\x12\x36\n\x06latest\x18\x01 \x01(\x0b\x32$.inference.ModelVersionPolicy.LatestH\x00\x12\x30\n\x03\x61ll\x18\x02 \x01(\x0b\x32!.inference.ModelVersionPolicy.AllH\x00\x12:\n\x08specific\x18\x03 \x01(\x0b\x32&.inference.ModelVersionPolicy.SpecificH\x00\x1a\x1e\n\x06Latest\x12\x14\n\x0cnum_versions\x18\x01 \x01(\r\x1a\x05\n\x03\x41ll\x1a\x1c\n\x08Specific\x12\x10\n\x08versions\x18\x01 \x03(\x03\x42\x0f\n\rpolicy_choice\"\xfd\r\n\x17ModelOptimizationPolicy\x12\x37\n\x05graph\x18\x01 \x01(\x0b\x32(.inference.ModelOptimizationPolicy.Graph\x12\x42\n\x08priority\x18\x02 \x01(\x0e\x32\x30.inference.ModelOptimizationPolicy.ModelPriority\x12\x35\n\x04\x63uda\x18\x03 \x01(\x0b\x32\'.inference.ModelOptimizationPolicy.Cuda\x12X\n\x16\x65xecution_accelerators\x18\x04 \x01(\x0b\x32\x38.inference.ModelOptimizationPolicy.ExecutionAccelerators\x12R\n\x13input_pinned_memory\x18\x05 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12S\n\x14output_pinned_memory\x18\x06 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12&\n\x1egather_kernel_buffer_threshold\x18\x07 \x01(\r\x12\x16\n\x0e\x65\x61ger_batching\x18\x08 \x01(\x08\x1a\x16\n\x05Graph\x12\r\n\x05level\x18\x01 \x01(\x05\x1a\xba\x05\n\x04\x43uda\x12\x0e\n\x06graphs\x18\x01 \x01(\x08\x12\x18\n\x10\x62usy_wait_events\x18\x02 \x01(\x08\x12\x45\n\ngraph_spec\x18\x03 \x03(\x0b\x32\x31.inference.ModelOptimizationPolicy.Cuda.GraphSpec\x12\x1a\n\x12output_copy_stream\x18\x04 \x01(\x08\x1a\xa4\x04\n\tGraphSpec\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12K\n\x05input\x18\x02 \x03(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry\x12W\n\x11graph_lower_bound\x18\x03 \x01(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound\x1a\x14\n\x05Shape\x12\x0b\n\x03\x64im\x18\x01 \x03(\x03\x1a\xdf\x01\n\nLowerBound\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12V\n\x05input\x18\x02 \x03(\x0b\x32G.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\xa4\x03\n\x15\x45xecutionAccelerators\x12g\n\x19gpu_execution_accelerator\x18\x01 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x12g\n\x19\x63pu_execution_accelerator\x18\x02 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x1a\xb8\x01\n\x0b\x41\x63\x63\x65lerator\x12\x0c\n\x04name\x18\x01 \x01(\t\x12h\n\nparameters\x18\x02 \x03(\x0b\x32T.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a$\n\x12PinnedMemoryBuffer\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"I\n\rModelPriority\x12\x14\n\x10PRIORITY_DEFAULT\x10\x00\x12\x10\n\x0cPRIORITY_MAX\x10\x01\x12\x10\n\x0cPRIORITY_MIN\x10\x02\"\xdb\x01\n\x10ModelQueuePolicy\x12\x41\n\x0etimeout_action\x18\x01 \x01(\x0e\x32).inference.ModelQueuePolicy.TimeoutAction\x12$\n\x1c\x64\x65\x66\x61ult_timeout_microseconds\x18\x02 \x01(\x04\x12\x1e\n\x16\x61llow_timeout_override\x18\x03 \x01(\x08\x12\x16\n\x0emax_queue_size\x18\x04 
\x01(\r\"&\n\rTimeoutAction\x12\n\n\x06REJECT\x10\x00\x12\t\n\x05\x44\x45LAY\x10\x01\"\x9b\x03\n\x14ModelDynamicBatching\x12\x1c\n\x14preferred_batch_size\x18\x01 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x02 \x01(\x04\x12\x19\n\x11preserve_ordering\x18\x03 \x01(\x08\x12\x17\n\x0fpriority_levels\x18\x04 \x01(\r\x12\x1e\n\x16\x64\x65\x66\x61ult_priority_level\x18\x05 \x01(\r\x12\x39\n\x14\x64\x65\x66\x61ult_queue_policy\x18\x06 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy\x12W\n\x15priority_queue_policy\x18\x07 \x03(\x0b\x32\x38.inference.ModelDynamicBatching.PriorityQueuePolicyEntry\x1aW\n\x18PriorityQueuePolicyEntry\x12\x0b\n\x03key\x18\x01 \x01(\r\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy:\x02\x38\x01\"\xef\t\n\x15ModelSequenceBatching\x12\x41\n\x06\x64irect\x18\x03 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyDirectH\x00\x12\x41\n\x06oldest\x18\x04 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyOldestH\x00\x12&\n\x1emax_sequence_idle_microseconds\x18\x01 \x01(\x04\x12\x44\n\rcontrol_input\x18\x02 \x03(\x0b\x32-.inference.ModelSequenceBatching.ControlInput\x12\x35\n\x05state\x18\x05 \x03(\x0b\x32&.inference.ModelSequenceBatching.State\x1a\xb1\x02\n\x07\x43ontrol\x12;\n\x04kind\x18\x01 \x01(\x0e\x32-.inference.ModelSequenceBatching.Control.Kind\x12\x18\n\x10int32_false_true\x18\x02 \x03(\x05\x12\x17\n\x0f\x66p32_false_true\x18\x03 \x03(\x02\x12\x17\n\x0f\x62ool_false_true\x18\x05 \x03(\x08\x12&\n\tdata_type\x18\x04 \x01(\x0e\x32\x13.inference.DataType\"u\n\x04Kind\x12\x1a\n\x16\x43ONTROL_SEQUENCE_START\x10\x00\x12\x1a\n\x16\x43ONTROL_SEQUENCE_READY\x10\x01\x12\x18\n\x14\x43ONTROL_SEQUENCE_END\x10\x02\x12\x1b\n\x17\x43ONTROL_SEQUENCE_CORRID\x10\x03\x1aW\n\x0c\x43ontrolInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x39\n\x07\x63ontrol\x18\x02 \x03(\x0b\x32(.inference.ModelSequenceBatching.Control\x1a\x8a\x01\n\x0cInitialState\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x13\n\tdata_file\x18\x04 \x01(\tH\x00\x12\x0c\n\x04name\x18\x05 \x01(\tB\x0c\n\nstate_data\x1a\xac\x01\n\x05State\x12\x12\n\ninput_name\x18\x01 \x01(\t\x12\x13\n\x0boutput_name\x18\x02 \x01(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12\x44\n\rinitial_state\x18\x05 \x03(\x0b\x32-.inference.ModelSequenceBatching.InitialState\x1aX\n\x0eStrategyDirect\x12$\n\x1cmax_queue_delay_microseconds\x18\x01 \x01(\x04\x12 \n\x18minimum_slot_utilization\x18\x02 \x01(\x02\x1au\n\x0eStrategyOldest\x12\x1f\n\x17max_candidate_sequences\x18\x01 \x01(\x05\x12\x1c\n\x14preferred_batch_size\x18\x02 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x03 \x01(\x04\x42\x11\n\x0fstrategy_choice\"\xdd\x02\n\x0fModelEnsembling\x12-\n\x04step\x18\x01 \x03(\x0b\x32\x1f.inference.ModelEnsembling.Step\x1a\x9a\x02\n\x04Step\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\x03\x12@\n\tinput_map\x18\x03 \x03(\x0b\x32-.inference.ModelEnsembling.Step.InputMapEntry\x12\x42\n\noutput_map\x18\x04 \x03(\x0b\x32..inference.ModelEnsembling.Step.OutputMapEntry\x1a/\n\rInputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x0eOutputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"&\n\x0eModelParameter\x12\x14\n\x0cstring_value\x18\x01 \x01(\t\"\xd9\x02\n\x0bModelWarmup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 
\x01(\r\x12\x32\n\x06inputs\x18\x03 \x03(\x0b\x32\".inference.ModelWarmup.InputsEntry\x12\r\n\x05\x63ount\x18\x04 \x01(\r\x1a\x97\x01\n\x05Input\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x15\n\x0brandom_data\x18\x04 \x01(\x08H\x00\x12\x19\n\x0finput_data_file\x18\x05 \x01(\tH\x00\x42\x11\n\x0finput_data_type\x1aK\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12+\n\x05value\x18\x02 \x01(\x0b\x32\x1c.inference.ModelWarmup.Input:\x02\x38\x01\".\n\x0fModelOperations\x12\x1b\n\x13op_library_filename\x18\x01 \x03(\t\"+\n\x16ModelTransactionPolicy\x12\x11\n\tdecoupled\x18\x01 \x01(\x08\"\xe6\x01\n\x15ModelRepositoryAgents\x12\x36\n\x06\x61gents\x18\x01 \x03(\x0b\x32&.inference.ModelRepositoryAgents.Agent\x1a\x94\x01\n\x05\x41gent\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\nparameters\x18\x02 \x03(\x0b\x32\x36.inference.ModelRepositoryAgents.Agent.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"$\n\x12ModelResponseCache\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"\xb2\n\n\x0bModelConfig\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08platform\x18\x02 \x01(\t\x12\x0f\n\x07\x62\x61\x63kend\x18\x11 \x01(\t\x12\x35\n\x0eversion_policy\x18\x03 \x01(\x0b\x32\x1d.inference.ModelVersionPolicy\x12\x16\n\x0emax_batch_size\x18\x04 \x01(\x05\x12$\n\x05input\x18\x05 \x03(\x0b\x32\x15.inference.ModelInput\x12&\n\x06output\x18\x06 \x03(\x0b\x32\x16.inference.ModelOutput\x12*\n\x0b\x62\x61tch_input\x18\x14 \x03(\x0b\x32\x15.inference.BatchInput\x12,\n\x0c\x62\x61tch_output\x18\x15 \x03(\x0b\x32\x16.inference.BatchOutput\x12\x38\n\x0coptimization\x18\x0c \x01(\x0b\x32\".inference.ModelOptimizationPolicy\x12;\n\x10\x64ynamic_batching\x18\x0b \x01(\x0b\x32\x1f.inference.ModelDynamicBatchingH\x00\x12=\n\x11sequence_batching\x18\r \x01(\x0b\x32 .inference.ModelSequenceBatchingH\x00\x12\x39\n\x13\x65nsemble_scheduling\x18\x0f \x01(\x0b\x32\x1a.inference.ModelEnsemblingH\x00\x12\x35\n\x0einstance_group\x18\x07 \x03(\x0b\x32\x1d.inference.ModelInstanceGroup\x12\x1e\n\x16\x64\x65\x66\x61ult_model_filename\x18\x08 \x01(\t\x12H\n\x12\x63\x63_model_filenames\x18\t \x03(\x0b\x32,.inference.ModelConfig.CcModelFilenamesEntry\x12;\n\x0bmetric_tags\x18\n \x03(\x0b\x32&.inference.ModelConfig.MetricTagsEntry\x12:\n\nparameters\x18\x0e \x03(\x0b\x32&.inference.ModelConfig.ParametersEntry\x12,\n\x0cmodel_warmup\x18\x10 \x03(\x0b\x32\x16.inference.ModelWarmup\x12\x34\n\x10model_operations\x18\x12 \x01(\x0b\x32\x1a.inference.ModelOperations\x12\x43\n\x18model_transaction_policy\x18\x13 \x01(\x0b\x32!.inference.ModelTransactionPolicy\x12\x41\n\x17model_repository_agents\x18\x17 \x01(\x0b\x32 .inference.ModelRepositoryAgents\x12\x35\n\x0eresponse_cache\x18\x18 \x01(\x0b\x32\x1d.inference.ModelResponseCache\x1a\x37\n\x15\x43\x63ModelFilenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x31\n\x0fMetricTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 
\x01(\x0b\x32\x19.inference.ModelParameter:\x02\x38\x01\x42\x13\n\x11scheduling_choice*\xfa\x01\n\x08\x44\x61taType\x12\x10\n\x0cTYPE_INVALID\x10\x00\x12\r\n\tTYPE_BOOL\x10\x01\x12\x0e\n\nTYPE_UINT8\x10\x02\x12\x0f\n\x0bTYPE_UINT16\x10\x03\x12\x0f\n\x0bTYPE_UINT32\x10\x04\x12\x0f\n\x0bTYPE_UINT64\x10\x05\x12\r\n\tTYPE_INT8\x10\x06\x12\x0e\n\nTYPE_INT16\x10\x07\x12\x0e\n\nTYPE_INT32\x10\x08\x12\x0e\n\nTYPE_INT64\x10\t\x12\r\n\tTYPE_FP16\x10\n\x12\r\n\tTYPE_FP32\x10\x0b\x12\r\n\tTYPE_FP64\x10\x0c\x12\x0f\n\x0bTYPE_STRING\x10\r\x12\r\n\tTYPE_BF16\x10\x0e\x62\x06proto3' -) - -_DATATYPE = DESCRIPTOR.enum_types_by_name['DataType'] -DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE) -TYPE_INVALID = 0 -TYPE_BOOL = 1 -TYPE_UINT8 = 2 -TYPE_UINT16 = 3 -TYPE_UINT32 = 4 -TYPE_UINT64 = 5 -TYPE_INT8 = 6 -TYPE_INT16 = 7 -TYPE_INT32 = 8 -TYPE_INT64 = 9 -TYPE_FP16 = 10 -TYPE_FP32 = 11 -TYPE_FP64 = 12 -TYPE_STRING = 13 -TYPE_BF16 = 14 - -_MODELRATELIMITER = DESCRIPTOR.message_types_by_name['ModelRateLimiter'] -_MODELRATELIMITER_RESOURCE = _MODELRATELIMITER.nested_types_by_name['Resource'] -_MODELINSTANCEGROUP = DESCRIPTOR.message_types_by_name['ModelInstanceGroup'] -_MODELINSTANCEGROUP_SECONDARYDEVICE = _MODELINSTANCEGROUP.nested_types_by_name[ - 'SecondaryDevice'] -_MODELTENSORRESHAPE = DESCRIPTOR.message_types_by_name['ModelTensorReshape'] -_MODELINPUT = DESCRIPTOR.message_types_by_name['ModelInput'] -_MODELOUTPUT = DESCRIPTOR.message_types_by_name['ModelOutput'] -_BATCHINPUT = DESCRIPTOR.message_types_by_name['BatchInput'] -_BATCHOUTPUT = DESCRIPTOR.message_types_by_name['BatchOutput'] -_MODELVERSIONPOLICY = DESCRIPTOR.message_types_by_name['ModelVersionPolicy'] -_MODELVERSIONPOLICY_LATEST = _MODELVERSIONPOLICY.nested_types_by_name['Latest'] -_MODELVERSIONPOLICY_ALL = _MODELVERSIONPOLICY.nested_types_by_name['All'] -_MODELVERSIONPOLICY_SPECIFIC = _MODELVERSIONPOLICY.nested_types_by_name[ - 'Specific'] -_MODELOPTIMIZATIONPOLICY = DESCRIPTOR.message_types_by_name[ - 'ModelOptimizationPolicy'] -_MODELOPTIMIZATIONPOLICY_GRAPH = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ - 'Graph'] -_MODELOPTIMIZATIONPOLICY_CUDA = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ - 'Cuda'] -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC = _MODELOPTIMIZATIONPOLICY_CUDA.nested_types_by_name[ - 'GraphSpec'] -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ - 'Shape'] -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ - 'LowerBound'] -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.nested_types_by_name[ - 'InputEntry'] -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ - 'InputEntry'] -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ - 'ExecutionAccelerators'] -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.nested_types_by_name[ - 'Accelerator'] -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.nested_types_by_name[ - 'ParametersEntry'] -_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ - 'PinnedMemoryBuffer'] -_MODELQUEUEPOLICY = DESCRIPTOR.message_types_by_name['ModelQueuePolicy'] -_MODELDYNAMICBATCHING = DESCRIPTOR.message_types_by_name[ - 
'ModelDynamicBatching'] -_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY = _MODELDYNAMICBATCHING.nested_types_by_name[ - 'PriorityQueuePolicyEntry'] -_MODELSEQUENCEBATCHING = DESCRIPTOR.message_types_by_name[ - 'ModelSequenceBatching'] -_MODELSEQUENCEBATCHING_CONTROL = _MODELSEQUENCEBATCHING.nested_types_by_name[ - 'Control'] -_MODELSEQUENCEBATCHING_CONTROLINPUT = _MODELSEQUENCEBATCHING.nested_types_by_name[ - 'ControlInput'] -_MODELSEQUENCEBATCHING_INITIALSTATE = _MODELSEQUENCEBATCHING.nested_types_by_name[ - 'InitialState'] -_MODELSEQUENCEBATCHING_STATE = _MODELSEQUENCEBATCHING.nested_types_by_name[ - 'State'] -_MODELSEQUENCEBATCHING_STRATEGYDIRECT = _MODELSEQUENCEBATCHING.nested_types_by_name[ - 'StrategyDirect'] -_MODELSEQUENCEBATCHING_STRATEGYOLDEST = _MODELSEQUENCEBATCHING.nested_types_by_name[ - 'StrategyOldest'] -_MODELENSEMBLING = DESCRIPTOR.message_types_by_name['ModelEnsembling'] -_MODELENSEMBLING_STEP = _MODELENSEMBLING.nested_types_by_name['Step'] -_MODELENSEMBLING_STEP_INPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[ - 'InputMapEntry'] -_MODELENSEMBLING_STEP_OUTPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[ - 'OutputMapEntry'] -_MODELPARAMETER = DESCRIPTOR.message_types_by_name['ModelParameter'] -_MODELWARMUP = DESCRIPTOR.message_types_by_name['ModelWarmup'] -_MODELWARMUP_INPUT = _MODELWARMUP.nested_types_by_name['Input'] -_MODELWARMUP_INPUTSENTRY = _MODELWARMUP.nested_types_by_name['InputsEntry'] -_MODELOPERATIONS = DESCRIPTOR.message_types_by_name['ModelOperations'] -_MODELTRANSACTIONPOLICY = DESCRIPTOR.message_types_by_name[ - 'ModelTransactionPolicy'] -_MODELREPOSITORYAGENTS = DESCRIPTOR.message_types_by_name[ - 'ModelRepositoryAgents'] -_MODELREPOSITORYAGENTS_AGENT = _MODELREPOSITORYAGENTS.nested_types_by_name[ - 'Agent'] -_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY = _MODELREPOSITORYAGENTS_AGENT.nested_types_by_name[ - 'ParametersEntry'] -_MODELRESPONSECACHE = DESCRIPTOR.message_types_by_name['ModelResponseCache'] -_MODELCONFIG = DESCRIPTOR.message_types_by_name['ModelConfig'] -_MODELCONFIG_CCMODELFILENAMESENTRY = _MODELCONFIG.nested_types_by_name[ - 'CcModelFilenamesEntry'] -_MODELCONFIG_METRICTAGSENTRY = _MODELCONFIG.nested_types_by_name[ - 'MetricTagsEntry'] -_MODELCONFIG_PARAMETERSENTRY = _MODELCONFIG.nested_types_by_name[ - 'ParametersEntry'] -_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND = _MODELINSTANCEGROUP_SECONDARYDEVICE.enum_types_by_name[ - 'SecondaryDeviceKind'] -_MODELINSTANCEGROUP_KIND = _MODELINSTANCEGROUP.enum_types_by_name['Kind'] -_MODELINPUT_FORMAT = _MODELINPUT.enum_types_by_name['Format'] -_BATCHINPUT_KIND = _BATCHINPUT.enum_types_by_name['Kind'] -_BATCHOUTPUT_KIND = _BATCHOUTPUT.enum_types_by_name['Kind'] -_MODELOPTIMIZATIONPOLICY_MODELPRIORITY = _MODELOPTIMIZATIONPOLICY.enum_types_by_name[ - 'ModelPriority'] -_MODELQUEUEPOLICY_TIMEOUTACTION = _MODELQUEUEPOLICY.enum_types_by_name[ - 'TimeoutAction'] -_MODELSEQUENCEBATCHING_CONTROL_KIND = _MODELSEQUENCEBATCHING_CONTROL.enum_types_by_name[ - 'Kind'] -ModelRateLimiter = _reflection.GeneratedProtocolMessageType( - 'ModelRateLimiter', - (_message.Message, ), - { - 'Resource': - _reflection.GeneratedProtocolMessageType( - 'Resource', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELRATELIMITER_RESOURCE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelRateLimiter.Resource) - }), - 'DESCRIPTOR': - _MODELRATELIMITER, - '__module__': - 'model_config.protxt_pb2' - # 
@@protoc_insertion_point(class_scope:inference.ModelRateLimiter) - }) -_sym_db.RegisterMessage(ModelRateLimiter) -_sym_db.RegisterMessage(ModelRateLimiter.Resource) - -ModelInstanceGroup = _reflection.GeneratedProtocolMessageType( - 'ModelInstanceGroup', - (_message.Message, ), - { - 'SecondaryDevice': - _reflection.GeneratedProtocolMessageType( - 'SecondaryDevice', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELINSTANCEGROUP_SECONDARYDEVICE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup.SecondaryDevice) - }), - 'DESCRIPTOR': - _MODELINSTANCEGROUP, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup) - }) -_sym_db.RegisterMessage(ModelInstanceGroup) -_sym_db.RegisterMessage(ModelInstanceGroup.SecondaryDevice) - -ModelTensorReshape = _reflection.GeneratedProtocolMessageType( - 'ModelTensorReshape', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELTENSORRESHAPE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelTensorReshape) - }) -_sym_db.RegisterMessage(ModelTensorReshape) - -ModelInput = _reflection.GeneratedProtocolMessageType( - 'ModelInput', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELINPUT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelInput) - }) -_sym_db.RegisterMessage(ModelInput) - -ModelOutput = _reflection.GeneratedProtocolMessageType( - 'ModelOutput', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELOUTPUT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOutput) - }) -_sym_db.RegisterMessage(ModelOutput) - -BatchInput = _reflection.GeneratedProtocolMessageType( - 'BatchInput', - (_message.Message, ), - { - 'DESCRIPTOR': _BATCHINPUT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.BatchInput) - }) -_sym_db.RegisterMessage(BatchInput) - -BatchOutput = _reflection.GeneratedProtocolMessageType( - 'BatchOutput', - (_message.Message, ), - { - 'DESCRIPTOR': _BATCHOUTPUT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.BatchOutput) - }) -_sym_db.RegisterMessage(BatchOutput) - -ModelVersionPolicy = _reflection.GeneratedProtocolMessageType( - 'ModelVersionPolicy', - (_message.Message, ), - { - 'Latest': - _reflection.GeneratedProtocolMessageType( - 'Latest', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELVERSIONPOLICY_LATEST, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Latest) - }), - 'All': - _reflection.GeneratedProtocolMessageType( - 'All', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELVERSIONPOLICY_ALL, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.All) - }), - 'Specific': - _reflection.GeneratedProtocolMessageType( - 'Specific', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELVERSIONPOLICY_SPECIFIC, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Specific) - }), - 'DESCRIPTOR': - _MODELVERSIONPOLICY, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy) - }) -_sym_db.RegisterMessage(ModelVersionPolicy) -_sym_db.RegisterMessage(ModelVersionPolicy.Latest) -_sym_db.RegisterMessage(ModelVersionPolicy.All) 
-_sym_db.RegisterMessage(ModelVersionPolicy.Specific) - -ModelOptimizationPolicy = _reflection.GeneratedProtocolMessageType( - 'ModelOptimizationPolicy', - (_message.Message, ), - { - 'Graph': - _reflection.GeneratedProtocolMessageType( - 'Graph', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELOPTIMIZATIONPOLICY_GRAPH, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Graph) - }), - 'Cuda': - _reflection.GeneratedProtocolMessageType( - 'Cuda', - (_message.Message, ), - { - 'GraphSpec': - _reflection.GeneratedProtocolMessageType( - 'GraphSpec', - (_message.Message, ), - { - 'Shape': - _reflection.GeneratedProtocolMessageType( - 'Shape', - (_message.Message, ), - { - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape) - }), - 'LowerBound': - _reflection.GeneratedProtocolMessageType( - 'LowerBound', - (_message.Message, ), - { - 'InputEntry': - _reflection.GeneratedProtocolMessageType( - 'InputEntry', - (_message.Message, ), - { - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) - }), - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) - }), - 'InputEntry': - _reflection.GeneratedProtocolMessageType( - 'InputEntry', - (_message.Message, ), - { - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) - }), - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec) - }), - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_CUDA, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda) - }), - 'ExecutionAccelerators': - _reflection.GeneratedProtocolMessageType( - 'ExecutionAccelerators', - (_message.Message, ), - { - 'Accelerator': - _reflection.GeneratedProtocolMessageType( - 'Accelerator', - (_message.Message, ), - { - 'ParametersEntry': - _reflection.GeneratedProtocolMessageType( - 'ParametersEntry', - (_message.Message, ), - { - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) - }), - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) - }), - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators) - }), - 'PinnedMemoryBuffer': - _reflection.GeneratedProtocolMessageType( - 'PinnedMemoryBuffer', - (_message.Message, ), - { - 'DESCRIPTOR': 
_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.PinnedMemoryBuffer) - }), - 'DESCRIPTOR': - _MODELOPTIMIZATIONPOLICY, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy) - }) -_sym_db.RegisterMessage(ModelOptimizationPolicy) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Graph) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.Shape) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) -_sym_db.RegisterMessage( - ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) -_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators) -_sym_db.RegisterMessage( - ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) -_sym_db.RegisterMessage( - ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) -_sym_db.RegisterMessage(ModelOptimizationPolicy.PinnedMemoryBuffer) - -ModelQueuePolicy = _reflection.GeneratedProtocolMessageType( - 'ModelQueuePolicy', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELQUEUEPOLICY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelQueuePolicy) - }) -_sym_db.RegisterMessage(ModelQueuePolicy) - -ModelDynamicBatching = _reflection.GeneratedProtocolMessageType( - 'ModelDynamicBatching', - (_message.Message, ), - { - 'PriorityQueuePolicyEntry': - _reflection.GeneratedProtocolMessageType( - 'PriorityQueuePolicyEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching.PriorityQueuePolicyEntry) - }), - 'DESCRIPTOR': - _MODELDYNAMICBATCHING, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching) - }) -_sym_db.RegisterMessage(ModelDynamicBatching) -_sym_db.RegisterMessage(ModelDynamicBatching.PriorityQueuePolicyEntry) - -ModelSequenceBatching = _reflection.GeneratedProtocolMessageType( - 'ModelSequenceBatching', - (_message.Message, ), - { - 'Control': - _reflection.GeneratedProtocolMessageType( - 'Control', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROL, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.Control) - }), - 'ControlInput': - _reflection.GeneratedProtocolMessageType( - 'ControlInput', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROLINPUT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.ControlInput) - }), - 'InitialState': - _reflection.GeneratedProtocolMessageType( - 'InitialState', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELSEQUENCEBATCHING_INITIALSTATE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.InitialState) - }), - 'State': - _reflection.GeneratedProtocolMessageType( - 'State', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STATE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.State) - }), - 
'StrategyDirect': - _reflection.GeneratedProtocolMessageType( - 'StrategyDirect', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYDIRECT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyDirect) - }), - 'StrategyOldest': - _reflection.GeneratedProtocolMessageType( - 'StrategyOldest', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYOLDEST, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyOldest) - }), - 'DESCRIPTOR': - _MODELSEQUENCEBATCHING, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching) - }) -_sym_db.RegisterMessage(ModelSequenceBatching) -_sym_db.RegisterMessage(ModelSequenceBatching.Control) -_sym_db.RegisterMessage(ModelSequenceBatching.ControlInput) -_sym_db.RegisterMessage(ModelSequenceBatching.InitialState) -_sym_db.RegisterMessage(ModelSequenceBatching.State) -_sym_db.RegisterMessage(ModelSequenceBatching.StrategyDirect) -_sym_db.RegisterMessage(ModelSequenceBatching.StrategyOldest) - -ModelEnsembling = _reflection.GeneratedProtocolMessageType( - 'ModelEnsembling', - (_message.Message, ), - { - 'Step': - _reflection.GeneratedProtocolMessageType( - 'Step', - (_message.Message, ), - { - 'InputMapEntry': - _reflection.GeneratedProtocolMessageType( - 'InputMapEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELENSEMBLING_STEP_INPUTMAPENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.InputMapEntry) - }), - 'OutputMapEntry': - _reflection.GeneratedProtocolMessageType( - 'OutputMapEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELENSEMBLING_STEP_OUTPUTMAPENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.OutputMapEntry) - }), - 'DESCRIPTOR': - _MODELENSEMBLING_STEP, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step) - }), - 'DESCRIPTOR': - _MODELENSEMBLING, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling) - }) -_sym_db.RegisterMessage(ModelEnsembling) -_sym_db.RegisterMessage(ModelEnsembling.Step) -_sym_db.RegisterMessage(ModelEnsembling.Step.InputMapEntry) -_sym_db.RegisterMessage(ModelEnsembling.Step.OutputMapEntry) - -ModelParameter = _reflection.GeneratedProtocolMessageType( - 'ModelParameter', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELPARAMETER, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelParameter) - }) -_sym_db.RegisterMessage(ModelParameter) - -ModelWarmup = _reflection.GeneratedProtocolMessageType( - 'ModelWarmup', - (_message.Message, ), - { - 'Input': - _reflection.GeneratedProtocolMessageType( - 'Input', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELWARMUP_INPUT, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelWarmup.Input) - }), - 'InputsEntry': - _reflection.GeneratedProtocolMessageType( - 'InputsEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELWARMUP_INPUTSENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelWarmup.InputsEntry) - }), - 'DESCRIPTOR': - _MODELWARMUP, - '__module__': - 'model_config.protxt_pb2' - # 
@@protoc_insertion_point(class_scope:inference.ModelWarmup) - }) -_sym_db.RegisterMessage(ModelWarmup) -_sym_db.RegisterMessage(ModelWarmup.Input) -_sym_db.RegisterMessage(ModelWarmup.InputsEntry) - -ModelOperations = _reflection.GeneratedProtocolMessageType( - 'ModelOperations', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELOPERATIONS, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelOperations) - }) -_sym_db.RegisterMessage(ModelOperations) - -ModelTransactionPolicy = _reflection.GeneratedProtocolMessageType( - 'ModelTransactionPolicy', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELTRANSACTIONPOLICY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelTransactionPolicy) - }) -_sym_db.RegisterMessage(ModelTransactionPolicy) - -ModelRepositoryAgents = _reflection.GeneratedProtocolMessageType( - 'ModelRepositoryAgents', - (_message.Message, ), - { - 'Agent': - _reflection.GeneratedProtocolMessageType( - 'Agent', - (_message.Message, ), - { - 'ParametersEntry': - _reflection.GeneratedProtocolMessageType( - 'ParametersEntry', - (_message.Message, ), - { - 'DESCRIPTOR': - _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent.ParametersEntry) - }), - 'DESCRIPTOR': - _MODELREPOSITORYAGENTS_AGENT, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent) - }), - 'DESCRIPTOR': - _MODELREPOSITORYAGENTS, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents) - }) -_sym_db.RegisterMessage(ModelRepositoryAgents) -_sym_db.RegisterMessage(ModelRepositoryAgents.Agent) -_sym_db.RegisterMessage(ModelRepositoryAgents.Agent.ParametersEntry) - -ModelResponseCache = _reflection.GeneratedProtocolMessageType( - 'ModelResponseCache', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELRESPONSECACHE, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelResponseCache) - }) -_sym_db.RegisterMessage(ModelResponseCache) - -ModelConfig = _reflection.GeneratedProtocolMessageType( - 'ModelConfig', - (_message.Message, ), - { - 'CcModelFilenamesEntry': - _reflection.GeneratedProtocolMessageType( - 'CcModelFilenamesEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELCONFIG_CCMODELFILENAMESENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelConfig.CcModelFilenamesEntry) - }), - 'MetricTagsEntry': - _reflection.GeneratedProtocolMessageType( - 'MetricTagsEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELCONFIG_METRICTAGSENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelConfig.MetricTagsEntry) - }), - 'ParametersEntry': - _reflection.GeneratedProtocolMessageType( - 'ParametersEntry', - (_message.Message, ), - { - 'DESCRIPTOR': _MODELCONFIG_PARAMETERSENTRY, - '__module__': 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelConfig.ParametersEntry) - }), - 'DESCRIPTOR': - _MODELCONFIG, - '__module__': - 'model_config.protxt_pb2' - # @@protoc_insertion_point(class_scope:inference.ModelConfig) - }) -_sym_db.RegisterMessage(ModelConfig) -_sym_db.RegisterMessage(ModelConfig.CcModelFilenamesEntry) -_sym_db.RegisterMessage(ModelConfig.MetricTagsEntry) -_sym_db.RegisterMessage(ModelConfig.ParametersEntry) 
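The guarded block that follows (and its counterpart in the replacement module added below) only executes when protobuf falls back to its pure-Python descriptor implementation; under the C++ or upb backends the serialized options (each b'8\001' payload is the encoded map_entry = true option) and the _serialized_start/_serialized_end offsets are resolved natively, so the assignments are skipped. A minimal sketch for checking which backend is active in a given environment — api_implementation is an internal protobuf module, so treat this as a debugging aid rather than a stable API:

from google.protobuf.internal import api_implementation

# 'python' means the pure-Python fallback is in use and the guarded
# assignments below run; 'cpp' or 'upb' means they are skipped.
print(api_implementation.Type())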
- -if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._options = None - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_options = b'8\001' - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._options = None - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_options = b'8\001' - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._options = None - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_options = b'8\001' - _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._options = None - _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_options = b'8\001' - _MODELENSEMBLING_STEP_INPUTMAPENTRY._options = None - _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_options = b'8\001' - _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._options = None - _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_options = b'8\001' - _MODELWARMUP_INPUTSENTRY._options = None - _MODELWARMUP_INPUTSENTRY._serialized_options = b'8\001' - _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._options = None - _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_options = b'8\001' - _MODELCONFIG_CCMODELFILENAMESENTRY._options = None - _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_options = b'8\001' - _MODELCONFIG_METRICTAGSENTRY._options = None - _MODELCONFIG_METRICTAGSENTRY._serialized_options = b'8\001' - _MODELCONFIG_PARAMETERSENTRY._options = None - _MODELCONFIG_PARAMETERSENTRY._serialized_options = b'8\001' - _DATATYPE._serialized_start = 8137 - _DATATYPE._serialized_end = 8387 - _MODELRATELIMITER._serialized_start = 35 - _MODELRATELIMITER._serialized_end = 185 - _MODELRATELIMITER_RESOURCE._serialized_start = 130 - _MODELRATELIMITER_RESOURCE._serialized_end = 185 - _MODELINSTANCEGROUP._serialized_start = 188 - _MODELINSTANCEGROUP._serialized_end = 707 - _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_start = 484 - _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_end = 640 - _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_start = 603 - _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_end = 640 - _MODELINSTANCEGROUP_KIND._serialized_start = 642 - _MODELINSTANCEGROUP_KIND._serialized_end = 707 - _MODELTENSORRESHAPE._serialized_start = 709 - _MODELTENSORRESHAPE._serialized_end = 744 - _MODELINPUT._serialized_start = 747 - _MODELINPUT._serialized_end = 1053 - _MODELINPUT_FORMAT._serialized_start = 994 - _MODELINPUT_FORMAT._serialized_end = 1053 - _MODELOUTPUT._serialized_start = 1056 - _MODELOUTPUT._serialized_end = 1234 - _BATCHINPUT._serialized_start = 1237 - _BATCHINPUT._serialized_end = 1582 - _BATCHINPUT_KIND._serialized_start = 1377 - _BATCHINPUT_KIND._serialized_end = 1582 - _BATCHOUTPUT._serialized_start = 1585 - _BATCHOUTPUT._serialized_end = 1728 - _BATCHOUTPUT_KIND._serialized_start = 1686 - _BATCHOUTPUT_KIND._serialized_end = 1728 - _MODELVERSIONPOLICY._serialized_start = 1731 - _MODELVERSIONPOLICY._serialized_end = 2003 - _MODELVERSIONPOLICY_LATEST._serialized_start = 1919 - _MODELVERSIONPOLICY_LATEST._serialized_end = 1949 - _MODELVERSIONPOLICY_ALL._serialized_start = 1951 - _MODELVERSIONPOLICY_ALL._serialized_end = 1956 - _MODELVERSIONPOLICY_SPECIFIC._serialized_start = 1958 - _MODELVERSIONPOLICY_SPECIFIC._serialized_end = 1986 - _MODELOPTIMIZATIONPOLICY._serialized_start = 2006 - _MODELOPTIMIZATIONPOLICY._serialized_end = 3795 - _MODELOPTIMIZATIONPOLICY_GRAPH._serialized_start = 2536 - 
_MODELOPTIMIZATIONPOLICY_GRAPH._serialized_end = 2558 - _MODELOPTIMIZATIONPOLICY_CUDA._serialized_start = 2561 - _MODELOPTIMIZATIONPOLICY_CUDA._serialized_end = 3259 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_start = 2711 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_end = 3259 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_start = 2910 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_end = 2930 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_start = 2933 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_end = 3156 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_start = 3055 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_end = 3156 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_start = 3055 - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_end = 3156 - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_start = 3262 - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_end = 3682 - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_start = 3498 - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_end = 3682 - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_start = 3633 - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_end = 3682 - _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_start = 3684 - _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_end = 3720 - _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_start = 3722 - _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_end = 3795 - _MODELQUEUEPOLICY._serialized_start = 3798 - _MODELQUEUEPOLICY._serialized_end = 4017 - _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_start = 3979 - _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_end = 4017 - _MODELDYNAMICBATCHING._serialized_start = 4020 - _MODELDYNAMICBATCHING._serialized_end = 4431 - _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_start = 4344 - _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_end = 4431 - _MODELSEQUENCEBATCHING._serialized_start = 4434 - _MODELSEQUENCEBATCHING._serialized_end = 5697 - _MODELSEQUENCEBATCHING_CONTROL._serialized_start = 4759 - _MODELSEQUENCEBATCHING_CONTROL._serialized_end = 5064 - _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_start = 4947 - _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_end = 5064 - _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_start = 5066 - _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_end = 5153 - _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_start = 5156 - _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_end = 5294 - _MODELSEQUENCEBATCHING_STATE._serialized_start = 5297 - _MODELSEQUENCEBATCHING_STATE._serialized_end = 5469 - _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_start = 5471 - _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_end = 5559 - _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_start = 5561 - _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_end = 5678 - _MODELENSEMBLING._serialized_start = 5700 - _MODELENSEMBLING._serialized_end = 6049 - _MODELENSEMBLING_STEP._serialized_start = 5767 - _MODELENSEMBLING_STEP._serialized_end = 6049 - _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_start = 5952 - _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_end = 5999 - _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_start = 6001 - _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_end = 6049 - 
_MODELPARAMETER._serialized_start = 6051 - _MODELPARAMETER._serialized_end = 6089 - _MODELWARMUP._serialized_start = 6092 - _MODELWARMUP._serialized_end = 6437 - _MODELWARMUP_INPUT._serialized_start = 6209 - _MODELWARMUP_INPUT._serialized_end = 6360 - _MODELWARMUP_INPUTSENTRY._serialized_start = 6362 - _MODELWARMUP_INPUTSENTRY._serialized_end = 6437 - _MODELOPERATIONS._serialized_start = 6439 - _MODELOPERATIONS._serialized_end = 6485 - _MODELTRANSACTIONPOLICY._serialized_start = 6487 - _MODELTRANSACTIONPOLICY._serialized_end = 6530 - _MODELREPOSITORYAGENTS._serialized_start = 6533 - _MODELREPOSITORYAGENTS._serialized_end = 6763 - _MODELREPOSITORYAGENTS_AGENT._serialized_start = 6615 - _MODELREPOSITORYAGENTS_AGENT._serialized_end = 6763 - _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_start = 3633 - _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_end = 3682 - _MODELRESPONSECACHE._serialized_start = 6765 - _MODELRESPONSECACHE._serialized_end = 6801 - _MODELCONFIG._serialized_start = 6804 - _MODELCONFIG._serialized_end = 8134 - _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_start = 7929 - _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_end = 7984 - _MODELCONFIG_METRICTAGSENTRY._serialized_start = 7986 - _MODELCONFIG_METRICTAGSENTRY._serialized_end = 8035 - _MODELCONFIG_PARAMETERSENTRY._serialized_start = 8037 - _MODELCONFIG_PARAMETERSENTRY._serialized_end = 8113 -# @@protoc_insertion_point(module_scope) From 7c3c3b7ecc3ba20fcdf8ef22afeb1bbd2226ed49 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 10 Jan 2023 15:47:49 +0800 Subject: [PATCH 47/48] optimize code structure --- .../component/inference/proto/__init__.py | 14 + .../inference/proto/model_config_pb2.py | 856 ++++++++++++++++++ 2 files changed, 870 insertions(+) create mode 100644 visualdl/component/inference/proto/__init__.py create mode 100644 visualdl/component/inference/proto/model_config_pb2.py diff --git a/visualdl/component/inference/proto/__init__.py b/visualdl/component/inference/proto/__init__.py new file mode 100644 index 000000000..9c19f7b87 --- /dev/null +++ b/visualdl/component/inference/proto/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022 VisualDL Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ======================================================================= diff --git a/visualdl/component/inference/proto/model_config_pb2.py b/visualdl/component/inference/proto/model_config_pb2.py new file mode 100644 index 000000000..70bf7b906 --- /dev/null +++ b/visualdl/component/inference/proto/model_config_pb2.py @@ -0,0 +1,856 @@ +# flake8: noqa +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: model_config.protxt +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import enum_type_wrapper +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x13model_config.protxt\x12\tinference\"\x96\x01\n\x10ModelRateLimiter\x12\x37\n\tresources\x18\x01 \x03(\x0b\x32$.inference.ModelRateLimiter.Resource\x12\x10\n\x08priority\x18\x02 \x01(\r\x1a\x37\n\x08Resource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06global\x18\x02 \x01(\x08\x12\r\n\x05\x63ount\x18\x03 \x01(\r\"\x87\x04\n\x12ModelInstanceGroup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x30\n\x04kind\x18\x04 \x01(\x0e\x32\".inference.ModelInstanceGroup.Kind\x12\r\n\x05\x63ount\x18\x02 \x01(\x05\x12\x31\n\x0crate_limiter\x18\x06 \x01(\x0b\x32\x1b.inference.ModelRateLimiter\x12\x0c\n\x04gpus\x18\x03 \x03(\x05\x12H\n\x11secondary_devices\x18\x08 \x03(\x0b\x32-.inference.ModelInstanceGroup.SecondaryDevice\x12\x0f\n\x07profile\x18\x05 \x03(\t\x12\x0f\n\x07passive\x18\x07 \x01(\x08\x12\x13\n\x0bhost_policy\x18\t \x01(\t\x1a\x9c\x01\n\x0fSecondaryDevice\x12O\n\x04kind\x18\x01 \x01(\x0e\x32\x41.inference.ModelInstanceGroup.SecondaryDevice.SecondaryDeviceKind\x12\x11\n\tdevice_id\x18\x02 \x01(\x03\"%\n\x13SecondaryDeviceKind\x12\x0e\n\nKIND_NVDLA\x10\x00\"A\n\x04Kind\x12\r\n\tKIND_AUTO\x10\x00\x12\x0c\n\x08KIND_GPU\x10\x01\x12\x0c\n\x08KIND_CPU\x10\x02\x12\x0e\n\nKIND_MODEL\x10\x03\"#\n\x12ModelTensorReshape\x12\r\n\x05shape\x18\x01 \x03(\x03\"\xb2\x02\n\nModelInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12,\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1c.inference.ModelInput.Format\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\x12\x1a\n\x12\x61llow_ragged_batch\x18\x07 \x01(\x08\x12\x10\n\x08optional\x18\x08 \x01(\x08\";\n\x06\x46ormat\x12\x0f\n\x0b\x46ORMAT_NONE\x10\x00\x12\x0f\n\x0b\x46ORMAT_NHWC\x10\x01\x12\x0f\n\x0b\x46ORMAT_NCHW\x10\x02\"\xb2\x01\n\x0bModelOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x03 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x16\n\x0elabel_filename\x18\x04 \x01(\t\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\"\xd9\x02\n\nBatchInput\x12(\n\x04kind\x18\x01 \x01(\x0e\x32\x1a.inference.BatchInput.Kind\x12\x13\n\x0btarget_name\x18\x02 \x03(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x14\n\x0csource_input\x18\x04 \x03(\t\"\xcd\x01\n\x04Kind\x12\x17\n\x13\x42\x41TCH_ELEMENT_COUNT\x10\x00\x12#\n\x1f\x42\x41TCH_ACCUMULATED_ELEMENT_COUNT\x10\x01\x12-\n)BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO\x10\x02\x12$\n BATCH_MAX_ELEMENT_COUNT_AS_SHAPE\x10\x03\x12\x14\n\x10\x42\x41TCH_ITEM_SHAPE\x10\x04\x12\x1c\n\x18\x42\x41TCH_ITEM_SHAPE_FLATTEN\x10\x05\"\x8f\x01\n\x0b\x42\x61tchOutput\x12\x13\n\x0btarget_name\x18\x01 \x03(\t\x12)\n\x04kind\x18\x02 \x01(\x0e\x32\x1b.inference.BatchOutput.Kind\x12\x14\n\x0csource_input\x18\x03 
\x03(\t\"*\n\x04Kind\x12\"\n\x1e\x42\x41TCH_SCATTER_WITH_INPUT_SHAPE\x10\x00\"\x90\x02\n\x12ModelVersionPolicy\x12\x36\n\x06latest\x18\x01 \x01(\x0b\x32$.inference.ModelVersionPolicy.LatestH\x00\x12\x30\n\x03\x61ll\x18\x02 \x01(\x0b\x32!.inference.ModelVersionPolicy.AllH\x00\x12:\n\x08specific\x18\x03 \x01(\x0b\x32&.inference.ModelVersionPolicy.SpecificH\x00\x1a\x1e\n\x06Latest\x12\x14\n\x0cnum_versions\x18\x01 \x01(\r\x1a\x05\n\x03\x41ll\x1a\x1c\n\x08Specific\x12\x10\n\x08versions\x18\x01 \x03(\x03\x42\x0f\n\rpolicy_choice\"\xfd\r\n\x17ModelOptimizationPolicy\x12\x37\n\x05graph\x18\x01 \x01(\x0b\x32(.inference.ModelOptimizationPolicy.Graph\x12\x42\n\x08priority\x18\x02 \x01(\x0e\x32\x30.inference.ModelOptimizationPolicy.ModelPriority\x12\x35\n\x04\x63uda\x18\x03 \x01(\x0b\x32\'.inference.ModelOptimizationPolicy.Cuda\x12X\n\x16\x65xecution_accelerators\x18\x04 \x01(\x0b\x32\x38.inference.ModelOptimizationPolicy.ExecutionAccelerators\x12R\n\x13input_pinned_memory\x18\x05 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12S\n\x14output_pinned_memory\x18\x06 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12&\n\x1egather_kernel_buffer_threshold\x18\x07 \x01(\r\x12\x16\n\x0e\x65\x61ger_batching\x18\x08 \x01(\x08\x1a\x16\n\x05Graph\x12\r\n\x05level\x18\x01 \x01(\x05\x1a\xba\x05\n\x04\x43uda\x12\x0e\n\x06graphs\x18\x01 \x01(\x08\x12\x18\n\x10\x62usy_wait_events\x18\x02 \x01(\x08\x12\x45\n\ngraph_spec\x18\x03 \x03(\x0b\x32\x31.inference.ModelOptimizationPolicy.Cuda.GraphSpec\x12\x1a\n\x12output_copy_stream\x18\x04 \x01(\x08\x1a\xa4\x04\n\tGraphSpec\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12K\n\x05input\x18\x02 \x03(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry\x12W\n\x11graph_lower_bound\x18\x03 \x01(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound\x1a\x14\n\x05Shape\x12\x0b\n\x03\x64im\x18\x01 \x03(\x03\x1a\xdf\x01\n\nLowerBound\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12V\n\x05input\x18\x02 \x03(\x0b\x32G.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\xa4\x03\n\x15\x45xecutionAccelerators\x12g\n\x19gpu_execution_accelerator\x18\x01 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x12g\n\x19\x63pu_execution_accelerator\x18\x02 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x1a\xb8\x01\n\x0b\x41\x63\x63\x65lerator\x12\x0c\n\x04name\x18\x01 \x01(\t\x12h\n\nparameters\x18\x02 \x03(\x0b\x32T.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a$\n\x12PinnedMemoryBuffer\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"I\n\rModelPriority\x12\x14\n\x10PRIORITY_DEFAULT\x10\x00\x12\x10\n\x0cPRIORITY_MAX\x10\x01\x12\x10\n\x0cPRIORITY_MIN\x10\x02\"\xdb\x01\n\x10ModelQueuePolicy\x12\x41\n\x0etimeout_action\x18\x01 \x01(\x0e\x32).inference.ModelQueuePolicy.TimeoutAction\x12$\n\x1c\x64\x65\x66\x61ult_timeout_microseconds\x18\x02 \x01(\x04\x12\x1e\n\x16\x61llow_timeout_override\x18\x03 \x01(\x08\x12\x16\n\x0emax_queue_size\x18\x04 
\x01(\r\"&\n\rTimeoutAction\x12\n\n\x06REJECT\x10\x00\x12\t\n\x05\x44\x45LAY\x10\x01\"\x9b\x03\n\x14ModelDynamicBatching\x12\x1c\n\x14preferred_batch_size\x18\x01 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x02 \x01(\x04\x12\x19\n\x11preserve_ordering\x18\x03 \x01(\x08\x12\x17\n\x0fpriority_levels\x18\x04 \x01(\r\x12\x1e\n\x16\x64\x65\x66\x61ult_priority_level\x18\x05 \x01(\r\x12\x39\n\x14\x64\x65\x66\x61ult_queue_policy\x18\x06 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy\x12W\n\x15priority_queue_policy\x18\x07 \x03(\x0b\x32\x38.inference.ModelDynamicBatching.PriorityQueuePolicyEntry\x1aW\n\x18PriorityQueuePolicyEntry\x12\x0b\n\x03key\x18\x01 \x01(\r\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy:\x02\x38\x01\"\xef\t\n\x15ModelSequenceBatching\x12\x41\n\x06\x64irect\x18\x03 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyDirectH\x00\x12\x41\n\x06oldest\x18\x04 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyOldestH\x00\x12&\n\x1emax_sequence_idle_microseconds\x18\x01 \x01(\x04\x12\x44\n\rcontrol_input\x18\x02 \x03(\x0b\x32-.inference.ModelSequenceBatching.ControlInput\x12\x35\n\x05state\x18\x05 \x03(\x0b\x32&.inference.ModelSequenceBatching.State\x1a\xb1\x02\n\x07\x43ontrol\x12;\n\x04kind\x18\x01 \x01(\x0e\x32-.inference.ModelSequenceBatching.Control.Kind\x12\x18\n\x10int32_false_true\x18\x02 \x03(\x05\x12\x17\n\x0f\x66p32_false_true\x18\x03 \x03(\x02\x12\x17\n\x0f\x62ool_false_true\x18\x05 \x03(\x08\x12&\n\tdata_type\x18\x04 \x01(\x0e\x32\x13.inference.DataType\"u\n\x04Kind\x12\x1a\n\x16\x43ONTROL_SEQUENCE_START\x10\x00\x12\x1a\n\x16\x43ONTROL_SEQUENCE_READY\x10\x01\x12\x18\n\x14\x43ONTROL_SEQUENCE_END\x10\x02\x12\x1b\n\x17\x43ONTROL_SEQUENCE_CORRID\x10\x03\x1aW\n\x0c\x43ontrolInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x39\n\x07\x63ontrol\x18\x02 \x03(\x0b\x32(.inference.ModelSequenceBatching.Control\x1a\x8a\x01\n\x0cInitialState\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x13\n\tdata_file\x18\x04 \x01(\tH\x00\x12\x0c\n\x04name\x18\x05 \x01(\tB\x0c\n\nstate_data\x1a\xac\x01\n\x05State\x12\x12\n\ninput_name\x18\x01 \x01(\t\x12\x13\n\x0boutput_name\x18\x02 \x01(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12\x44\n\rinitial_state\x18\x05 \x03(\x0b\x32-.inference.ModelSequenceBatching.InitialState\x1aX\n\x0eStrategyDirect\x12$\n\x1cmax_queue_delay_microseconds\x18\x01 \x01(\x04\x12 \n\x18minimum_slot_utilization\x18\x02 \x01(\x02\x1au\n\x0eStrategyOldest\x12\x1f\n\x17max_candidate_sequences\x18\x01 \x01(\x05\x12\x1c\n\x14preferred_batch_size\x18\x02 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x03 \x01(\x04\x42\x11\n\x0fstrategy_choice\"\xdd\x02\n\x0fModelEnsembling\x12-\n\x04step\x18\x01 \x03(\x0b\x32\x1f.inference.ModelEnsembling.Step\x1a\x9a\x02\n\x04Step\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\x03\x12@\n\tinput_map\x18\x03 \x03(\x0b\x32-.inference.ModelEnsembling.Step.InputMapEntry\x12\x42\n\noutput_map\x18\x04 \x03(\x0b\x32..inference.ModelEnsembling.Step.OutputMapEntry\x1a/\n\rInputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x0eOutputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"&\n\x0eModelParameter\x12\x14\n\x0cstring_value\x18\x01 \x01(\t\"\xd9\x02\n\x0bModelWarmup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 
\x01(\r\x12\x32\n\x06inputs\x18\x03 \x03(\x0b\x32\".inference.ModelWarmup.InputsEntry\x12\r\n\x05\x63ount\x18\x04 \x01(\r\x1a\x97\x01\n\x05Input\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x15\n\x0brandom_data\x18\x04 \x01(\x08H\x00\x12\x19\n\x0finput_data_file\x18\x05 \x01(\tH\x00\x42\x11\n\x0finput_data_type\x1aK\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12+\n\x05value\x18\x02 \x01(\x0b\x32\x1c.inference.ModelWarmup.Input:\x02\x38\x01\".\n\x0fModelOperations\x12\x1b\n\x13op_library_filename\x18\x01 \x03(\t\"+\n\x16ModelTransactionPolicy\x12\x11\n\tdecoupled\x18\x01 \x01(\x08\"\xe6\x01\n\x15ModelRepositoryAgents\x12\x36\n\x06\x61gents\x18\x01 \x03(\x0b\x32&.inference.ModelRepositoryAgents.Agent\x1a\x94\x01\n\x05\x41gent\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\nparameters\x18\x02 \x03(\x0b\x32\x36.inference.ModelRepositoryAgents.Agent.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"$\n\x12ModelResponseCache\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"\xb2\n\n\x0bModelConfig\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08platform\x18\x02 \x01(\t\x12\x0f\n\x07\x62\x61\x63kend\x18\x11 \x01(\t\x12\x35\n\x0eversion_policy\x18\x03 \x01(\x0b\x32\x1d.inference.ModelVersionPolicy\x12\x16\n\x0emax_batch_size\x18\x04 \x01(\x05\x12$\n\x05input\x18\x05 \x03(\x0b\x32\x15.inference.ModelInput\x12&\n\x06output\x18\x06 \x03(\x0b\x32\x16.inference.ModelOutput\x12*\n\x0b\x62\x61tch_input\x18\x14 \x03(\x0b\x32\x15.inference.BatchInput\x12,\n\x0c\x62\x61tch_output\x18\x15 \x03(\x0b\x32\x16.inference.BatchOutput\x12\x38\n\x0coptimization\x18\x0c \x01(\x0b\x32\".inference.ModelOptimizationPolicy\x12;\n\x10\x64ynamic_batching\x18\x0b \x01(\x0b\x32\x1f.inference.ModelDynamicBatchingH\x00\x12=\n\x11sequence_batching\x18\r \x01(\x0b\x32 .inference.ModelSequenceBatchingH\x00\x12\x39\n\x13\x65nsemble_scheduling\x18\x0f \x01(\x0b\x32\x1a.inference.ModelEnsemblingH\x00\x12\x35\n\x0einstance_group\x18\x07 \x03(\x0b\x32\x1d.inference.ModelInstanceGroup\x12\x1e\n\x16\x64\x65\x66\x61ult_model_filename\x18\x08 \x01(\t\x12H\n\x12\x63\x63_model_filenames\x18\t \x03(\x0b\x32,.inference.ModelConfig.CcModelFilenamesEntry\x12;\n\x0bmetric_tags\x18\n \x03(\x0b\x32&.inference.ModelConfig.MetricTagsEntry\x12:\n\nparameters\x18\x0e \x03(\x0b\x32&.inference.ModelConfig.ParametersEntry\x12,\n\x0cmodel_warmup\x18\x10 \x03(\x0b\x32\x16.inference.ModelWarmup\x12\x34\n\x10model_operations\x18\x12 \x01(\x0b\x32\x1a.inference.ModelOperations\x12\x43\n\x18model_transaction_policy\x18\x13 \x01(\x0b\x32!.inference.ModelTransactionPolicy\x12\x41\n\x17model_repository_agents\x18\x17 \x01(\x0b\x32 .inference.ModelRepositoryAgents\x12\x35\n\x0eresponse_cache\x18\x18 \x01(\x0b\x32\x1d.inference.ModelResponseCache\x1a\x37\n\x15\x43\x63ModelFilenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x31\n\x0fMetricTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 
\x01(\x0b\x32\x19.inference.ModelParameter:\x02\x38\x01\x42\x13\n\x11scheduling_choice*\xfa\x01\n\x08\x44\x61taType\x12\x10\n\x0cTYPE_INVALID\x10\x00\x12\r\n\tTYPE_BOOL\x10\x01\x12\x0e\n\nTYPE_UINT8\x10\x02\x12\x0f\n\x0bTYPE_UINT16\x10\x03\x12\x0f\n\x0bTYPE_UINT32\x10\x04\x12\x0f\n\x0bTYPE_UINT64\x10\x05\x12\r\n\tTYPE_INT8\x10\x06\x12\x0e\n\nTYPE_INT16\x10\x07\x12\x0e\n\nTYPE_INT32\x10\x08\x12\x0e\n\nTYPE_INT64\x10\t\x12\r\n\tTYPE_FP16\x10\n\x12\r\n\tTYPE_FP32\x10\x0b\x12\r\n\tTYPE_FP64\x10\x0c\x12\x0f\n\x0bTYPE_STRING\x10\r\x12\r\n\tTYPE_BF16\x10\x0e\x62\x06proto3' +) + +_DATATYPE = DESCRIPTOR.enum_types_by_name['DataType'] +DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE) +TYPE_INVALID = 0 +TYPE_BOOL = 1 +TYPE_UINT8 = 2 +TYPE_UINT16 = 3 +TYPE_UINT32 = 4 +TYPE_UINT64 = 5 +TYPE_INT8 = 6 +TYPE_INT16 = 7 +TYPE_INT32 = 8 +TYPE_INT64 = 9 +TYPE_FP16 = 10 +TYPE_FP32 = 11 +TYPE_FP64 = 12 +TYPE_STRING = 13 +TYPE_BF16 = 14 + +_MODELRATELIMITER = DESCRIPTOR.message_types_by_name['ModelRateLimiter'] +_MODELRATELIMITER_RESOURCE = _MODELRATELIMITER.nested_types_by_name['Resource'] +_MODELINSTANCEGROUP = DESCRIPTOR.message_types_by_name['ModelInstanceGroup'] +_MODELINSTANCEGROUP_SECONDARYDEVICE = _MODELINSTANCEGROUP.nested_types_by_name[ + 'SecondaryDevice'] +_MODELTENSORRESHAPE = DESCRIPTOR.message_types_by_name['ModelTensorReshape'] +_MODELINPUT = DESCRIPTOR.message_types_by_name['ModelInput'] +_MODELOUTPUT = DESCRIPTOR.message_types_by_name['ModelOutput'] +_BATCHINPUT = DESCRIPTOR.message_types_by_name['BatchInput'] +_BATCHOUTPUT = DESCRIPTOR.message_types_by_name['BatchOutput'] +_MODELVERSIONPOLICY = DESCRIPTOR.message_types_by_name['ModelVersionPolicy'] +_MODELVERSIONPOLICY_LATEST = _MODELVERSIONPOLICY.nested_types_by_name['Latest'] +_MODELVERSIONPOLICY_ALL = _MODELVERSIONPOLICY.nested_types_by_name['All'] +_MODELVERSIONPOLICY_SPECIFIC = _MODELVERSIONPOLICY.nested_types_by_name[ + 'Specific'] +_MODELOPTIMIZATIONPOLICY = DESCRIPTOR.message_types_by_name[ + 'ModelOptimizationPolicy'] +_MODELOPTIMIZATIONPOLICY_GRAPH = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'Graph'] +_MODELOPTIMIZATIONPOLICY_CUDA = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'Cuda'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC = _MODELOPTIMIZATIONPOLICY_CUDA.nested_types_by_name[ + 'GraphSpec'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ + 'Shape'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ + 'LowerBound'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.nested_types_by_name[ + 'InputEntry'] +_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[ + 'InputEntry'] +_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'ExecutionAccelerators'] +_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.nested_types_by_name[ + 'Accelerator'] +_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.nested_types_by_name[ + 'ParametersEntry'] +_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[ + 'PinnedMemoryBuffer'] +_MODELQUEUEPOLICY = DESCRIPTOR.message_types_by_name['ModelQueuePolicy'] +_MODELDYNAMICBATCHING = DESCRIPTOR.message_types_by_name[ + 
'ModelDynamicBatching'] +_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY = _MODELDYNAMICBATCHING.nested_types_by_name[ + 'PriorityQueuePolicyEntry'] +_MODELSEQUENCEBATCHING = DESCRIPTOR.message_types_by_name[ + 'ModelSequenceBatching'] +_MODELSEQUENCEBATCHING_CONTROL = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'Control'] +_MODELSEQUENCEBATCHING_CONTROLINPUT = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'ControlInput'] +_MODELSEQUENCEBATCHING_INITIALSTATE = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'InitialState'] +_MODELSEQUENCEBATCHING_STATE = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'State'] +_MODELSEQUENCEBATCHING_STRATEGYDIRECT = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'StrategyDirect'] +_MODELSEQUENCEBATCHING_STRATEGYOLDEST = _MODELSEQUENCEBATCHING.nested_types_by_name[ + 'StrategyOldest'] +_MODELENSEMBLING = DESCRIPTOR.message_types_by_name['ModelEnsembling'] +_MODELENSEMBLING_STEP = _MODELENSEMBLING.nested_types_by_name['Step'] +_MODELENSEMBLING_STEP_INPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[ + 'InputMapEntry'] +_MODELENSEMBLING_STEP_OUTPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[ + 'OutputMapEntry'] +_MODELPARAMETER = DESCRIPTOR.message_types_by_name['ModelParameter'] +_MODELWARMUP = DESCRIPTOR.message_types_by_name['ModelWarmup'] +_MODELWARMUP_INPUT = _MODELWARMUP.nested_types_by_name['Input'] +_MODELWARMUP_INPUTSENTRY = _MODELWARMUP.nested_types_by_name['InputsEntry'] +_MODELOPERATIONS = DESCRIPTOR.message_types_by_name['ModelOperations'] +_MODELTRANSACTIONPOLICY = DESCRIPTOR.message_types_by_name[ + 'ModelTransactionPolicy'] +_MODELREPOSITORYAGENTS = DESCRIPTOR.message_types_by_name[ + 'ModelRepositoryAgents'] +_MODELREPOSITORYAGENTS_AGENT = _MODELREPOSITORYAGENTS.nested_types_by_name[ + 'Agent'] +_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY = _MODELREPOSITORYAGENTS_AGENT.nested_types_by_name[ + 'ParametersEntry'] +_MODELRESPONSECACHE = DESCRIPTOR.message_types_by_name['ModelResponseCache'] +_MODELCONFIG = DESCRIPTOR.message_types_by_name['ModelConfig'] +_MODELCONFIG_CCMODELFILENAMESENTRY = _MODELCONFIG.nested_types_by_name[ + 'CcModelFilenamesEntry'] +_MODELCONFIG_METRICTAGSENTRY = _MODELCONFIG.nested_types_by_name[ + 'MetricTagsEntry'] +_MODELCONFIG_PARAMETERSENTRY = _MODELCONFIG.nested_types_by_name[ + 'ParametersEntry'] +_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND = _MODELINSTANCEGROUP_SECONDARYDEVICE.enum_types_by_name[ + 'SecondaryDeviceKind'] +_MODELINSTANCEGROUP_KIND = _MODELINSTANCEGROUP.enum_types_by_name['Kind'] +_MODELINPUT_FORMAT = _MODELINPUT.enum_types_by_name['Format'] +_BATCHINPUT_KIND = _BATCHINPUT.enum_types_by_name['Kind'] +_BATCHOUTPUT_KIND = _BATCHOUTPUT.enum_types_by_name['Kind'] +_MODELOPTIMIZATIONPOLICY_MODELPRIORITY = _MODELOPTIMIZATIONPOLICY.enum_types_by_name[ + 'ModelPriority'] +_MODELQUEUEPOLICY_TIMEOUTACTION = _MODELQUEUEPOLICY.enum_types_by_name[ + 'TimeoutAction'] +_MODELSEQUENCEBATCHING_CONTROL_KIND = _MODELSEQUENCEBATCHING_CONTROL.enum_types_by_name[ + 'Kind'] +ModelRateLimiter = _reflection.GeneratedProtocolMessageType( + 'ModelRateLimiter', + (_message.Message, ), + { + 'Resource': + _reflection.GeneratedProtocolMessageType( + 'Resource', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELRATELIMITER_RESOURCE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRateLimiter.Resource) + }), + 'DESCRIPTOR': + _MODELRATELIMITER, + '__module__': + 'model_config.protxt_pb2' + # 
@@protoc_insertion_point(class_scope:inference.ModelRateLimiter) + }) +_sym_db.RegisterMessage(ModelRateLimiter) +_sym_db.RegisterMessage(ModelRateLimiter.Resource) + +ModelInstanceGroup = _reflection.GeneratedProtocolMessageType( + 'ModelInstanceGroup', + (_message.Message, ), + { + 'SecondaryDevice': + _reflection.GeneratedProtocolMessageType( + 'SecondaryDevice', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELINSTANCEGROUP_SECONDARYDEVICE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup.SecondaryDevice) + }), + 'DESCRIPTOR': + _MODELINSTANCEGROUP, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup) + }) +_sym_db.RegisterMessage(ModelInstanceGroup) +_sym_db.RegisterMessage(ModelInstanceGroup.SecondaryDevice) + +ModelTensorReshape = _reflection.GeneratedProtocolMessageType( + 'ModelTensorReshape', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELTENSORRESHAPE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelTensorReshape) + }) +_sym_db.RegisterMessage(ModelTensorReshape) + +ModelInput = _reflection.GeneratedProtocolMessageType( + 'ModelInput', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELINPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelInput) + }) +_sym_db.RegisterMessage(ModelInput) + +ModelOutput = _reflection.GeneratedProtocolMessageType( + 'ModelOutput', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELOUTPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOutput) + }) +_sym_db.RegisterMessage(ModelOutput) + +BatchInput = _reflection.GeneratedProtocolMessageType( + 'BatchInput', + (_message.Message, ), + { + 'DESCRIPTOR': _BATCHINPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.BatchInput) + }) +_sym_db.RegisterMessage(BatchInput) + +BatchOutput = _reflection.GeneratedProtocolMessageType( + 'BatchOutput', + (_message.Message, ), + { + 'DESCRIPTOR': _BATCHOUTPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.BatchOutput) + }) +_sym_db.RegisterMessage(BatchOutput) + +ModelVersionPolicy = _reflection.GeneratedProtocolMessageType( + 'ModelVersionPolicy', + (_message.Message, ), + { + 'Latest': + _reflection.GeneratedProtocolMessageType( + 'Latest', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELVERSIONPOLICY_LATEST, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Latest) + }), + 'All': + _reflection.GeneratedProtocolMessageType( + 'All', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELVERSIONPOLICY_ALL, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.All) + }), + 'Specific': + _reflection.GeneratedProtocolMessageType( + 'Specific', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELVERSIONPOLICY_SPECIFIC, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Specific) + }), + 'DESCRIPTOR': + _MODELVERSIONPOLICY, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy) + }) +_sym_db.RegisterMessage(ModelVersionPolicy) +_sym_db.RegisterMessage(ModelVersionPolicy.Latest) +_sym_db.RegisterMessage(ModelVersionPolicy.All) 
+_sym_db.RegisterMessage(ModelVersionPolicy.Specific) + +ModelOptimizationPolicy = _reflection.GeneratedProtocolMessageType( + 'ModelOptimizationPolicy', + (_message.Message, ), + { + 'Graph': + _reflection.GeneratedProtocolMessageType( + 'Graph', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELOPTIMIZATIONPOLICY_GRAPH, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Graph) + }), + 'Cuda': + _reflection.GeneratedProtocolMessageType( + 'Cuda', + (_message.Message, ), + { + 'GraphSpec': + _reflection.GeneratedProtocolMessageType( + 'GraphSpec', + (_message.Message, ), + { + 'Shape': + _reflection.GeneratedProtocolMessageType( + 'Shape', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape) + }), + 'LowerBound': + _reflection.GeneratedProtocolMessageType( + 'LowerBound', + (_message.Message, ), + { + 'InputEntry': + _reflection.GeneratedProtocolMessageType( + 'InputEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) + }), + 'InputEntry': + _reflection.GeneratedProtocolMessageType( + 'InputEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_CUDA, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda) + }), + 'ExecutionAccelerators': + _reflection.GeneratedProtocolMessageType( + 'ExecutionAccelerators', + (_message.Message, ), + { + 'Accelerator': + _reflection.GeneratedProtocolMessageType( + 'Accelerator', + (_message.Message, ), + { + 'ParametersEntry': + _reflection.GeneratedProtocolMessageType( + 'ParametersEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators) + }), + 'PinnedMemoryBuffer': + _reflection.GeneratedProtocolMessageType( + 'PinnedMemoryBuffer', + (_message.Message, ), + { + 'DESCRIPTOR': 
_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.PinnedMemoryBuffer) + }), + 'DESCRIPTOR': + _MODELOPTIMIZATIONPOLICY, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy) + }) +_sym_db.RegisterMessage(ModelOptimizationPolicy) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Graph) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.Shape) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) +_sym_db.RegisterMessage( + ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) +_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) +_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators) +_sym_db.RegisterMessage( + ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) +_sym_db.RegisterMessage( + ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) +_sym_db.RegisterMessage(ModelOptimizationPolicy.PinnedMemoryBuffer) + +ModelQueuePolicy = _reflection.GeneratedProtocolMessageType( + 'ModelQueuePolicy', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELQUEUEPOLICY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelQueuePolicy) + }) +_sym_db.RegisterMessage(ModelQueuePolicy) + +ModelDynamicBatching = _reflection.GeneratedProtocolMessageType( + 'ModelDynamicBatching', + (_message.Message, ), + { + 'PriorityQueuePolicyEntry': + _reflection.GeneratedProtocolMessageType( + 'PriorityQueuePolicyEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching.PriorityQueuePolicyEntry) + }), + 'DESCRIPTOR': + _MODELDYNAMICBATCHING, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching) + }) +_sym_db.RegisterMessage(ModelDynamicBatching) +_sym_db.RegisterMessage(ModelDynamicBatching.PriorityQueuePolicyEntry) + +ModelSequenceBatching = _reflection.GeneratedProtocolMessageType( + 'ModelSequenceBatching', + (_message.Message, ), + { + 'Control': + _reflection.GeneratedProtocolMessageType( + 'Control', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROL, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.Control) + }), + 'ControlInput': + _reflection.GeneratedProtocolMessageType( + 'ControlInput', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROLINPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.ControlInput) + }), + 'InitialState': + _reflection.GeneratedProtocolMessageType( + 'InitialState', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_INITIALSTATE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.InitialState) + }), + 'State': + _reflection.GeneratedProtocolMessageType( + 'State', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STATE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.State) + }), + 
'StrategyDirect': + _reflection.GeneratedProtocolMessageType( + 'StrategyDirect', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYDIRECT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyDirect) + }), + 'StrategyOldest': + _reflection.GeneratedProtocolMessageType( + 'StrategyOldest', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYOLDEST, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyOldest) + }), + 'DESCRIPTOR': + _MODELSEQUENCEBATCHING, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching) + }) +_sym_db.RegisterMessage(ModelSequenceBatching) +_sym_db.RegisterMessage(ModelSequenceBatching.Control) +_sym_db.RegisterMessage(ModelSequenceBatching.ControlInput) +_sym_db.RegisterMessage(ModelSequenceBatching.InitialState) +_sym_db.RegisterMessage(ModelSequenceBatching.State) +_sym_db.RegisterMessage(ModelSequenceBatching.StrategyDirect) +_sym_db.RegisterMessage(ModelSequenceBatching.StrategyOldest) + +ModelEnsembling = _reflection.GeneratedProtocolMessageType( + 'ModelEnsembling', + (_message.Message, ), + { + 'Step': + _reflection.GeneratedProtocolMessageType( + 'Step', + (_message.Message, ), + { + 'InputMapEntry': + _reflection.GeneratedProtocolMessageType( + 'InputMapEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELENSEMBLING_STEP_INPUTMAPENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.InputMapEntry) + }), + 'OutputMapEntry': + _reflection.GeneratedProtocolMessageType( + 'OutputMapEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELENSEMBLING_STEP_OUTPUTMAPENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.OutputMapEntry) + }), + 'DESCRIPTOR': + _MODELENSEMBLING_STEP, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step) + }), + 'DESCRIPTOR': + _MODELENSEMBLING, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelEnsembling) + }) +_sym_db.RegisterMessage(ModelEnsembling) +_sym_db.RegisterMessage(ModelEnsembling.Step) +_sym_db.RegisterMessage(ModelEnsembling.Step.InputMapEntry) +_sym_db.RegisterMessage(ModelEnsembling.Step.OutputMapEntry) + +ModelParameter = _reflection.GeneratedProtocolMessageType( + 'ModelParameter', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELPARAMETER, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelParameter) + }) +_sym_db.RegisterMessage(ModelParameter) + +ModelWarmup = _reflection.GeneratedProtocolMessageType( + 'ModelWarmup', + (_message.Message, ), + { + 'Input': + _reflection.GeneratedProtocolMessageType( + 'Input', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELWARMUP_INPUT, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelWarmup.Input) + }), + 'InputsEntry': + _reflection.GeneratedProtocolMessageType( + 'InputsEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELWARMUP_INPUTSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelWarmup.InputsEntry) + }), + 'DESCRIPTOR': + _MODELWARMUP, + '__module__': + 'model_config.protxt_pb2' + # 
@@protoc_insertion_point(class_scope:inference.ModelWarmup) + }) +_sym_db.RegisterMessage(ModelWarmup) +_sym_db.RegisterMessage(ModelWarmup.Input) +_sym_db.RegisterMessage(ModelWarmup.InputsEntry) + +ModelOperations = _reflection.GeneratedProtocolMessageType( + 'ModelOperations', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELOPERATIONS, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelOperations) + }) +_sym_db.RegisterMessage(ModelOperations) + +ModelTransactionPolicy = _reflection.GeneratedProtocolMessageType( + 'ModelTransactionPolicy', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELTRANSACTIONPOLICY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelTransactionPolicy) + }) +_sym_db.RegisterMessage(ModelTransactionPolicy) + +ModelRepositoryAgents = _reflection.GeneratedProtocolMessageType( + 'ModelRepositoryAgents', + (_message.Message, ), + { + 'Agent': + _reflection.GeneratedProtocolMessageType( + 'Agent', + (_message.Message, ), + { + 'ParametersEntry': + _reflection.GeneratedProtocolMessageType( + 'ParametersEntry', + (_message.Message, ), + { + 'DESCRIPTOR': + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent.ParametersEntry) + }), + 'DESCRIPTOR': + _MODELREPOSITORYAGENTS_AGENT, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent) + }), + 'DESCRIPTOR': + _MODELREPOSITORYAGENTS, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents) + }) +_sym_db.RegisterMessage(ModelRepositoryAgents) +_sym_db.RegisterMessage(ModelRepositoryAgents.Agent) +_sym_db.RegisterMessage(ModelRepositoryAgents.Agent.ParametersEntry) + +ModelResponseCache = _reflection.GeneratedProtocolMessageType( + 'ModelResponseCache', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELRESPONSECACHE, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelResponseCache) + }) +_sym_db.RegisterMessage(ModelResponseCache) + +ModelConfig = _reflection.GeneratedProtocolMessageType( + 'ModelConfig', + (_message.Message, ), + { + 'CcModelFilenamesEntry': + _reflection.GeneratedProtocolMessageType( + 'CcModelFilenamesEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELCONFIG_CCMODELFILENAMESENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig.CcModelFilenamesEntry) + }), + 'MetricTagsEntry': + _reflection.GeneratedProtocolMessageType( + 'MetricTagsEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELCONFIG_METRICTAGSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig.MetricTagsEntry) + }), + 'ParametersEntry': + _reflection.GeneratedProtocolMessageType( + 'ParametersEntry', + (_message.Message, ), + { + 'DESCRIPTOR': _MODELCONFIG_PARAMETERSENTRY, + '__module__': 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig.ParametersEntry) + }), + 'DESCRIPTOR': + _MODELCONFIG, + '__module__': + 'model_config.protxt_pb2' + # @@protoc_insertion_point(class_scope:inference.ModelConfig) + }) +_sym_db.RegisterMessage(ModelConfig) +_sym_db.RegisterMessage(ModelConfig.CcModelFilenamesEntry) +_sym_db.RegisterMessage(ModelConfig.MetricTagsEntry) +_sym_db.RegisterMessage(ModelConfig.ParametersEntry) 
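For orientation: the _sym_db.RegisterMessage(...) calls above publish every generated class to protobuf's default symbol database under the fully qualified names listed in the class_scope comments. A minimal usage sketch — assuming this generated module has already been imported (so the registrations have run), and assuming ModelConfig carries Triton's usual name and max_batch_size fields:

    from google.protobuf import symbol_database
    from google.protobuf import text_format

    # Resolve a class registered above by its fully qualified proto name.
    ModelConfigCls = symbol_database.Default().GetSymbol('inference.ModelConfig')

    # Round-trip a config through the pbtxt text representation.
    cfg = ModelConfigCls(name='demo_model', max_batch_size=4)  # field names assumed
    restored = text_format.Parse(text_format.MessageToString(cfg), ModelConfigCls())
    assert restored.max_batch_size == 4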
+ +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._options = None + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_options = b'8\001' + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._options = None + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_options = b'8\001' + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._options = None + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_options = b'8\001' + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._options = None + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_options = b'8\001' + _MODELENSEMBLING_STEP_INPUTMAPENTRY._options = None + _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_options = b'8\001' + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._options = None + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_options = b'8\001' + _MODELWARMUP_INPUTSENTRY._options = None + _MODELWARMUP_INPUTSENTRY._serialized_options = b'8\001' + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._options = None + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_options = b'8\001' + _MODELCONFIG_CCMODELFILENAMESENTRY._options = None + _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_options = b'8\001' + _MODELCONFIG_METRICTAGSENTRY._options = None + _MODELCONFIG_METRICTAGSENTRY._serialized_options = b'8\001' + _MODELCONFIG_PARAMETERSENTRY._options = None + _MODELCONFIG_PARAMETERSENTRY._serialized_options = b'8\001' + _DATATYPE._serialized_start = 8137 + _DATATYPE._serialized_end = 8387 + _MODELRATELIMITER._serialized_start = 35 + _MODELRATELIMITER._serialized_end = 185 + _MODELRATELIMITER_RESOURCE._serialized_start = 130 + _MODELRATELIMITER_RESOURCE._serialized_end = 185 + _MODELINSTANCEGROUP._serialized_start = 188 + _MODELINSTANCEGROUP._serialized_end = 707 + _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_start = 484 + _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_end = 640 + _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_start = 603 + _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_end = 640 + _MODELINSTANCEGROUP_KIND._serialized_start = 642 + _MODELINSTANCEGROUP_KIND._serialized_end = 707 + _MODELTENSORRESHAPE._serialized_start = 709 + _MODELTENSORRESHAPE._serialized_end = 744 + _MODELINPUT._serialized_start = 747 + _MODELINPUT._serialized_end = 1053 + _MODELINPUT_FORMAT._serialized_start = 994 + _MODELINPUT_FORMAT._serialized_end = 1053 + _MODELOUTPUT._serialized_start = 1056 + _MODELOUTPUT._serialized_end = 1234 + _BATCHINPUT._serialized_start = 1237 + _BATCHINPUT._serialized_end = 1582 + _BATCHINPUT_KIND._serialized_start = 1377 + _BATCHINPUT_KIND._serialized_end = 1582 + _BATCHOUTPUT._serialized_start = 1585 + _BATCHOUTPUT._serialized_end = 1728 + _BATCHOUTPUT_KIND._serialized_start = 1686 + _BATCHOUTPUT_KIND._serialized_end = 1728 + _MODELVERSIONPOLICY._serialized_start = 1731 + _MODELVERSIONPOLICY._serialized_end = 2003 + _MODELVERSIONPOLICY_LATEST._serialized_start = 1919 + _MODELVERSIONPOLICY_LATEST._serialized_end = 1949 + _MODELVERSIONPOLICY_ALL._serialized_start = 1951 + _MODELVERSIONPOLICY_ALL._serialized_end = 1956 + _MODELVERSIONPOLICY_SPECIFIC._serialized_start = 1958 + _MODELVERSIONPOLICY_SPECIFIC._serialized_end = 1986 + _MODELOPTIMIZATIONPOLICY._serialized_start = 2006 + _MODELOPTIMIZATIONPOLICY._serialized_end = 3795 + _MODELOPTIMIZATIONPOLICY_GRAPH._serialized_start = 2536 + 
_MODELOPTIMIZATIONPOLICY_GRAPH._serialized_end = 2558 + _MODELOPTIMIZATIONPOLICY_CUDA._serialized_start = 2561 + _MODELOPTIMIZATIONPOLICY_CUDA._serialized_end = 3259 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_start = 2711 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_end = 3259 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_start = 2910 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_end = 2930 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_start = 2933 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_end = 3156 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_start = 3055 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_end = 3156 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_start = 3055 + _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_end = 3156 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_start = 3262 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_end = 3682 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_start = 3498 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_end = 3682 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_start = 3633 + _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_end = 3682 + _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_start = 3684 + _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_end = 3720 + _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_start = 3722 + _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_end = 3795 + _MODELQUEUEPOLICY._serialized_start = 3798 + _MODELQUEUEPOLICY._serialized_end = 4017 + _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_start = 3979 + _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_end = 4017 + _MODELDYNAMICBATCHING._serialized_start = 4020 + _MODELDYNAMICBATCHING._serialized_end = 4431 + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_start = 4344 + _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_end = 4431 + _MODELSEQUENCEBATCHING._serialized_start = 4434 + _MODELSEQUENCEBATCHING._serialized_end = 5697 + _MODELSEQUENCEBATCHING_CONTROL._serialized_start = 4759 + _MODELSEQUENCEBATCHING_CONTROL._serialized_end = 5064 + _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_start = 4947 + _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_end = 5064 + _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_start = 5066 + _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_end = 5153 + _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_start = 5156 + _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_end = 5294 + _MODELSEQUENCEBATCHING_STATE._serialized_start = 5297 + _MODELSEQUENCEBATCHING_STATE._serialized_end = 5469 + _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_start = 5471 + _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_end = 5559 + _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_start = 5561 + _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_end = 5678 + _MODELENSEMBLING._serialized_start = 5700 + _MODELENSEMBLING._serialized_end = 6049 + _MODELENSEMBLING_STEP._serialized_start = 5767 + _MODELENSEMBLING_STEP._serialized_end = 6049 + _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_start = 5952 + _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_end = 5999 + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_start = 6001 + _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_end = 6049 + 
_MODELPARAMETER._serialized_start = 6051 + _MODELPARAMETER._serialized_end = 6089 + _MODELWARMUP._serialized_start = 6092 + _MODELWARMUP._serialized_end = 6437 + _MODELWARMUP_INPUT._serialized_start = 6209 + _MODELWARMUP_INPUT._serialized_end = 6360 + _MODELWARMUP_INPUTSENTRY._serialized_start = 6362 + _MODELWARMUP_INPUTSENTRY._serialized_end = 6437 + _MODELOPERATIONS._serialized_start = 6439 + _MODELOPERATIONS._serialized_end = 6485 + _MODELTRANSACTIONPOLICY._serialized_start = 6487 + _MODELTRANSACTIONPOLICY._serialized_end = 6530 + _MODELREPOSITORYAGENTS._serialized_start = 6533 + _MODELREPOSITORYAGENTS._serialized_end = 6763 + _MODELREPOSITORYAGENTS_AGENT._serialized_start = 6615 + _MODELREPOSITORYAGENTS_AGENT._serialized_end = 6763 + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_start = 3633 + _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_end = 3682 + _MODELRESPONSECACHE._serialized_start = 6765 + _MODELRESPONSECACHE._serialized_end = 6801 + _MODELCONFIG._serialized_start = 6804 + _MODELCONFIG._serialized_end = 8134 + _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_start = 7929 + _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_end = 7984 + _MODELCONFIG_METRICTAGSENTRY._serialized_start = 7986 + _MODELCONFIG_METRICTAGSENTRY._serialized_end = 8035 + _MODELCONFIG_PARAMETERSENTRY._serialized_start = 8037 + _MODELCONFIG_PARAMETERSENTRY._serialized_end = 8113 +# @@protoc_insertion_point(module_scope) From 3a628e36eb5592819c7f1eeb97e1c733f257c257 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 10 Jan 2023 16:48:33 +0800 Subject: [PATCH 48/48] remove chinese tips and remove fastdeploy-python in requirements --- requirements.txt | 1 - .../inference/fastdeploy_client/visualizer.py | 49 ++++++++++++++++++- .../component/inference/fastdeploy_lib.py | 9 ++-- .../component/inference/fastdeploy_server.py | 33 +++++++++---- 4 files changed, 77 insertions(+), 15 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9f492a288..a16a15778 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,6 @@ packaging x2paddle rarfile gradio -fastdeploy-python tritonclient[all] attrdict psutil diff --git a/visualdl/component/inference/fastdeploy_client/visualizer.py b/visualdl/component/inference/fastdeploy_client/visualizer.py index 5abe570e0..2c6abe0b4 100644 --- a/visualdl/component/inference/fastdeploy_client/visualizer.py +++ b/visualdl/component/inference/fastdeploy_client/visualizer.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# =======================================================================
-import fastdeploy as fd
 import numpy as np
 
 __all__ = [
@@ -24,6 +23,12 @@
 
 
 def visualize_detection(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     boxes = np.array(data['boxes'])
     scores = np.array(data['scores'])
     label_ids = np.array(data['label_ids'])
@@ -40,6 +45,12 @@ def visualize_detection(image, data):
 
 
 def visualize_keypoint_detection(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     keypoints = np.array(data['keypoints'])
     scores = np.array(data['scores'])
     num_joints = np.array(data['num_joints'])
@@ -54,6 +65,12 @@ def visualize_keypoint_detection(image, data):
 
 
 def visualize_face_detection(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     data = np.array(data['data'])
     scores = np.array(data['scores'])
     landmarks = np.array(data['landmarks'])
@@ -70,6 +87,12 @@ def visualize_face_detection(image, data):
 
 
 def visualize_face_alignment(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     landmarks = np.array(data['landmarks'])
 
     facealignment_result = fd.C.vision.FaceAlignmentResult()
@@ -80,6 +103,12 @@ def visualize_face_alignment(image, data):
 
 
 def visualize_segmentation(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     label_ids = np.array(data['label_ids'])
     score_map = np.array(data['score_map'])
     shape = np.array(data['shape'])
@@ -94,6 +123,12 @@ def visualize_segmentation(image, data):
 
 
 def visualize_matting(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     alpha = np.array(data['alpha'])
     foreground = np.array(data['foreground'])
     contain_foreground = data['contain_foreground']
@@ -110,6 +145,12 @@ def visualize_matting(image, data):
 
 
 def visualize_ocr(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     boxes = np.array(data['boxes'])
     text = np.array(data['text'])
     rec_scores = np.array(data['rec_scores'])
@@ -128,6 +169,12 @@ def visualize_ocr(image, data):
 
 
 def visualize_headpose(image, data):
+    try:
+        import fastdeploy as fd
+    except ImportError:
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
     euler_angles = np.array(data['euler_angles'])
 
     headpose_result = fd.C.vision.HeadPoseResult()
diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py
index
3c0138f93..5264c6e77 100644
--- a/visualdl/component/inference/fastdeploy_lib.py
+++ b/visualdl/component/inference/fastdeploy_lib.py
@@ -131,7 +131,9 @@ def analyse_config(cur_dir: str):
             all_model_versions[model_name]['1'] = []
     if not all_model_configs:
-        raise Exception('所选择的路径不是一个有效的模型库,请选择正确的路径')
+        raise Exception(
+            'The selected path is not a valid model repository, please choose a valid path.'
+        )
     return all_model_configs, all_model_versions
@@ -454,8 +456,9 @@ def launch_process(kwargs: dict):
             start_args[key] = value
     if start_args['server-name'] and start_args['server-name'] in os.listdir(
             FASTDEPLOYSERVER_PATH):
-        raise RuntimeError("启动服务失败,服务名称{}已经被使用,请重新填写服务名称".format(
-            start_args['server-name']))
+        raise RuntimeError(
+            "Failed to launch server, server name {} is already in use, please choose a different server name."
+            .format(start_args['server-name']))
     all_model_configs, all_model_versions = analyse_config(
         start_args['model-repository'])
     model_repo_config = original_format_to_exchange_format(
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
index 6397c33e5..89b0b13ff 100644
--- a/visualdl/component/inference/fastdeploy_server.py
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -22,7 +22,6 @@
 from multiprocessing import Process
 from pathlib import Path
 
-import fastdeploy as fd
 import requests
 
 from .fastdeploy_client.client_app import create_gradio_client_app
@@ -89,7 +88,8 @@ def config_update(self, cur_dir, model_name, config, config_filename):
         # backup config filename: {original_name}_vdlbackup_{datetime}.pbtxt
         # backup config can only used to restore config.pbtxt
         if 'vdlbackup' in config_filename:
-            raise RuntimeError("备份的配置文件不允许修改")
+            raise RuntimeError(
+                "Backup config files are not permitted to be modified.")
         basename = os.path.splitext(config_filename)[0]
         shutil.copy(
             os.path.join(model_dir, config_filename),
@@ -107,7 +107,8 @@ def start_server(self, configs):
         process = launch_process(configs)
         if process.poll() is not None:
             raise RuntimeError(
-                "启动fastdeployserver服务器失败,请检查环境中是否存在fastdeployserver程序")
+                "Failed to launch fastdeployserver, please check that fastdeployserver is installed in the environment."
+            )
         server_name = configs['server-name'] if configs[
             'server-name'] else str(process.pid)
         self.opened_servers[server_name] = process
@@ -153,8 +154,8 @@ def check_server_alive(self, server_id):
         self._poll_zombie_process()
         if check_process_zombie(server_id) is True:
             raise RuntimeError(
-                "服务{}由于发生异常或者被kill而退出,通常是由于启动参数设置不当或者环境配置有问题,请检查服务日志查看原因,然后手动关闭该服务项"
-                .format(server_id))
+                "Server {} is down due to an exception or was killed, please check "
+                "the log for the reason, then close this server.".format(server_id))
         return
 
     @result()
@@ -170,7 +171,8 @@ def get_pretrain_model_list(self):
             'http://paddlepaddle.org.cn/paddlehub/fastdeploy_listmodels')
         result = res.json()
         if result['status'] != 0:
-            raise RuntimeError("从hub的模型服务器请求模型列表失败")
+            raise RuntimeError(
+                "Failed to get the pre-trained model list from the hub server.")
         else:
             data = result['data']
             model_list = {}
@@ -199,6 +201,12 @@ def download_pretrain_model(self, cur_dir, model_name, version,
                                 pretrain_model_name):
         version_resource_dir = os.path.join(
             os.path.abspath(cur_dir), model_name, version)
+        try:
+            import fastdeploy as fd
+        except ImportError:
+            raise RuntimeError(
+                "fastdeploy is required for downloading pre-trained models, please refer to "
+                "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
         model_path = fd.download_model(
             name=pretrain_model_name, path=version_resource_dir)
         if model_path:
@@ -240,7 +248,9 @@ def download_pretrain_model(self, cur_dir, model_name, version,
                 version_filenames_dict_for_frontend)
             return version_info_for_frontend
         else:
-            raise RuntimeError("预训练模型{}下载失败".format(pretrain_model_name))
+            raise RuntimeError(
+                "Failed to download pre-trained model {}.".format(
+                    pretrain_model_name))
 
     @result()
     def get_config_for_model(self, cur_dir, name, config_filename):
@@ -255,7 +265,8 @@ def delete_config_for_model(self, cur_dir, name, config_filename):
         if self.root_dir not in Path(
                 os.path.abspath(cur_dir)
         ).parents:  # should prevent user remove files outside model-repository
-            raise RuntimeError('所删除的文件路径有误')
+            raise RuntimeError(
+                'Failed to delete config file, please check the file path.')
         if os.path.exists(os.path.join(cur_dir, name, config_filename)):
             os.remove(os.path.join(cur_dir, name, config_filename))
         return get_config_filenames_for_one_model(cur_dir, name)
@@ -280,7 +291,8 @@ def delete_resource_for_model(self, cur_dir, model_name, version,
         if self.root_dir not in Path(
                 os.path.abspath(cur_dir)
         ).parents:  # should prevent user remove files outside model-repository
-            raise RuntimeError('所删除的文件路径有误')
+            raise RuntimeError(
+                'Failed to delete resource file, please check the file path.')
         resource_path = os.path.join(
             os.path.abspath(cur_dir), model_name, version, resource_filename)
         if os.path.exists(resource_path):
@@ -311,7 +323,8 @@ def rename_resource_for_model(self, cur_dir, model_name, version,
         if self.root_dir not in Path(
                 os.path.abspath(cur_dir)
         ).parents:  # should prevent user remove files outside model-repository
-            raise RuntimeError('所重命名的文件路径有误')
+            raise RuntimeError(
+                'Failed to rename resource file, please check the file path.')
         resource_path = os.path.join(
             os.path.abspath(cur_dir), model_name, version, resource_filename)
         new_file_path = os.path.join(