Add fastdeploy server and client component (#1169)
* add backend support for fastdeploy server

* fix

* add code

* fix

* fix

* add fastdeploy server component

* add fastdeploy server and client

* add exception description

* fix

* add model repository judgement

* add component tab for fastdeploy client

* update more tasks in fastdeploy client

* sort filenames

* backup config

* noqa for autogenerated file

* add data validation

* add __init__ for package

* add layout calculation for frontend

* add alive server detection and optimize client

* add alive server detection and optimize client

* add alive server detection and optimize client

* add metrics in gradio client

* update presentation

* Change return value to None for frontend performance data when server not ready

* add get_server_config and download_pretrain_model api

* add get_server_config and download_pretrain_model api

* add unit for metric table

* add unit for metric table

* fix a bug

* add judgement for pretrained model download

* add judgement for pretrained model download

* add version info for frontend

* rename download model

* fix a bug

* add fastdeploy model list

* optimize choosing configuration files

* modify according to frontend need

* fix name in config to model name

* optimize for server list and alive judgement

* keep server name as string type

* optimize process judgement logic

* optimize for deleting resource files

* add rename resource file

* fix

* fix a bug

* optimize code structure

* optimize code structure

* remove chinese tips and remove fastdeploy-python in requirements
rainyfly authored Jan 10, 2023
1 parent b90619b commit a418dd4
Showing 14 changed files with 5,145 additions and 3 deletions.
6 changes: 5 additions & 1 deletion requirements.txt
@@ -12,4 +12,8 @@ multiprocess
packaging
x2paddle
rarfile
onnx >= 1.6.0
gradio
tritonclient[all]
attrdict
psutil
onnx >= 1.6.0
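
The four new dependencies support the client component: gradio renders the web UI, tritonclient[all] provides the Triton HTTP inference client, attrdict gives attribute-style access to model metadata, and psutil presumably backs the server process checks mentioned in the commit history. A quick sanity check that the additions are importable:

import gradio  # web UI for the client
import psutil  # process and liveness inspection
import tritonclient.http  # Triton HTTP inference client
from attrdict import AttrDict  # attribute-style dict access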
14 changes: 14 additions & 0 deletions visualdl/component/inference/fastdeploy_client/__init__.py
@@ -0,0 +1,14 @@
# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
409 changes: 409 additions & 0 deletions visualdl/component/inference/fastdeploy_client/client_app.py

Large diffs are not rendered by default.

304 changes: 304 additions & 0 deletions visualdl/component/inference/fastdeploy_client/http_client_manager.py
@@ -0,0 +1,304 @@
# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import json
import re

import numpy as np
import requests
import tritonclient.http as httpclient
from attrdict import AttrDict
from tritonclient.utils import InferenceServerException


def convert_http_metadata_config(metadata):
metadata = AttrDict(metadata)

return metadata
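
A side note, not part of the diff: AttrDict recursively exposes dict keys as attributes, which is why later code can write metadata.inputs and output_dict.name instead of subscripting. A minimal illustration with hypothetical metadata:

meta = convert_http_metadata_config(
    {'inputs': [{'name': 'INPUT_0'}], 'outputs': [{'name': 'OUTPUT_0'}]})
assert meta.inputs[0].name == 'INPUT_0'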


def prepare_request(inputs_meta, inputs_data, outputs_meta):
    '''
    Build Triton InferInput/InferRequestedOutput objects for an inference request.
    inputs_meta: input metadata from the model, keyed by input name.
    inputs_data: user-supplied input data, keyed by input name.
    outputs_meta: output metadata from the model.
    '''
# Set the input data
inputs = []
for input_dict in inputs_meta:
input_name = input_dict['name']
if input_name not in inputs_data:
            raise RuntimeError(
                'Error: input name {} required by the model does not exist.'
                .format(input_name))
        if input_dict['datatype'] == 'FP32':
            # Image data returned by gradio is uint8; convert to fp32.
            inputs_data[input_name] = inputs_data[input_name].astype(
                np.float32) / 255
            shape = input_dict['shape']
            if len(shape) == 3 and shape[0] == 3:  # model expects CHW
                inputs_data[input_name] = inputs_data[input_name][0].transpose(
                    2, 0, 1)
            elif len(shape) == 4 and shape[1] == 3:  # model expects NCHW
                inputs_data[input_name] = inputs_data[input_name].transpose(
                    0, 3, 1, 2)
infer_input = httpclient.InferInput(
input_name, inputs_data[input_name].shape, input_dict['datatype'])
infer_input.set_data_from_numpy(inputs_data[input_name])
inputs.append(infer_input)
outputs = []
for output_dict in outputs_meta:
infer_output = httpclient.InferRequestedOutput(output_dict.name)
outputs.append(infer_output)
return inputs, outputs
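
A minimal usage sketch for prepare_request, assuming a single-input image model; the tensor names, shapes, and data below are illustrative, not part of the diff:

import numpy as np
from attrdict import AttrDict

inputs_meta = [
    AttrDict({'name': 'INPUT_0', 'datatype': 'FP32', 'shape': [3, 224, 224]})
]
outputs_meta = [AttrDict({'name': 'OUTPUT_0'})]
# gradio delivers images as uint8 HWC arrays with a leading batch dimension
inputs_data = {'INPUT_0': np.zeros((1, 224, 224, 3), dtype=np.uint8)}
inputs, outputs = prepare_request(inputs_meta, inputs_data, outputs_meta)
# inputs[0] now holds a CHW fp32 tensor ready for InferenceServerClient.infer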


metrics_table_head = """
<style>
table, th {{
border:0.1px solid black;
}}
</style>
<div>
<table style="width:100%">
<tr>
<th rowspan="2">Model name</th>
<th colspan="4">Execution statistics</th>
<th colspan="5">Latency statistics</th>
</tr>
<tr>
<th>Successful requests</th>
<th>Failed requests</th>
<th>Inference batches</th>
<th>Inference samples</th>
<th>Request latency (ms)</th>
<th>Queue wait time (ms)</th>
<th>Input processing time (ms)</th>
<th>Model inference time (ms)</th>
<th>Output processing time (ms)</th>
</tr>
{}
</table>
</div>
<br>
<br>
<br>
<br>
<br>
<div>
<table style="width:100%">
<tr>
<th rowspan="2">GPU</th>
<th colspan="4">Performance metrics</th>
<th colspan="2">GPU memory</th>
</tr>
<tr>
<th>Utilization (%)</th>
<th>Power usage (W)</th>
<th>Power limit (W)</th>
<th>Energy consumption (W)</th>
<th>Total (GB)</th>
<th>Used (GB)</th>
</tr>
{}
</table>
</div>
"""


def get_metric_data(server_addr, metric_port): # noqa:C901
    '''
    Get metrics data from the fastdeploy server and render it as an HTML table.
    Args:
        server_addr(str): fastdeployserver ip address
        metric_port(int): fastdeployserver metrics port
    Returns:
        htmltable(str): html table showing the metrics data
    '''
model_table = {}
gpu_table = {}
metric_column_name = {
"Model": {
"nv_inference_request_success", "nv_inference_request_failure",
"nv_inference_count", "nv_inference_exec_count",
"nv_inference_request_duration_us",
"nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
},
"GPU": {
"nv_gpu_power_usage", "nv_gpu_power_limit",
"nv_energy_consumption", "nv_gpu_utilization",
"nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
},
"CPU": {
"nv_cpu_utilization", "nv_cpu_memory_total_bytes",
"nv_cpu_memory_used_bytes"
}
}
try:
res = requests.get("http://{}:{}/metrics".format(
server_addr, metric_port))
except Exception:
return metrics_table_head.format('', '')
metric_content = res.text
for content in metric_content.split('\n'):
if content.startswith('#'):
continue
else:
            # Match a Prometheus-style metric line from the server, e.g.
            #   nv_inference_request_success{model="yolov5",version="1"} 42
            res = re.match(r'(\w+){(.*)} (\w+)', content)
if not res:
continue
metric_name = res.group(1)
model = res.group(2)
value = res.group(3)
infos = {}
for info in model.split(','):
k, v = info.split('=')
v = v.strip('"')
infos[k] = v
            if metric_name in [
                    "nv_inference_request_duration_us",
                    "nv_inference_queue_duration_us",
                    "nv_inference_compute_input_duration_us",
                    "nv_inference_compute_infer_duration_us",
                    "nv_inference_compute_output_duration_us"
            ]:
                value = str(float(value) / 1000)  # microseconds -> milliseconds
            elif metric_name in [
                    "nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
            ]:
                value = str(float(value) / 1024 / 1024 / 1024)  # bytes -> GB
for key, metric_names in metric_column_name.items():
if metric_name in metric_names:
if key == 'Model':
model_name = infos['model']
if model_name not in model_table:
model_table[model_name] = {}
model_table[model_name][metric_name] = value
elif key == 'GPU':
gpu_name = infos['gpu_uuid']
if gpu_name not in gpu_table:
gpu_table[gpu_name] = {}
gpu_table[gpu_name][metric_name] = value
elif key == 'CPU':
pass
model_data_list = []
gpu_data_list = []
model_data_metric_names = [
"nv_inference_request_success", "nv_inference_request_failure",
"nv_inference_exec_count", "nv_inference_count",
"nv_inference_request_duration_us", "nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
]
gpu_data_metric_names = [
"nv_gpu_utilization", "nv_gpu_power_usage", "nv_gpu_power_limit",
"nv_energy_consumption", "nv_gpu_memory_total_bytes",
"nv_gpu_memory_used_bytes"
]
for k, v in model_table.items():
data = []
data.append(k)
for data_metric in model_data_metric_names:
data.append(v[data_metric])
model_data_list.append(data)
for k, v in gpu_table.items():
data = []
data.append(k)
for data_metric in gpu_data_metric_names:
data.append(v[data_metric])
gpu_data_list.append(data)
model_data = '\n'.join([
"<tr>" + '\n'.join(["<td>" + item + "</td>"
for item in data]) + "</tr>"
for data in model_data_list
])
gpu_data = '\n'.join([
"<tr>" + '\n'.join(["<td>" + item + "</td>"
for item in data]) + "</tr>"
for data in gpu_data_list
])
return metrics_table_head.format(model_data, gpu_data)
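
For example, with a fastdeployserver instance running locally and exposing metrics on port 8002 (address and port are assumptions about the deployment, not part of the diff):

html_table = get_metric_data('localhost', 8002)  # GET http://localhost:8002/metrics
# If the server is unreachable, the table is returned with headers but no rows.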


class HttpClientManager:
def __init__(self):
self.clients = {} # server url: httpclient

def _create_client(self, server_url):
if server_url in self.clients:
return self.clients[server_url]
try:
fastdeploy_client = httpclient.InferenceServerClient(server_url)
self.clients[server_url] = fastdeploy_client
return fastdeploy_client
        except Exception:
            raise RuntimeError(
                'Cannot connect to server {}, please check your '
                'server address'.format(server_url))

def infer(self, server_url, model_name, model_version, inputs):
fastdeploy_client = self._create_client(server_url)
input_metadata, output_metadata = self.get_model_meta(
server_url, model_name, model_version)
inputs, outputs = prepare_request(input_metadata, inputs,
output_metadata)
response = fastdeploy_client.infer(
model_name, inputs, model_version=model_version, outputs=outputs)

results = {}
        for output in output_metadata:
            result = response.as_numpy(output.name)  # result is a numpy array
            if output.datatype == 'BYTES':
                # Unwrap the (possibly nested) bytes tensor and try to decode
                # it as JSON; fall back to the raw value on failure.
                try:
                    value = result
                    if len(result.shape) == 1:
                        value = result[0]
                    elif len(result.shape) == 2:
                        value = result[0][0]
                    elif len(result.shape) == 3:
                        value = result[0][0][0]
                    result = json.loads(value)
                except Exception:
                    pass
            else:
                result = result[0]  # drop the batch dimension
            results[output.name] = result
return results

def raw_infer(self, server_url, model_name, model_version, raw_input):
url = 'http://{}/v2/models/{}/versions/{}/infer'.format(
server_url, model_name, model_version)
        # json.loads/json.dumps round-trip validates the user-supplied payload.
        res = requests.post(url, data=json.dumps(json.loads(raw_input)))
return json.dumps(res.json())

def get_model_meta(self, server_url, model_name, model_version):
fastdeploy_client = self._create_client(server_url)
try:
model_metadata = fastdeploy_client.get_model_metadata(
model_name=model_name, model_version=model_version)
except InferenceServerException as e:
raise RuntimeError("Failed to retrieve the metadata: " + str(e))

model_metadata = convert_http_metadata_config(model_metadata)

input_metadata = model_metadata.inputs
output_metadata = model_metadata.outputs
return input_metadata, output_metadata
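
Putting the pieces together, a sketch of how the client component might drive this manager end to end; the server address, model name, and tensors are placeholders, not part of the diff:

import json
import numpy as np

manager = HttpClientManager()
# Metadata-driven path: shapes and datatypes come from the server.
inputs_meta, outputs_meta = manager.get_model_meta('localhost:8000', 'my_model', '1')
results = manager.infer('localhost:8000', 'my_model', '1',
                        {'INPUT_0': np.zeros((1, 224, 224, 3), dtype=np.uint8)})
# Raw path: a hand-written KServe/Triton v2 request body.
raw_body = json.dumps({
    'inputs': [{
        'name': 'INPUT_0',
        'shape': [1, 2],
        'datatype': 'FP32',
        'data': [0.5, 1.5]
    }]
})
print(manager.raw_infer('localhost:8000', 'my_model', '1', raw_body))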
