Add fastdeploy server and client component #1169

Merged

Changes from all commits (52 commits)
f2e6a8f
add backend support for fastdeploy server
rainyfly Nov 21, 2022
a66fa1b
fix
rainyfly Nov 22, 2022
9a92975
add code
rainyfly Nov 23, 2022
65e7a4c
fix
rainyfly Nov 30, 2022
5f00917
fix
rainyfly Dec 1, 2022
8af5ab9
add fastdeploy server component
rainyfly Dec 2, 2022
380b950
add fastdeploy server and client
rainyfly Dec 2, 2022
6ca7547
Merge branch 'develop' of https://github.com/PaddlePaddle/VisualDL in…
rainyfly Dec 2, 2022
16e55fd
add exception description
rainyfly Dec 7, 2022
d4c7145
Merge branch 'develop' of https://github.com/PaddlePaddle/VisualDL in…
rainyfly Dec 7, 2022
ef08cf2
fix
rainyfly Dec 7, 2022
50f0802
add model repository judgement
rainyfly Dec 7, 2022
66064d2
add component tab for fastdeploy client
rainyfly Dec 8, 2022
2418dc5
Merge commit 'refs/pull/1181/head' of https://github.com/PaddlePaddle…
rainyfly Dec 8, 2022
690f55d
update more tasks in fastdeploy client
rainyfly Dec 8, 2022
7893333
sort filenames
rainyfly Dec 8, 2022
e5e41b0
backup config
rainyfly Dec 8, 2022
2d857ae
noqa for autogenerated file
rainyfly Dec 8, 2022
7205209
add data validation
rainyfly Dec 9, 2022
6ac4e5e
add __init__ for package
rainyfly Dec 9, 2022
a4407b8
add calculating layout for frontend
rainyfly Dec 12, 2022
cf65c71
add alive server detection and optimize client
rainyfly Dec 14, 2022
fed60fa
add alive server detection and optimize client
rainyfly Dec 14, 2022
b9f0d07
add alive server detection and optimize client
rainyfly Dec 14, 2022
99af968
add metrics in gradio client
rainyfly Dec 15, 2022
1eb15fc
update presentation
rainyfly Dec 16, 2022
d6abc5a
Change return value to None for frontend performance data when server…
rainyfly Dec 16, 2022
41f5dfd
add get_server_config and download_pretrain_model api
rainyfly Dec 23, 2022
0421892
add get_server_config and download_pretrain_model api
rainyfly Dec 23, 2022
e7c9e53
add unit for metric table
rainyfly Dec 27, 2022
4c67a02
add unit for metric table
rainyfly Dec 27, 2022
15c2340
fix a bug
rainyfly Dec 28, 2022
f4e6a95
add judgement pretrained model download
rainyfly Dec 28, 2022
a31c40c
add judgement pretrained model download
rainyfly Dec 28, 2022
8d36f91
add version info for frontend
rainyfly Dec 28, 2022
cefa9f7
rename download model
rainyfly Dec 29, 2022
86caf85
fix a bug
rainyfly Dec 29, 2022
8c50447
add fastdeploy model list
rainyfly Dec 29, 2022
db7ba0b
optimize for choose configuration files
rainyfly Jan 3, 2023
6ac3b4b
modify according to frontend need
rainyfly Jan 4, 2023
1067387
fix name in config to model name
rainyfly Jan 5, 2023
c37fe8e
optimize for server list and alive judgement
rainyfly Jan 6, 2023
1acff82
keep server name as string type
rainyfly Jan 6, 2023
29c72e8
optimize process judgement logic
rainyfly Jan 6, 2023
92005ce
optimize for deleting resource files
rainyfly Jan 8, 2023
aeb2c9b
add rename resource file
rainyfly Jan 9, 2023
23a6c69
fix
rainyfly Jan 9, 2023
00566df
fix a bug
rainyfly Jan 9, 2023
2ec2a2c
Merge branch 'develop' of https://github.com/PaddlePaddle/VisualDL in…
rainyfly Jan 9, 2023
86c73cd
optimize code structure
rainyfly Jan 10, 2023
7c3c3b7
optimize code structure
rainyfly Jan 10, 2023
3a628e3
remove chinese tips and remove fastdeploy-python in requirements
rainyfly Jan 10, 2023
6 changes: 5 additions & 1 deletion requirements.txt
@@ -12,4 +12,8 @@ multiprocess
 packaging
 x2paddle
 rarfile
-onnx >= 1.6.0
+gradio
+tritonclient[all]
+attrdict
+psutil
+onnx >= 1.6.0
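
These additions support the new component: gradio serves the client web UI, tritonclient[all] provides the Triton-compatible HTTP client used to reach fastdeployserver, attrdict gives attribute-style access to model metadata, and psutil most likely backs the server process and liveness checks added in the "alive server detection" commits. The onnx requirement is unchanged; it is only moved to the end of the file.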
14 changes: 14 additions & 0 deletions visualdl/component/inference/fastdeploy_client/__init__.py
@@ -0,0 +1,14 @@
# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
409 changes: 409 additions & 0 deletions visualdl/component/inference/fastdeploy_client/client_app.py

Large diffs are not rendered by default.
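
client_app.py assembles the gradio interface for the client. Since the diff is not rendered here, the following is only a minimal sketch of the general pattern; the layout, labels, and wiring below are assumptions, not the actual 409-line implementation:

import json

import gradio as gr
import requests


def raw_infer(server_addr, model_name, model_version, payload):
    # POST a raw Triton-v2 inference request, mirroring
    # HttpClientManager.raw_infer in the module below
    url = 'http://{}/v2/models/{}/versions/{}/infer'.format(
        server_addr, model_name, model_version)
    return json.dumps(requests.post(url, data=payload).json())


demo = gr.Interface(
    fn=raw_infer,
    inputs=[
        gr.Textbox(label='server address'),
        gr.Textbox(label='model name'),
        gr.Textbox(label='model version'),
        gr.Textbox(label='request JSON')
    ],
    outputs=gr.Textbox(label='response JSON'))
# demo.launch()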

304 changes: 304 additions & 0 deletions visualdl/component/inference/fastdeploy_client/http_client_manager.py
@@ -0,0 +1,304 @@
# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import json
import re

import numpy as np
import requests
import tritonclient.http as httpclient
from attrdict import AttrDict
from tritonclient.utils import InferenceServerException


def convert_http_metadata_config(metadata):
metadata = AttrDict(metadata)

return metadata


def prepare_request(inputs_meta, inputs_data, outputs_meta):
    '''
    inputs_meta: input metadata reported by the model (maps name -> info)
    inputs_data: user-supplied input data (maps name -> data)
    '''
# Set the input data
inputs = []
for input_dict in inputs_meta:
input_name = input_dict['name']
if input_name not in inputs_data:
            raise RuntimeError(
                'Error: input name {} required by the model is missing.'.format(
                    input_name))
if input_dict['datatype'] == 'FP32':
inputs_data[input_name] = inputs_data[input_name].astype(
np.float32
) / 255 # image data returned by gradio is uint8, convert to fp32
if len(input_dict['shape']
) == 3 and input_dict['shape'][0] == 3: # NCHW
inputs_data[input_name] = inputs_data[input_name][0].transpose(
2, 0, 1)
elif len(input_dict['shape']
) == 4 and input_dict['shape'][1] == 3: # NCHW
inputs_data[input_name] = inputs_data[input_name].transpose(
0, 3, 1, 2)
infer_input = httpclient.InferInput(
input_name, inputs_data[input_name].shape, input_dict['datatype'])
infer_input.set_data_from_numpy(inputs_data[input_name])
inputs.append(infer_input)
outputs = []
for output_dict in outputs_meta:
infer_output = httpclient.InferRequestedOutput(output_dict.name)
outputs.append(infer_output)
return inputs, outputs
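
# Illustrative usage sketch (the model name, shapes, and data here are
# assumptions for demonstration, not taken from this PR). For a model with
# one FP32 input of shape [1, 3, 224, 224], a uint8 HWC image as returned
# by gradio is scaled to [0, 1] and transposed to NCHW before being wrapped
# in an InferInput:
#
#     import numpy as np
#     inputs_meta = [{'name': 'x', 'datatype': 'FP32', 'shape': [1, 3, 224, 224]}]
#     image = np.random.randint(0, 256, (1, 224, 224, 3), dtype=np.uint8)
#     inputs, outputs = prepare_request(inputs_meta, {'x': image}, [])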


metrics_table_head = """
<style>
table, th {{
border:0.1px solid black;
}}
</style>

<div>
<table style="width:100%">
<tr>
<th rowspan="2">Model Name</th>
<th colspan="4">Execution Statistics</th>
<th colspan="5">Latency Statistics</th>

</tr>
<tr>
<th>Successful Requests</th>
<th>Failed Requests</th>
<th>Inference Batches</th>
<th>Inference Samples</th>
<th>Request Duration (ms)</th>
<th>Queue Wait Time (ms)</th>
<th>Input Processing Time (ms)</th>
<th>Model Inference Time (ms)</th>
<th>Output Processing Time (ms)</th>
</tr>
{}
</table>
</div>
<br>
<br>
<br>
<br>
<br>
<div>
<table style="width:100%">
<tr>
<th rowspan="2">GPU</th>
<th colspan="4">性能指标</th>
<th colspan="2">显存</th>
</tr>
<tr>
<th>Utilization (%)</th>
<th>Power (W)</th>
<th>Power Limit (W)</th>
<th>Energy Consumption (W)</th>
<th>Total (GB)</th>
<th>Used (GB)</th>
</tr>
{}
</table>
</div>
"""


def get_metric_data(server_addr, metric_port): # noqa:C901
'''
    Get metrics data from the fastdeploy server and transform it into an HTML table.
Args:
server_addr(str): fastdeployserver ip address
metric_port(int): fastdeployserver metrics port
Returns:
htmltable(str): html table to show metrics data
'''
model_table = {}
gpu_table = {}
metric_column_name = {
"Model": {
"nv_inference_request_success", "nv_inference_request_failure",
"nv_inference_count", "nv_inference_exec_count",
"nv_inference_request_duration_us",
"nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
},
"GPU": {
"nv_gpu_power_usage", "nv_gpu_power_limit",
"nv_energy_consumption", "nv_gpu_utilization",
"nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
},
"CPU": {
"nv_cpu_utilization", "nv_cpu_memory_total_bytes",
"nv_cpu_memory_used_bytes"
}
}
try:
res = requests.get("http://{}:{}/metrics".format(
server_addr, metric_port))
except Exception:
return metrics_table_head.format('', '')
metric_content = res.text
for content in metric_content.split('\n'):
if content.startswith('#'):
continue
else:
res = re.match(r'(\w+){(.*)} (\w+)',
content) # match output by server metrics interface
if not res:
continue
metric_name = res.group(1)
model = res.group(2)
value = res.group(3)
infos = {}
for info in model.split(','):
k, v = info.split('=')
v = v.strip('"')
infos[k] = v
if metric_name in [
"nv_inference_request_duration_us",
"nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
]:
value = str(float(value) / 1000)
elif metric_name in [
"nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
]:
value = str(float(value) / 1024 / 1024 / 1024)
for key, metric_names in metric_column_name.items():
if metric_name in metric_names:
if key == 'Model':
model_name = infos['model']
if model_name not in model_table:
model_table[model_name] = {}
model_table[model_name][metric_name] = value
elif key == 'GPU':
gpu_name = infos['gpu_uuid']
if gpu_name not in gpu_table:
gpu_table[gpu_name] = {}
gpu_table[gpu_name][metric_name] = value
elif key == 'CPU':
pass
model_data_list = []
gpu_data_list = []
model_data_metric_names = [
"nv_inference_request_success", "nv_inference_request_failure",
"nv_inference_exec_count", "nv_inference_count",
"nv_inference_request_duration_us", "nv_inference_queue_duration_us",
"nv_inference_compute_input_duration_us",
"nv_inference_compute_infer_duration_us",
"nv_inference_compute_output_duration_us"
]
gpu_data_metric_names = [
"nv_gpu_utilization", "nv_gpu_power_usage", "nv_gpu_power_limit",
"nv_energy_consumption", "nv_gpu_memory_total_bytes",
"nv_gpu_memory_used_bytes"
]
for k, v in model_table.items():
data = []
data.append(k)
for data_metric in model_data_metric_names:
data.append(v[data_metric])
model_data_list.append(data)
for k, v in gpu_table.items():
data = []
data.append(k)
for data_metric in gpu_data_metric_names:
data.append(v[data_metric])
gpu_data_list.append(data)
model_data = '\n'.join([
"<tr>" + '\n'.join(["<td>" + item + "</td>"
for item in data]) + "</tr>"
for data in model_data_list
])
gpu_data = '\n'.join([
"<tr>" + '\n'.join(["<td>" + item + "</td>"
for item in data]) + "</tr>"
for data in gpu_data_list
])
return metrics_table_head.format(model_data, gpu_data)
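
# For reference, the server's /metrics endpoint returns Prometheus-style
# lines like the following (values illustrative), which the regex above
# splits into metric name, label set, and value:
#
#     nv_inference_request_success{model="yolov5",version="1"} 510
#     nv_inference_request_duration_us{model="yolov5",version="1"} 8310000
#     nv_gpu_memory_used_bytes{gpu_uuid="GPU-abcdef"} 268435456
#
# Duration metrics are converted from microseconds to milliseconds and
# memory metrics from bytes to GB before being rendered into the HTML
# tables defined in metrics_table_head.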


class HttpClientManager:
def __init__(self):
self.clients = {} # server url: httpclient

def _create_client(self, server_url):
if server_url in self.clients:
return self.clients[server_url]
try:
fastdeploy_client = httpclient.InferenceServerClient(server_url)
self.clients[server_url] = fastdeploy_client
return fastdeploy_client
except Exception:
            raise RuntimeError(
                'Cannot connect to server {}, please check your '
                'server address'.format(server_url))

def infer(self, server_url, model_name, model_version, inputs):
fastdeploy_client = self._create_client(server_url)
input_metadata, output_metadata = self.get_model_meta(
server_url, model_name, model_version)
inputs, outputs = prepare_request(input_metadata, inputs,
output_metadata)
response = fastdeploy_client.infer(
model_name, inputs, model_version=model_version, outputs=outputs)

results = {}
for output in output_metadata:
result = response.as_numpy(output.name) # datatype: numpy
if output.datatype == 'BYTES': # datatype: bytes
try:
value = result
if len(result.shape) == 1:
value = result[0]
elif len(result.shape) == 2:
value = result[0][0]
elif len(result.shape) == 3:
value = result[0][0][0]
result = json.loads(value) # datatype: json
except Exception:
pass
else:
result = result[0]
results[output.name] = result
return results

def raw_infer(self, server_url, model_name, model_version, raw_input):
url = 'http://{}/v2/models/{}/versions/{}/infer'.format(
server_url, model_name, model_version)
res = requests.post(url, data=json.dumps(json.loads(raw_input)))
return json.dumps(res.json())

def get_model_meta(self, server_url, model_name, model_version):
fastdeploy_client = self._create_client(server_url)
try:
model_metadata = fastdeploy_client.get_model_metadata(
model_name=model_name, model_version=model_version)
except InferenceServerException as e:
raise RuntimeError("Failed to retrieve the metadata: " + str(e))

model_metadata = convert_http_metadata_config(model_metadata)

input_metadata = model_metadata.inputs
output_metadata = model_metadata.outputs
return input_metadata, output_metadata
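
The manager caches one tritonclient connection per server URL and layers three operations on top: metadata lookup (get_model_meta), structured inference with automatic preprocessing (infer), and pass-through JSON inference (raw_infer). A minimal usage sketch, assuming a fastdeployserver reachable at localhost:8000 serving a model named "yolov5" (the address, model name, and metrics port below are illustrative assumptions):

import numpy as np

from visualdl.component.inference.fastdeploy_client.http_client_manager import (
    HttpClientManager, get_metric_data)

manager = HttpClientManager()

# Inspect the model's declared inputs and outputs
input_meta, output_meta = manager.get_model_meta('localhost:8000', 'yolov5', '1')
print([(inp.name, inp.datatype, inp.shape) for inp in input_meta])

# Feed a dummy uint8 HWC image per input; infer() converts FP32 inputs
# to [0, 1] floats and transposes them to NCHW as needed
feeds = {
    inp.name: np.random.randint(0, 256, (1, 640, 640, 3), dtype=np.uint8)
    for inp in input_meta
}
results = manager.infer('localhost:8000', 'yolov5', '1', feeds)

# Metrics are served on a separate port (commonly 8002 for Triton-based servers)
html_table = get_metric_data('localhost', 8002)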