Commit

v1.0

Naplesoul committed Feb 17, 2023
1 parent 2813f89 commit d61059f
Showing 231 changed files with 11,442 additions and 6,655 deletions.
5 changes: 5 additions & 0 deletions .gitattributes
@@ -0,0 +1,5 @@
*.pb filter=lfs diff=lfs merge=lfs -text
*.index filter=lfs diff=lfs merge=lfs -text
*.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
benchmarks/inputs/imagenet/* filter=lfs diff=lfs merge=lfs -text
7 changes: 6 additions & 1 deletion .gitignore
@@ -1,3 +1,8 @@
logs
build
install
.vscode
.DS_store

*.plan
*.DS_store
*.tar.gz
16 changes: 13 additions & 3 deletions CMakeLists.txt
@@ -1,8 +1,10 @@
cmake_minimum_required(VERSION 3.5)
project(DISB)

option(SAMPLE_TENSORRT "whether to build tensorrt sample" FALSE)
option(SAMPLE_TFSERVING "whether to build tfserving sample" FALSE)
option(BUILD_TENSORRT "whether to build tensorrt" OFF)
option(BUILD_TRITON "whether to build triton" OFF)
option(BUILD_TFSERVING "whether to build tfserving" OFF)
option(BUILD_REEF "whether to build reef" OFF)

include_directories("${PROJECT_SOURCE_DIR}/include")
aux_source_directory("${PROJECT_SOURCE_DIR}/src" disb_src)
@@ -11,4 +13,12 @@ add_library(disb STATIC ${disb_src})

target_link_libraries(disb jsoncpp pthread)

add_subdirectory(samples)
file(GLOB_RECURSE include_src
${CMAKE_CURRENT_LIST_DIR}/include/*.h
)

install(TARGETS disb ARCHIVE DESTINATION lib)
install(FILES ${include_src} DESTINATION include)

add_subdirectory(samples)
add_subdirectory(benchmarks/frameworks)
114 changes: 114 additions & 0 deletions Makefile
@@ -0,0 +1,114 @@
# BUILD_TYPE = Debug / Release
BUILD_TYPE = Release

BUILD_TENSORRT = OFF
BUILD_TRITON = OFF
BUILD_TFSERVING = OFF

DISB_PATH = $(shell pwd)
BUILD_PATH = ${DISB_PATH}/build
INSTALL_PATH = ${DISB_PATH}/install

TESTCASES = A B C D E REAL

.PHONY: build
build: ${BUILD_PATH}/CMakeCache.txt
	rm -rf ${INSTALL_PATH}; \
	cmake --build ${BUILD_PATH} --target install -- -j$(shell nproc)

.PHONY: install
install: build

${BUILD_PATH}/CMakeCache.txt:
	${MAKE} configure

.PHONY: configure
configure:
	cmake -B${BUILD_PATH} \
		-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
		-DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} \
		-DBUILD_TENSORRT=${BUILD_TENSORRT} \
		-DBUILD_TRITON=${BUILD_TRITON} \
		-DBUILD_TFSERVING=${BUILD_TFSERVING}

.PHONY: trt
trt:
	${MAKE} clean; \
	${MAKE} BUILD_TENSORRT=ON

.PHONY: triton
triton:
	${MAKE} clean; \
	${MAKE} BUILD_TRITON=ON

.PHONY: tfs
tfs:
	${MAKE} clean; \
	${MAKE} BUILD_TFSERVING=ON

.PHONY: trt-test
trt-test:
	@for testcase in ${TESTCASES}; do ${DISB_PATH}/run.sh trt $$testcase; done; \
	echo "Testcases completed, logs are under ${DISB_PATH}/logs, results are under ${DISB_PATH}/benchmarks/results"

.PHONY: triton-test
triton-test:
	@for testcase in ${TESTCASES}; do ${DISB_PATH}/run.sh triton $$testcase; done; \
	echo "Testcases completed, logs are under ${DISB_PATH}/logs, results are under ${DISB_PATH}/benchmarks/results"

.PHONY: tfs-test
tfs-test:
	@for testcase in ${TESTCASES}; do ${DISB_PATH}/run.sh tfs $$testcase; done; \
	echo "Testcases completed, logs are under ${DISB_PATH}/logs, results are under ${DISB_PATH}/benchmarks/results"

.PHONY: trt-container
trt-container:
	docker run -it \
		--name disb-trt8.4 \
		--gpus all \
		-v ${PWD}:/workspace/disb \
		shenwhang/disb-trt8.4:0.1 \
		/bin/bash

.PHONY: triton-front
triton-front:
	docker run -it \
		--name disb-triton-client \
		--net=host \
		-v ${PWD}:/workspace/disb \
		shenwhang/disb-triton-client:0.2 \
		/bin/bash

.PHONY: triton-back
triton-back:
	docker run --rm \
		--name disb-triton-server \
		--gpus all \
		-p8000:8000 -p8001:8001 -p8002:8002 \
		-v ${PWD}/benchmarks/models:/models \
		nvcr.io/nvidia/tritonserver:22.08-py3 \
		tritonserver \
		--model-repository=/models

.PHONY: tfs-front
tfs-front:
	docker run -it \
		--name disb-tfs-client \
		--network host \
		-v ${PWD}:/workspace/disb \
		shenwhang/disb-tfs-client:1.0 \
		/bin/bash

.PHONY: tfs-back
tfs-back:
	docker run -it --rm \
		--name disb-tfs-server \
		--gpus all \
		-p8500:8500 -p8501:8501 \
		-v $(DISB_PATH)/benchmarks/models:/models \
		tensorflow/serving:2.5.4-gpu \
		--model_config_file=/models/models.config

.PHONY: clean
clean:
	rm -rf ${BUILD_PATH} ${INSTALL_PATH}
86 changes: 46 additions & 40 deletions README.md
@@ -1,8 +1,10 @@
# DISB: DNN Inference Serving Benchmark

DISB is a DNN inference serving benchmark with diverse workloads and models. It was originally designed to simulate real-time scenarios, e.g. autonomous driving systems, where both low latency and high throughput are demanded.
**DISB** is a **D**NN **I**nference **S**erving **B**enchmark with diverse workloads and models. It was originally designed to simulate real-time scenarios, e.g. autonomous driving systems, where both low latency and high throughput are demanded.

DISB uses the client-server architecture, where the clients send the DNN inference requests to the server via RPC, and the server returns the inference result. Clients can submit the inference requests periodically or randomly. An inference request should contain the model name (or id), the input data and other customized attributes (e.g., priority or deadline).
DISB uses the client-server architecture, where the clients send the DNN inference requests to the server via RPC, and the server returns the inference result. Clients can submit the inference requests periodically or randomly. An inference request may contain the model name (or id), the input data and other customized attributes (e.g., priority or deadline).

**Note:** Please use git lfs to clone this repo in order to download model files.



@@ -13,7 +15,7 @@ DISB uses the client-server architecture, where the clients send the DNN inferen
- [Build & Install](#build--install)
- [Usage](#usage)
- [Samples](#samples)
- [Benchmark Result](#benchmark-result)
- [Benchmark Results](#benchmark-results)
- [Paper](#paper)
- [The Team](#the-team)
- [Contact Us](#contact-us)
@@ -23,7 +25,7 @@ DISB uses the client-server architecture, where the clients send the DNN inferen

## DISB Toolkit

DISB provides a C++ library (`libdisb`) to perform benchmarking. To integrate your own DNN inference system with DISB, you only need to implement a `DISBClient` to wrap your inference interface. See [usage](#usage) for details.
DISB provides a C++ library (`libdisb`) to perform benchmarking. To integrate your own DNN inference system with DISB, you only need to implement `DISB::Client` to wrap your inference interface. See [usage](#usage) for details.



@@ -32,13 +34,15 @@ DISB provides a C++ library (`libdisb`) to perform benchmarking. To integrate yo
Currently, DISB provides 6 workloads with different DNN models and different numbers of clients.

There are five patterns for submitting inference requests in DISB clients:
1. Uniform Distribution (U): the client sends inference requests periodically, with a fixed frequency (e.g., 20 reqs/s). This pattern is common in data-driven applications (e.g., obstacle detection with cameras).
2. Possion Distribution (P), the client sends inference requests with a Poisson arrival distribution. This pattern can simulate event-driven applications (e.g., speech recoginition).
3. Closed-loop (C), the client continuously issues inference requests, which simulates a contention load.
1. Uniform Distribution (U): The client sends inference requests periodically, with a fixed frequency (e.g., 20 reqs/s). This pattern is common in data-driven applications (e.g., obstacle detection with cameras).
2. Poisson Distribution (P): The client sends inference requests following a Poisson arrival process with a given average arrival rate (e.g., 25 reqs/s); a sketch of how such arrival times can be generated follows this list. This pattern can simulate event-driven applications (e.g., speech recognition).
3. Closed-loop (C): The client continuously sends inference requests, which simulates a contention load.
4. Trace (T): The client sends inference requests according to a given trace file, which contains a series of request time points. This pattern can reproduce real-world workloads.
5. Dependent (D): The client sends inference requests only when all of its prior tasks have completed; the prior tasks can be other clients. This pattern can simulate an inference graph (or inference DAG), where a model needs the output of another model as its input.
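
As an illustration of the Poisson pattern (a standalone sketch, not DISB's built-in load implementation), the inter-arrival gaps of a Poisson process are exponentially distributed with mean 1/rate, so launch times can be generated like this:

```c++
// Standalone sketch: generate Poisson launch times by drawing
// exponentially distributed inter-arrival gaps (mean gap = 1 / rate).
#include <chrono>
#include <random>
#include <vector>

std::vector<std::chrono::system_clock::time_point>
poissonLaunchTimes(double reqsPerSec, int count)
{
    std::mt19937 rng(std::random_device{}());
    std::exponential_distribution<double> gapSeconds(reqsPerSec);

    auto t = std::chrono::system_clock::now();
    std::vector<std::chrono::system_clock::time_point> times;
    for (int i = 0; i < count; ++i) {
        t += std::chrono::duration_cast<std::chrono::system_clock::duration>(
            std::chrono::duration<double>(gapSeconds(rng)));
        times.push_back(t);
    }
    return times;
}
```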

See [workloads](./workloads.md) for workload details.
We combined these patterns into 6 typical workloads for benchmarking; see [workloads](benchmarks/workloads/workloads.md) for workload details.

[TBD] We're still working on providing more representative and general DNN inference serving workloads. We will support dependent load (inference DAG) in next release.
[TBD] We're still working on providing more representative and general DNN inference serving workloads.



@@ -54,13 +58,8 @@ sudo apt install libjsoncpp-dev
Build and install DISB tools:

```shell
# build libdisb.a
cmake -B build
cd build
make -j$(nproc)

# install
sudo make install
# will build and install into disb/install
make build
```


@@ -72,24 +71,35 @@ sudo make install
`DISB::Client` is an adaptor class between DISB and the serving backend. You can implement the following interfaces in its subclass. These interfaces will be called during the benchmark, and their execution time will be recorded by DISB.

```c++
// init() will be called once the benchmark begin
// init() will be called once when the benchmark begins
virtual void init();

// The following interfaces will be called by DISB
// within each inference request during benchmark.
// Average latency of each interface will be recorded.
virtual void prepareInput();
virtual void preprocess();
virtual void copyInput();
virtual void infer();
virtual void copyOutput();
virtual void postprocess();

// If another task depends on this client,
// the InferResult will be passed to the next task.
virtual std::shared_ptr<InferResult> produceResult();
```
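
For example, a minimal subclass might look like the sketch below; the class name and the placeholder comments are hypothetical, only the overridden interfaces come from DISB:

```c++
// Minimal sketch of a DISB::Client subclass; the backend calls are
// placeholders for whatever your serving system actually does.
#include "disb.h"

class MyClient : public DISB::Client
{
public:
    void init() override          { /* load the model, allocate buffers */ }
    void prepareInput() override  { /* fetch or generate the next input */ }
    void preprocess() override    { /* e.g., resize and normalize the input */ }
    void copyInput() override     { /* copy the input to device memory */ }
    void infer() override         { /* run inference on the serving backend */ }
    void copyOutput() override    { /* copy the output back to host memory */ }
    void postprocess() override   { /* e.g., decode logits into labels */ }
};
```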



- ### Strategy
- ### Load

`DISB::Strategy` instructs when DISB should launch next inference request. There are two built-in strategies in `DISB::PeriodicStrategy` (launch inference request at a given frequency periodically) and `DISB::TraceStrategy` (launch inference request according to a given trace). They can be enabled by setting certain attribute in json configuration, see [HelloDISB](samples/hellodisb) for example.
`DISB::Load` instructs when DISB should launch the next inference request. There are 5 built-in loads simulating the load patterns mentioned in [DISB Workloads](#disb-workloads). They can be enabled by setting certain attributes in the json configuration; see [HelloDISB](samples/hellodisb) for an example.



If you want to use `DISB::DependentLoad`, your client class should inherit `DISB::DependentClient` and implement the virtual methods `consumePrevResults()` and `produceDummyPrevResults()`. `consumePrevResults()` will be called whenever one of the prior tasks finishes an inference and produces a result; you can use the previous results as the input of the DependentClient, and you can also inherit `DISB::InferResult` to pass custom data. `produceDummyPrevResults()` will be called while DISB is warming up and testing the standalone latency of each client; its results will be consumed by `consumePrevResults()`, making the dependent load independent so that standalone latency can be measured.
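
A rough sketch of such a client is below; the method signatures are assumptions inferred from this description rather than the actual DISB declarations, so treat them only as an outline:

```c++
// Hypothetical dependent client; both signatures below are assumptions,
// check the DISB headers for the real declarations.
class MyDependentClient : public DISB::DependentClient
{
public:
    // Called when a prior task finishes an inference and produces a result;
    // keep the result so it can be used as this client's next input.
    virtual void consumePrevResults(
        const std::vector<std::shared_ptr<DISB::InferResult>> &prevResults)
    { /* stash prevResults for the next inference */ }

    // Called while DISB warms up and measures standalone latency; fabricate
    // dummy results so the dependent client can run without its real
    // predecessors.
    virtual std::vector<std::shared_ptr<DISB::InferResult>> produceDummyPrevResults()
    { return {}; }
};
```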



@@ -100,30 +110,30 @@ sudo make install
```c++
void init(const std::string &configJsonStr,
std::shared_ptr<Client> clientFactory(const Json::Value &config),
std::shared_ptr<Strategy> strategyFactory(const Json::Value &config) = builtinStrategyFactory);
std::shared_ptr<Load> loadFactory(const Json::Value &config) = builtinLoadFactory);

void run(void strategyCoordinator(const std::vector<StrategyInfo> &strategyInfos) = builtinStrategyCoordinator);
void run(void loadCoordinator(const std::vector<LoadInfo> &loadInfos) = builtinLoadCoordinator);
```
When initializing BenchmarkSuite, a json format string should be passed as config, and a factory method of your own subclass implementation of `DISB::Client` should be provided. The `Json::Value` passed to the factory method is the `"client"` attribute in each task in `configJsonStr`.
When initializing BenchmarkSuite, a json formatted string should be passed as config, and a factory method of your own subclass implementation of `DISB::Client` should be provided. The `Json::Value` passed to the factory method is the `"client"` attribute in each task in `configJsonStr`.
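
For illustration, a factory for the hypothetical `MyClient` sketched above could look like the following; the `"model"` key is a made-up attribute used purely as an example of reading the task's `"client"` config:

```c++
// Sketch of a client factory; "MyClient" and the "model" key are
// hypothetical, the factory signature itself comes from init() above.
std::shared_ptr<DISB::Client> myClientFactory(const Json::Value &config)
{
    // "config" is the "client" attribute of one task in configJsonStr.
    // A real factory would typically read backend-specific settings here,
    // e.g. a (made-up) model name: config["model"].asString()
    return std::make_shared<MyClient>();
}
```

This factory, together with the json config string, is then passed to `DISB::BenchmarkSuite::init()`.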
If you want to custom strategies other than `DISB::PeriodicStrategy` or `DISB::TraceStrategy`, you should implement the virtual method `std::chrono::system_clock::time_point nextLaunchTime(const std::chrono::system_clock::time_point &now)` and provide your own strategy factory method. The `Json::Value` passed to the factory method is the `"strategy"` attribute in each task in `configJsonStr`.
If you need customized loads other than the built-in loads, you should implement the virtual method `waitUntilNextLaunch()` and provide your own load factory method. The `Json::Value` passed to the factory method is the `"load"` attribute in each task in `configJsonStr`.
If your strategies have to coordinate with each other, you can pass `strategyCoordinator()` to `DISB::BenchmarkSuite::run()`, which makes sure that strategies will not conflict with each other. For example, the `builtinStrategyCoordinator()` will prevent the periodic strategies with the same frequency and the highest priority from launching at the same time by taking standalone latency of each client into account.
If your loads need to coordinate with each other, you can pass `loadCoordinator()` to `DISB::BenchmarkSuite::run()`, which makes sure that loads will not conflict with each other. For example, the `builtinLoadCoordinator()` will prevent the periodic loads with the same frequency and the highest priority from launching at the same time by setting different launch delays.
- ### Analyzer
`DISB::Analyzer` is used to measure the performance of each inference task, each inference task can have multiple analyzers. `DISB::BasicAnalyzer`, which can measure latency and throughput is implemented by DISB and is enabled for every task by default.
`DISB::Analyzer` is used to measure the performance of each inference task; each inference task can have multiple analyzers. `DISB::BasicAnalyzer`, which can measure latency and throughput, is implemented by DISB and is enabled for every task by default.
If you want to custom analyzers other than `DISB::BasicAnalyzer`, for example measures gpu usage and memory consumption, the following interfaces should be implemented.
If you want customized analyzers other than `DISB::BasicAnalyzer` (for example, an analyzer that measures GPU usage and memory consumption), the following interfaces should be implemented.
```c++
virtual void init();
@@ -156,11 +166,11 @@ sudo make install



After you have implemented `DISB::Analyzer`, you can add it to a `DISB::Client` by calling `DISB::Client::addAnalyzer()` in the factory method of client. You may refer to [TensorRT sample](samples/tensorrt/README.md) or [Tensorflow Serving sample](samples/tfserving/README.md) for more details. They both implement an `AccuarcyAnalyzer` to measure inference accuarcy.
After you have implemented `DISB::Analyzer`, you can add it to a `DISB::Client` by calling `DISB::Client::addAnalyzer()` in the factory method of the client. You may refer to the [TensorRT sample](benchmarks/frameworks/tensorrt/README.md) or the [Tensorflow Serving sample](benchmarks/frameworks/tfserving/README.md) for more details. They both implement an `AccuarcyAnalyzer` to measure inference accuracy.
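
For illustration, extending the factory sketch above (the `GpuAnalyzer` type is hypothetical, and the exact parameter type expected by `addAnalyzer()` is an assumption; check the DISB headers for the real signature):

```c++
// Hypothetical: attach a custom analyzer when creating the client.
// GpuAnalyzer would be your own DISB::Analyzer subclass; passing it as a
// shared_ptr is an assumption about addAnalyzer()'s parameter type.
std::shared_ptr<DISB::Client> myClientFactory(const Json::Value &config)
{
    auto client = std::make_shared<MyClient>();
    client->addAnalyzer(std::make_shared<GpuAnalyzer>());
    return client;
}
```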



- ### Run a benchmark
- ### Run benchmarks

```c++
#include "disb.h"
@@ -201,22 +211,18 @@ sudo make install
A simple sample that shows how DISB works; it needs no extra dependencies.
- [TensorRT](samples/tensorrt/README.md)
A sample serves MNIST inference requests directly using TensorRT as serving backend, needs CUDA environment to compile.
You can enable its compiling by adding a cmake parameter: `-DSAMPLE_TENSORRT=ON`.
- [Tensorflow Serving](samples/tfserving/README.md)
A sample serves ResNet inference requests using Tensorflow Serving as serving backend, needs gRPC environment to compile.
You can enable its compiling by adding a cmake parameter: `-DSAMPLE_TFSERVING=ON`.
## Benchmark Results
We have supported DISB on some mainstream DNN inference serving frameworks, including:
- [TensorRT](benchmarks/frameworks/tensorrt/README.md)
- [Triton](benchmarks/frameworks/triton/README.md)
- [Tensorflow Serving](benchmarks/frameworks/tfserving/README.md)
## Benchmark Result
We tested these DNN inference serving frameworks under 6 [DISB Workloads](#disb-workloads). Test results are shown in [results.md](benchmarks/results/results.md).
[TBD] We will provide the benchmark result of common DNN inference framework on DISB in next release.
[TBD] We're still working on supporting more DNN inference serving frameworks.
@@ -260,4 +266,4 @@ Rong Chen: rongchen@sjtu.edu.cn

## License

DISB is released under the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0.html).
DISB is released under the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0.html).
28 changes: 28 additions & 0 deletions benchmarks/benchmarks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Benchmarks

We have supported DISB on some mainstream [DNN inference serving frameworks](frameworks), including:

- [TensorRT](frameworks/tensorrt/README.md)
- [Triton](frameworks/triton/README.md)
- [Tensorflow Serving](frameworks/tfserving/README.md)
[TBD] We're still working on supporting more DNN inference serving frameworks.



Test results: [results](results/results.md)

Workloads: [workloads](workloads)

Supported models: [models](models)

- VGG-19
- ResNet-50
- ResNet-152
- MobileNetV1
- InceptionV3
- DistilBert
- DenseNet-201

Supported inputs: [inputs](inputs)

- ImageNet
12 changes: 12 additions & 0 deletions benchmarks/frameworks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
if (BUILD_TENSORRT)
add_subdirectory(tensorrt)
endif()

if (BUILD_TRITON)
add_subdirectory(triton)
endif()

if (BUILD_TFSERVING)
add_subdirectory(tfserving)
endif()
