Commit

v1.0

Naplesoul committed Feb 17, 2023
1 parent 2813f89 commit d61059f
Showing 231 changed files with 11,442 additions and 6,655 deletions.
5 changes: 5 additions & 0 deletions .gitattributes
@@ -0,0 +1,5 @@
*.pb filter=lfs diff=lfs merge=lfs -text
*.index filter=lfs diff=lfs merge=lfs -text
*.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
benchmarks/inputs/imagenet/* filter=lfs diff=lfs merge=lfs -text
7 changes: 6 additions & 1 deletion .gitignore
@@ -1,3 +1,8 @@
logs
build
install
.vscode
.DS_store

*.plan
*.DS_store
*.tar.gz
16 changes: 13 additions & 3 deletions CMakeLists.txt
@@ -1,8 +1,10 @@
cmake_minimum_required(VERSION 3.5)
project(DISB)

option(SAMPLE_TENSORRT "whether to build tensorrt sample" FALSE)
option(SAMPLE_TFSERVING "whether to build tfserving sample" FALSE)
option(BUILD_TENSORRT "whether to build tensorrt" OFF)
option(BUILD_TRITON "whether to build triton" OFF)
option(BUILD_TFSERVING "whether to build tfserving" OFF)
option(BUILD_REEF "whether to build reef" OFF)

include_directories("${PROJECT_SOURCE_DIR}/include")
aux_source_directory("${PROJECT_SOURCE_DIR}/src" disb_src)
@@ -11,4 +13,12 @@ add_library(disb STATIC ${disb_src})

target_link_libraries(disb jsoncpp pthread)

add_subdirectory(samples)
file(GLOB_RECURSE include_src
${CMAKE_CURRENT_LIST_DIR}/include/*.h
)

install(TARGETS disb ARCHIVE DESTINATION lib)
install(FILES ${include_src} DESTINATION include)

add_subdirectory(samples)
add_subdirectory(benchmarks/frameworks)
114 changes: 114 additions & 0 deletions Makefile
@@ -0,0 +1,114 @@
# BUILD_TYPE = Debug / Release
BUILD_TYPE = Release

BUILD_TENSORRT = OFF
BUILD_TRITON = OFF
BUILD_TFSERVING = OFF

DISB_PATH = $(shell pwd)
BUILD_PATH = ${DISB_PATH}/build
INSTALL_PATH = ${DISB_PATH}/install

TESTCASES = A B C D E REAL

.PHONY: build
build: ${BUILD_PATH}/CMakeCache.txt
	rm -rf ${INSTALL_PATH}; \
	cmake --build ${BUILD_PATH} --target install -- -j$(shell nproc)

.PHONY: install
install: build

${BUILD_PATH}/CMakeCache.txt:
	${MAKE} configure

.PHONY: configure
configure:
	cmake -B${BUILD_PATH} \
		-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
		-DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} \
		-DBUILD_TENSORRT=${BUILD_TENSORRT} \
		-DBUILD_TRITON=${BUILD_TRITON} \
		-DBUILD_TFSERVING=${BUILD_TFSERVING}

.PHONY: trt
trt:
	${MAKE} clean; \
	${MAKE} BUILD_TENSORRT=ON

.PHONY: triton
triton:
	${MAKE} clean; \
	${MAKE} BUILD_TRITON=ON

.PHONY: tfs
tfs:
	${MAKE} clean; \
	${MAKE} BUILD_TFSERVING=ON

.PHONY: trt-test
trt-test:
	@for testcase in ${TESTCASES}; do ${DISB_PATH}/run.sh trt $$testcase; done; \
	echo "Testcases completed, logs are under ${DISB_PATH}/logs, results are under ${DISB_PATH}/benchmarks/results"

.PHONY: triton-test
triton-test:
	@for testcase in ${TESTCASES}; do ${DISB_PATH}/run.sh triton $$testcase; done; \
	echo "Testcases completed, logs are under ${DISB_PATH}/logs, results are under ${DISB_PATH}/benchmarks/results"

.PHONY: tfs-test
tfs-test:
	@for testcase in ${TESTCASES}; do ${DISB_PATH}/run.sh tfs $$testcase; done; \
	echo "Testcases completed, logs are under ${DISB_PATH}/logs, results are under ${DISB_PATH}/benchmarks/results"

.PHONY: trt-container
trt-container:
	docker run -it \
		--name disb-trt8.4 \
		--gpus all \
		-v ${PWD}:/workspace/disb \
		shenwhang/disb-trt8.4:0.1 \
		/bin/bash

.PHONY: triton-front
triton-front:
	docker run -it \
		--name disb-triton-client \
		--net=host \
		-v ${PWD}:/workspace/disb \
		shenwhang/disb-triton-client:0.2 \
		/bin/bash

.PHONY: triton-back
triton-back:
	docker run --rm \
		--name disb-triton-server \
		--gpus all \
		-p8000:8000 -p8001:8001 -p8002:8002 \
		-v ${PWD}/benchmarks/models:/models \
		nvcr.io/nvidia/tritonserver:22.08-py3 \
		tritonserver \
		--model-repository=/models

.PHONY: tfs-front
tfs-front:
	docker run -it \
		--name disb-tfs-client \
		--network host \
		-v ${PWD}:/workspace/disb \
		shenwhang/disb-tfs-client:1.0 \
		/bin/bash

.PHONY: tfs-back
tfs-back:
	docker run -it --rm \
		--name disb-tfs-server \
		--gpus all \
		-p8500:8500 -p8501:8501 \
		-v $(DISB_PATH)/benchmarks/models:/models \
		tensorflow/serving:2.5.4-gpu \
		--model_config_file=/models/models.config

.PHONY: clean
clean:
	rm -rf ${BUILD_PATH} ${INSTALL_PATH}
86 changes: 46 additions & 40 deletions README.md
@@ -1,8 +1,10 @@
# DISB: DNN Inference Serving Benchmark

DISB is a DNN inference serving benchmark with diverse workloads and models. It was originally designed to simulate real-time scenarios, e.g. autonomous driving systems, where both low latency and high throughput are demanded.
**DISB** is a **D**NN **I**nference **S**erving **B**enchmark with diverse workloads and models. It was originally designed to simulate real-time scenarios, e.g. autonomous driving systems, where both low latency and high throughput are demanded.

DISB uses the client-server architecture, where the clients send the DNN inference requests to the server via RPC, and the server returns the inference result. Clients can submit the inference requests periodically or randomly. An inference request should contain the model name (or id), the input data and other customized attributes (e.g., priority or deadline).
DISB uses the client-server architecture, where the clients send the DNN inference requests to the server via RPC, and the server returns the inference result. Clients can submit the inference requests periodically or randomly. An inference request may contain the model name (or id), the input data and other customized attributes (e.g., priority or deadline).

**Note:** Please use git lfs to clone this repo in order to download model files.



@@ -13,7 +15,7 @@ DISB uses the client-server architecture, where the clients send the DNN inferen
- [Build & Install](#build--install)
- [Usage](#usage)
- [Samples](#samples)
- [Benchmark Result](#benchmark-result)
- [Benchmark Results](#benchmark-results)
- [Paper](#paper)
- [The Team](#the-team)
- [Contact Us](#contact-us)
@@ -23,7 +25,7 @@ DISB uses the client-server architecture, where the clients send the DNN inferen

## DISB Toolkit

DISB provides a C++ library (`libdisb`) to perform benchmarking. To integrate your own DNN inference system with DISB, you only need to implement a `DISBClient` to wrap your inference interface. See [usage](#usage) for details.
DISB provides a C++ library (`libdisb`) to perform benchmarking. To integrate your own DNN inference system with DISB, you only need to implement `DISB::Client` to wrap your inference interface. See [usage](#usage) for details.



@@ -32,13 +34,15 @@ DISB provides a C++ library (`libdisb`) to perform benchmarking. To integrate yo
Currently, DISB provides 6 workloads with different DNN models and different numbers of clients.

There are five patterns for submitting inference requests in DISB clients:
1. Uniform Distribution (U): the client sends inference requests periodically, with a fixed frequency (e.g., 20 reqs/s). This pattern is common in data-driven applications (e.g., obstacle detection with cameras).
2. Possion Distribution (P), the client sends inference requests with a Poisson arrival distribution. This pattern can simulate event-driven applications (e.g., speech recoginition).
3. Closed-loop (C), the client continuously issues inference requests, which simulates a contention load.
1. Uniform Distribution (U): The client sends inference requests periodically, with a fixed frequency (e.g., 20 reqs/s). This pattern is common in data-driven applications (e.g., obstacle detection with cameras).
2. Poisson Distribution (P): The client sends inference requests following a Poisson arrival process with a given average arrival rate (e.g., 25 reqs/s); a sketch of how such arrival times can be generated follows this list. This pattern can simulate event-driven applications (e.g., speech recognition).
3. Closed-loop (C): The client continuously sends inference requests, which simulates a contention load.
4. Trace (T): The client sends inference requests according to a given trace file, which contains a series of request time points. This pattern can reproduce real-world workloads.
5. Dependent (D): The client sends inference requests only when all of its prior tasks have completed; the prior tasks can be other clients. This pattern can simulate an inference graph (or inference DAG), where a model needs the output of another model as its input.
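
As an illustration of the Poisson pattern (a standalone sketch, not DISB's built-in load implementation), the inter-arrival gaps of a Poisson process are exponentially distributed with mean 1/rate, so launch times can be generated like this:

```c++
// Standalone sketch: generate Poisson launch times by drawing
// exponentially distributed inter-arrival gaps (mean gap = 1 / rate).
#include <chrono>
#include <random>
#include <vector>

std::vector<std::chrono::system_clock::time_point>
poissonLaunchTimes(double reqsPerSec, int count)
{
    std::mt19937 rng(std::random_device{}());
    std::exponential_distribution<double> gapSeconds(reqsPerSec);

    auto t = std::chrono::system_clock::now();
    std::vector<std::chrono::system_clock::time_point> times;
    for (int i = 0; i < count; ++i) {
        t += std::chrono::duration_cast<std::chrono::system_clock::duration>(
            std::chrono::duration<double>(gapSeconds(rng)));
        times.push_back(t);
    }
    return times;
}
```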

See [workloads](./workloads.md) for workload details.
We combined these patterns into 6 typical workloads for benchmarking; see [workloads](benchmarks/workloads/workloads.md) for workload details.

[TBD] We're still working on providing more representative and general DNN inference serving workloads. We will support dependent load (inference DAG) in next release.
[TBD] We're still working on providing more representative and general DNN inference serving workloads.



@@ -54,13 +58,8 @@ sudo apt install libjsoncpp-dev
Build and install DISB tools:

```shell
# build libdisb.a
cmake -B build
cd build
make -j$(nproc)

# install
sudo make install
# will build and install into disb/install
make build
```


@@ -72,24 +71,35 @@ sudo make install
`DISB::Client` is an adaptor class between DISB and the serving backend. You can implement the following interfaces in its subclass. These interfaces will be called during the benchmark, and their execution time will be recorded by DISB.

```c++
// init() will be called once the benchmark begin
// init() will be called once when the benchmark begins
virtual void init();

// The following interfaces will be called by DISB
// within each inference request during benchmark.
// Average latency of each interface will be recorded.
virtual void prepareInput();
virtual void preprocess();
virtual void copyInput();
virtual void infer();
virtual void copyOutput();
virtual void postprocess();

// If another task depends on this client,
// the InferResult will be passed to the next task.
virtual std::shared_ptr<InferResult> produceResult();
```
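
For example, a minimal subclass might look like the sketch below; the class name and the placeholder comments are hypothetical, only the overridden interfaces come from DISB:

```c++
// Minimal sketch of a DISB::Client subclass; the backend calls are
// placeholders for whatever your serving system actually does.
#include "disb.h"

class MyClient : public DISB::Client
{
public:
    void init() override          { /* load the model, allocate buffers */ }
    void prepareInput() override  { /* fetch or generate the next input */ }
    void preprocess() override    { /* e.g., resize and normalize the input */ }
    void copyInput() override     { /* copy the input to device memory */ }
    void infer() override         { /* run inference on the serving backend */ }
    void copyOutput() override    { /* copy the output back to host memory */ }
    void postprocess() override   { /* e.g., decode logits into labels */ }
};
```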



- ### Strategy
- ### Load

`DISB::Strategy` instructs when DISB should launch next inference request. There are two built-in strategies in `DISB::PeriodicStrategy` (launch inference request at a given frequency periodically) and `DISB::TraceStrategy` (launch inference request according to a given trace). They can be enabled by setting certain attribute in json configuration, see [HelloDISB](samples/hellodisb) for example.
`DISB::Load` instructs when DISB should launch the next inference request. There are 5 built-in loads simulating the load patterns mentioned in [DISB Workloads](#disb-workloads). They can be enabled by setting certain attributes in the json configuration; see [HelloDISB](samples/hellodisb) for an example.



If you want to use `DISB::DependentLoad`, your client class should inherit `DISB::DependentClient` and implement the virtual methods `consumePrevResults()` and `produceDummyPrevResults()`. `consumePrevResults()` will be called whenever one of the prior tasks finishes an inference and produces a result; you can use the previous results as the input of the DependentClient, and you can also inherit `DISB::InferResult` to pass custom data. `produceDummyPrevResults()` will be called while DISB is warming up and testing the standalone latency of each client; its results will be consumed by `consumePrevResults()`, making the dependent load independent so that standalone latency can be measured.
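
A rough sketch of such a client is below; the method signatures are assumptions inferred from this description rather than the actual DISB declarations, so treat them only as an outline:

```c++
// Hypothetical dependent client; both signatures below are assumptions,
// check the DISB headers for the real declarations.
class MyDependentClient : public DISB::DependentClient
{
public:
    // Called when a prior task finishes an inference and produces a result;
    // keep the result so it can be used as this client's next input.
    virtual void consumePrevResults(
        const std::vector<std::shared_ptr<DISB::InferResult>> &prevResults)
    { /* stash prevResults for the next inference */ }

    // Called while DISB warms up and measures standalone latency; fabricate
    // dummy results so the dependent client can run without its real
    // predecessors.
    virtual std::vector<std::shared_ptr<DISB::InferResult>> produceDummyPrevResults()
    { return {}; }
};
```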



@@ -100,30 +110,30 @@ sudo make install
```c++
void init(const std::string &configJsonStr,
std::shared_ptr<Client> clientFactory(const Json::Value &config),
std::shared_ptr<Strategy> strategyFactory(const Json::Value &config) = builtinStrategyFactory);
std::shared_ptr<Load> loadFactory(const Json::Value &config) = builtinLoadFactory);

void run(void strategyCoordinator(const std::vector<StrategyInfo> &strategyInfos) = builtinStrategyCoordinator);
void run(void loadCoordinator(const std::vector<LoadInfo> &loadInfos) = builtinLoadCoordinator);
```
When initializing BenchmarkSuite, a json format string should be passed as config, and a factory method of your own subclass implementation of `DISB::Client` should be provided. The `Json::Value` passed to the factory method is the `"client"` attribute in each task in `configJsonStr`.
When initializing BenchmarkSuite, a json formatted string should be passed as config, and a factory method of your own subclass implementation of `DISB::Client` should be provided. The `Json::Value` passed to the factory method is the `"client"` attribute in each task in `configJsonStr`.
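
For illustration, a factory for the hypothetical `MyClient` sketched above could look like the following; the `"model"` key is a made-up attribute used purely as an example of reading the task's `"client"` config:

```c++
// Sketch of a client factory; "MyClient" and the "model" key are
// hypothetical, the factory signature itself comes from init() above.
std::shared_ptr<DISB::Client> myClientFactory(const Json::Value &config)
{
    // "config" is the "client" attribute of one task in configJsonStr.
    // A real factory would typically read backend-specific settings here,
    // e.g. a (made-up) model name: config["model"].asString()
    return std::make_shared<MyClient>();
}
```

This factory, together with the json config string, is then passed to `DISB::BenchmarkSuite::init()`.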
If you want to custom strategies other than `DISB::PeriodicStrategy` or `DISB::TraceStrategy`, you should implement the virtual method `std::chrono::system_clock::time_point nextLaunchTime(const std::chrono::system_clock::time_point &now)` and provide your own strategy factory method. The `Json::Value` passed to the factory method is the `"strategy"` attribute in each task in `configJsonStr`.
If you need customized loads other than the built-in loads, you should implement the virtual method `waitUntilNextLaunch()` and provide your own load factory method. The `Json::Value` passed to the factory method is the `"load"` attribute in each task in `configJsonStr`.
If your strategies have to coordinate with each other, you can pass `strategyCoordinator()` to `DISB::BenchmarkSuite::run()`, which makes sure that strategies will not conflict with each other. For example, the `builtinStrategyCoordinator()` will prevent the periodic strategies with the same frequency and the highest priority from launching at the same time by taking standalone latency of each client into account.
If your loads need to coordinate with each other, you can pass `loadCoordinator()` to `DISB::BenchmarkSuite::run()`, which makes sure that loads will not conflict with each other. For example, the `builtinLoadCoordinator()` will prevent the periodic loads with the same frequency and the highest priority from launching at the same time by setting different launch delays.
- ### Analyzer
`DISB::Analyzer` is used to measure the performance of each inference task, each inference task can have multiple analyzers. `DISB::BasicAnalyzer`, which can measure latency and throughput is implemented by DISB and is enabled for every task by default.
`DISB::Analyzer` is used to measure the performance of each inference task; each inference task can have multiple analyzers. `DISB::BasicAnalyzer`, which can measure latency and throughput, is implemented by DISB and is enabled for every task by default.
If you want to custom analyzers other than `DISB::BasicAnalyzer`, for example measures gpu usage and memory consumption, the following interfaces should be implemented.
If you want customized analyzers other than `DISB::BasicAnalyzer` (for example, an analyzer that measures GPU usage and memory consumption), the following interfaces should be implemented.
```c++
virtual void init();
@@ -156,11 +166,11 @@ sudo make install



After you have implemented `DISB::Analyzer`, you can add it to a `DISB::Client` by calling `DISB::Client::addAnalyzer()` in the factory method of client. You may refer to [TensorRT sample](samples/tensorrt/README.md) or [Tensorflow Serving sample](samples/tfserving/README.md) for more details. They both implement an `AccuarcyAnalyzer` to measure inference accuarcy.
After you have implemented `DISB::Analyzer`, you can add it to a `DISB::Client` by calling `DISB::Client::addAnalyzer()` in the factory method of the client. You may refer to the [TensorRT sample](benchmarks/frameworks/tensorrt/README.md) or the [Tensorflow Serving sample](benchmarks/frameworks/tfserving/README.md) for more details. They both implement an `AccuarcyAnalyzer` to measure inference accuracy.
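
For illustration, extending the factory sketch above (the `GpuAnalyzer` type is hypothetical, and the exact parameter type expected by `addAnalyzer()` is an assumption; check the DISB headers for the real signature):

```c++
// Hypothetical: attach a custom analyzer when creating the client.
// GpuAnalyzer would be your own DISB::Analyzer subclass; passing it as a
// shared_ptr is an assumption about addAnalyzer()'s parameter type.
std::shared_ptr<DISB::Client> myClientFactory(const Json::Value &config)
{
    auto client = std::make_shared<MyClient>();
    client->addAnalyzer(std::make_shared<GpuAnalyzer>());
    return client;
}
```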



- ### Run a benchmark
- ### Run benchmarks

```c++
#include "disb.h"
@@ -201,22 +211,18 @@ sudo make install
A simple sample that shows how DISB works; it needs no extra dependencies.
- [TensorRT](samples/tensorrt/README.md)
A sample serves MNIST inference requests directly using TensorRT as serving backend, needs CUDA environment to compile.
You can enable its compiling by adding a cmake parameter: `-DSAMPLE_TENSORRT=ON`.
- [Tensorflow Serving](samples/tfserving/README.md)
A sample serves ResNet inference requests using Tensorflow Serving as serving backend, needs gRPC environment to compile.
You can enable its compiling by adding a cmake parameter: `-DSAMPLE_TFSERVING=ON`.
## Benchmark Results
We have supported DISB on some mainstream DNN inference serving frameworks, including:
- [TensorRT](benchmarks/frameworks/tensorrt/README.md)
- [Triton](benchmarks/frameworks/triton/README.md)
- [Tensorflow Serving](benchmarks/frameworks/tfserving/README.md)
## Benchmark Result
We tested these DNN inference serving frameworks under 6 [DISB Workloads](#disb-workloads). Test results are shown in [results.md](benchmarks/results/results.md).
[TBD] We will provide the benchmark result of common DNN inference framework on DISB in next release.
[TBD] We're still working on supporting more DNN inference serving frameworks.
@@ -260,4 +266,4 @@ Rong Chen: rongchen@sjtu.edu.cn

## License

DISB is released under the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0.html).
DISB is released under the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0.html).
28 changes: 28 additions & 0 deletions benchmarks/benchmarks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Benchmarks

We have supported DISB on some mainstream [DNN inference serving frameworks](frameworks), including:

- [TensorRT](frameworks/tensorrt/README.md)
- [Triton](frameworks/triton/README.md)
- [Tensorflow Serving](frameworks/tfserving/README.md)
[TBD] We're still working on supporting more DNN inference serving frameworks.



Test results: [results](results/results.md)

Workloads: [workloads](workloads)

Supported models: [models](models)

- VGG-19
- ResNet-50
- ResNet-152
- MobileNetV1
- InceptionV3
- DistilBert
- DenseNet-201

Supported inputs: [inputs](inputs)

- ImageNet
12 changes: 12 additions & 0 deletions benchmarks/frameworks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
if (BUILD_TENSORRT)
add_subdirectory(tensorrt)
endif()

if (BUILD_TRITON)
add_subdirectory(triton)
endif()

if (BUILD_TFSERVING)
add_subdirectory(tfserving)
endif()
