diff --git a/.github/workflows/regression_tests_gpu.yml b/.github/workflows/regression_tests_gpu.yml
index acc81d1e6b..6a16fce8f7 100644
--- a/.github/workflows/regression_tests_gpu.yml
+++ b/.github/workflows/regression_tests_gpu.yml
@@ -5,7 +5,7 @@ on: workflow_dispatch
jobs:
regression-gpu:
# creates workflows for CUDA 11.6 & CUDA 11.7 on ubuntu
- runs-on: [self-hosted, ci-gpu]
+ runs-on: [self-hosted, regression-test-gpu]
strategy:
fail-fast: false
matrix:
diff --git a/README.md b/README.md
index ff1d4aa99e..b08f9c0323 100644
--- a/README.md
+++ b/README.md
@@ -76,7 +76,9 @@ Refer to [torchserve docker](docker/README.md) for details.
* [Model parallel inference](examples/Huggingface_Transformers#model-parallelism)
* [MultiModal models with MMF](https://github.com/pytorch/serve/tree/master/examples/MMF-activity-recognition) combining text, audio and video
* [Dual Neural Machine Translation](examples/Workflows/nmt_transformers_pipeline) for a complex workflow DAG
-
+* [TorchServe Integrations](examples/README.md#torchserve-integrations)
+* [TorchServe Internals](examples/README.md#torchserve-internals)
+* [TorchServe UseCases](examples/README.md#usecases)
+
For [more examples](examples/README.md)
diff --git a/docs/README.md b/docs/README.md
index 497ae39e1c..355a6e0268 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -33,9 +33,12 @@ TorchServe is a performant, flexible and easy to use tool for serving PyTorch ea
* [HuggingFace Language Model](https://github.com/pytorch/serve/blob/master/examples/Huggingface_Transformers/Transformer_handler_generalized.py) - This handler takes an input sentence and can return sequence classifications, token classifications or Q&A answers
* [Multi Modal Framework](https://github.com/pytorch/serve/blob/master/examples/MMF-activity-recognition/handler.py) - Build and deploy a classifier that combines text, audio and video input data
-* [Dual Translation Workflow](https://github.com/pytorch/serve/tree/master/examples/Workflows/nmt_transformers_pipeline) -
+* [Dual Translation Workflow](https://github.com/pytorch/serve/tree/master/examples/Workflows/nmt_transformers_pipeline) - Dual Neural Machine Translation for a complex workflow DAG
* [Model Zoo](model_zoo.md) - List of pre-trained model archives ready to be served for inference with TorchServe.
* [Examples](https://github.com/pytorch/serve/tree/master/examples) - Many examples of how to package and deploy models with TorchServe
+ - [TorchServe Internals](../examples/README.md#torchserve-internals)
+ - [TorchServe Integrations](../examples/README.md#torchserve-integrations)
+ - [TorchServe UseCases](../examples/README.md#usecases)
* [Workflow Examples](https://github.com/pytorch/serve/tree/master/examples/Workflows) - Examples of how to compose models in a workflow with TorchServe
## Advanced Features
diff --git a/docs/grpc_api.md b/docs/grpc_api.md
index 69f1d3ac4e..4583b23b72 100644
--- a/docs/grpc_api.md
+++ b/docs/grpc_api.md
@@ -28,7 +28,7 @@ Run following commands to Register, run inference and unregister, densenet161 mo
- [Install TorchServe](../README.md)
- Clone serve repo to run this example
-
+
```bash
git clone https://github.com/pytorch/serve
cd serve
@@ -43,24 +43,24 @@ pip install -U grpcio protobuf grpcio-tools
- Start torchServe
```bash
-mkdir model_store
-torchserve --start
+mkdir models
+torchserve --start --model-store models/
```
- Generate python gRPC client stub using the proto files
-
+
```bash
python -m grpc_tools.protoc --proto_path=frontend/server/src/main/resources/proto/ --python_out=ts_scripts --grpc_python_out=ts_scripts frontend/server/src/main/resources/proto/inference.proto frontend/server/src/main/resources/proto/management.proto
```
- Register densenet161 model
-
+
```bash
python ts_scripts/torchserve_grpc_client.py register densenet161
```
- - Run inference using
-
+ - Run inference using
+
```bash
python ts_scripts/torchserve_grpc_client.py infer densenet161 examples/image_classifier/kitten.jpg
```
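To make the gRPC walkthrough above concrete, here is a minimal sketch of calling the inference stub programmatically instead of through `ts_scripts/torchserve_grpc_client.py`. It assumes the stubs were generated into `ts_scripts` with the `grpc_tools.protoc` command shown above and that `densenet161` is already registered; 7070 is TorchServe's default gRPC inference port, as used in the client script below.

```python
# Minimal sketch: call the gRPC inference API directly, assuming
# inference_pb2/inference_pb2_grpc were generated by the protoc command
# above and densenet161 is already registered.
import grpc
import inference_pb2
import inference_pb2_grpc


def predict(model_name: str, image_path: str) -> str:
    # 7070 is TorchServe's default gRPC inference port.
    channel = grpc.insecure_channel("localhost:7070")
    stub = inference_pb2_grpc.InferenceAPIsServiceStub(channel)
    with open(image_path, "rb") as f:
        request = inference_pb2.PredictionsRequest(
            model_name=model_name, input={"data": f.read()}
        )
    return stub.Predictions(request).prediction.decode("utf-8")


print(predict("densenet161", "examples/image_classifier/kitten.jpg"))
```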
diff --git a/examples/README.md b/examples/README.md
index cd464941cc..1bafd53e58 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,22 +1,103 @@
-# Contents of this Document
+# Examples showcasing TorchServe Features and Integrations
+
+## TorchServe Internals
+
* [Creating mar file for an eager mode model](#creating-mar-file-for-eager-mode-model)
* [Creating mar file for torchscript mode model](#creating-mar-file-for-torchscript-mode-model)
-* [Serving torchvision image classification models](#serving-image-classification-models)
* [Serving custom model with custom service handler](#serving-custom-model-with-custom-service-handler)
-* [Serving text classification model](#serving-text-classification-model)
-* [Serving text classification model with scriptable tokenizer](#serving-text-classification-model-with-scriptable-tokenizer)
-* [Serving object detection model](#serving-object-detection-model)
-* [Serving image segmentation model](#serving-image-segmentation-model)
-* [Serving huggingface transformers model](#serving-huggingface-transformers)
-* [Serving image generator model](#example-to-serve-gan-model)
-* [Serving machine translation model](#serving-neural-machine-translation)
-* [Serving waveglow text to speech synthesizer model](#serving-waveglow-text-to-speech-synthesizer)
-* [Serving multi modal framework model](#serving-multi-modal-model)
-* [Serving Image Classification Workflow](#serving-image-classification-workflow)
-* [Serving Neural Machine Translation Workflow](#serving-neural-machine-translation-workflow)
-* [Serving Torchrec DLRM (Recommender Model)](#serving-torchrec-dlrm-recommender-model)
-* [Serving Image Classifier model for on-premise near real-time video](#serving-image-classifier-model-for-on-premise-near-real-time-video)
-* [Serving Image Classifier model and loading image data using torchdata (datapipes)](#serving-image-classifier-model-with-torchdata-datapipes)
+* [Creating a Workflow](Workflows/dog_breed_classification)
+* [Custom Metrics](custom_metrics)
+* [Dynamic Batch Processing](image_classifier/resnet_152_batch)
+* [Dynamic Batched Async Requests](image_classifier/near_real_time_video)
+
+## TorchServe Integrations
+
+### Kubernetes
+
+* [Serving HuggingFace faster transformers model in K8s](../kubernetes/examples/FasterTransformer_HuggingFace_Bert.md)
+
+### KServe
+
+* [Serving HuggingFace BERT model using KServe](https://github.com/kserve/kserve/blob/master/docs/samples/v1beta1/custom/torchserve/bert-sample/hugging-face-bert-sample.md)
+
+### Hugging Face
+
+* [Serving HuggingFace transformers model](Huggingface_Transformers)
+
+### MLFlow
+
+* [Deploy models using `mlflow-torchserve` plugin](https://github.com/mlflow/mlflow-torchserve/tree/master/examples)
+
+### Captum
+
+* [Model Explainability with Captum](captum)
+
+### ONNX
+
+* [Example for ONNX Integration](../test/pytest/test_onnx.py)
+
+### TensorRT
+
+* [Support for TensorRT optimizations](../docs/performance_guide.md#tensorrt-and-nvfuser-support)
+
+### Microsoft DeepSpeed-MII
+
+* [HuggingFace Stable Diffusion Model with Microsoft DeepSpeed-MII](deepspeed_mii)
+
+### Prometheus and mtail
+
+* [Custom Metrics with mtail and Prometheus](custom_metrics)
+
+### Intel® Extension for PyTorch
+* [Boost Performance on Intel Hardware](intel_extension_for_pytorch)
+
+### TorchRec DLRM
+
+* [Serving Torchrec DLRM (Recommender Model)](torchrec_dlrm)
+
+### TorchData
+* [Serving Image Classifier model and loading image data using TorchData (datapipes)](image_classifier/mnist/torchdata)
+
+### PyTorch 2.0
+* [PyTorch 2.0 Integration](pt2)
+
+### Stable Diffusion
+* [Stable Diffusion using HuggingFace Diffusers](diffusers)
+
+### HuggingFace Large Models
+* [HuggingFace Large Models with constrained resources](Huggingface_Largemodels)
+
+## UseCases
+
+### Vision
+#### Image Classification
+* [Serving torchvision image classification models](image_classifier)
+* [Serving Image Classifier model for on-premise near real-time video](image_classifier/near_real_time_video)
+
+#### Object Detection
+* [Serving object detection model](object_detector)
+* [Serving image segmentation model](image_segmenter)
+
+#### GAN
+* [Serving image generator model](dcgan_fashiongen)
+
+### Text
+#### Neural Machine Translation
+
+* [Serving machine translation model](nmt_transformer)
+* [Serving Neural Machine Translation Workflow](Workflows/nmt_transformers_pipeline)
+
+#### Text Classification
+* [Serving text classification model](text_classification)
+* [Serving text classification model with scriptable tokenizer](text_classification_with_scriptable_tokenizer)
+
+#### Text to Speech
+* [Serving waveglow text to speech synthesizer model](text_to_speech_synthesizer)
+
+### MultiModal
+* [Serving multi modal framework model](MMF-activity-recognition)
# TorchServe Examples
@@ -95,6 +176,12 @@ The following example demonstrates how to create and serve a pretrained transfor
* [Hugging Face Transformers](Huggingface_Transformers)
+## Captum Integration
+
+The following example demonstrates TorchServe's integration with Captum, an open source, extensible library for model interpretability built on PyTorch.
+
+* [Captum](captum)
+
## Example to serve GAN model
The following example demonstrates how to create and serve a pretrained DCGAN model from [facebookresearch/pytorch_GAN_zoo](https://github.com/facebookresearch/pytorch_GAN_zoo)
@@ -142,8 +229,8 @@ The following example demonstrates how to serve an image classification model wi
* [Near Real-Time Video Batched Image Classification](image_classifier/near_real_time_video)
-## Serving Image Classifier Model with torchdata datapipes
+## Serving Image Classifier Model with TorchData datapipes
-The following example demonstrates how to integrate torchdata with torchserve
+The following example demonstrates how to integrate TorchData with TorchServe
* [Torchdata integration with torchserve an image classification example](image_classifier/mnist/torchdata)
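The Captum section added above links to the `captum` example; as a rough illustration of what that integration exposes at serving time, the sketch below posts an image to TorchServe's Explanations API (`/explanations/{model_name}` on the default HTTP inference port 8080). The `mnist` model name and test image path are assumptions drawn from the image-classifier examples, not part of this diff.

```python
# Hypothetical sketch (not part of this PR): query the Explanations API
# that the Captum example wires up. Assumes a model named "mnist" is
# registered and the HTTP inference port is the default 8080.
import requests

with open("examples/image_classifier/mnist/test_data/0.png", "rb") as f:
    resp = requests.post("http://127.0.0.1:8080/explanations/mnist", data=f.read())

# The response body contains Captum attributions for the input image.
print(resp.json())
```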
diff --git a/examples/images/captum_logo.svg b/examples/images/captum_logo.svg
new file mode 100644
index 0000000000..01a58ea307
--- /dev/null
+++ b/examples/images/captum_logo.svg
@@ -0,0 +1 @@
+
diff --git a/examples/images/dlrm.png b/examples/images/dlrm.png
new file mode 100644
index 0000000000..0342102cc4
Binary files /dev/null and b/examples/images/dlrm.png differ
diff --git a/examples/images/huggingface_logo-noborder.svg b/examples/images/huggingface_logo-noborder.svg
new file mode 100644
index 0000000000..ef36d62b6c
--- /dev/null
+++ b/examples/images/huggingface_logo-noborder.svg
@@ -0,0 +1,73 @@
+
+
diff --git a/examples/images/k8s.png b/examples/images/k8s.png
new file mode 100644
index 0000000000..f2712c7a4a
Binary files /dev/null and b/examples/images/k8s.png differ
diff --git a/examples/images/kserve.png b/examples/images/kserve.png
new file mode 100644
index 0000000000..4638f87385
Binary files /dev/null and b/examples/images/kserve.png differ
diff --git a/examples/images/mii-white.svg b/examples/images/mii-white.svg
new file mode 100644
index 0000000000..70b40f63e3
--- /dev/null
+++ b/examples/images/mii-white.svg
@@ -0,0 +1,19 @@
+
diff --git a/examples/images/mlflow.png b/examples/images/mlflow.png
new file mode 100644
index 0000000000..1b6d0e5276
Binary files /dev/null and b/examples/images/mlflow.png differ
diff --git a/examples/images/mtail.png b/examples/images/mtail.png
new file mode 100644
index 0000000000..9f8a1ebd63
Binary files /dev/null and b/examples/images/mtail.png differ
diff --git a/examples/images/onnx.png b/examples/images/onnx.png
new file mode 100644
index 0000000000..e93090bdd3
Binary files /dev/null and b/examples/images/onnx.png differ
diff --git a/examples/images/prometheus-logo.svg b/examples/images/prometheus-logo.svg
new file mode 100644
index 0000000000..026f9e5bcc
--- /dev/null
+++ b/examples/images/prometheus-logo.svg
@@ -0,0 +1,50 @@
+
+
+
+
diff --git a/examples/images/seq2seq.png b/examples/images/seq2seq.png
new file mode 100644
index 0000000000..af09c74a1f
Binary files /dev/null and b/examples/images/seq2seq.png differ
diff --git a/serving-sdk/pom.xml b/serving-sdk/pom.xml
index 038121c617..93ed5ab070 100644
--- a/serving-sdk/pom.xml
+++ b/serving-sdk/pom.xml
@@ -37,7 +37,7 @@
com.google.code.gson
gson
- 2.10
+ 2.10.1
junit
diff --git a/ts_scripts/spellcheck_conf/wordlist.txt b/ts_scripts/spellcheck_conf/wordlist.txt
index 73fbeb937b..75f2b3a669 100644
--- a/ts_scripts/spellcheck_conf/wordlist.txt
+++ b/ts_scripts/spellcheck_conf/wordlist.txt
@@ -1005,4 +1005,18 @@ MetricTypes
MetricsCache
TIMM
backends
-inductor
\ No newline at end of file
+inductor
+Integrations
+integrations
+UseCases
+usecases
+Explainability
+TorchData
+px
+svg
+nvfuser
+noborder
+datapipes
+tensorrt
+vec
+torchdata
diff --git a/ts_scripts/torchserve_grpc_client.py b/ts_scripts/torchserve_grpc_client.py
index ffcd4d6f2d..367f3a6036 100644
--- a/ts_scripts/torchserve_grpc_client.py
+++ b/ts_scripts/torchserve_grpc_client.py
@@ -1,34 +1,35 @@
+import argparse
+
import grpc
import inference_pb2
import inference_pb2_grpc
import management_pb2
import management_pb2_grpc
-import sys
def get_inference_stub():
- channel = grpc.insecure_channel('localhost:7070')
+ channel = grpc.insecure_channel("localhost:7070")
stub = inference_pb2_grpc.InferenceAPIsServiceStub(channel)
return stub
def get_management_stub():
- channel = grpc.insecure_channel('localhost:7071')
+ channel = grpc.insecure_channel("localhost:7071")
stub = management_pb2_grpc.ManagementAPIsServiceStub(channel)
return stub
def infer(stub, model_name, model_input):
- with open(model_input, 'rb') as f:
+ with open(model_input, "rb") as f:
data = f.read()
- input_data = {'data': data}
+ input_data = {"data": data}
response = stub.Predictions(
- inference_pb2.PredictionsRequest(model_name=model_name,
- input=input_data))
+ inference_pb2.PredictionsRequest(model_name=model_name, input=input_data)
+ )
try:
- prediction = response.prediction.decode('utf-8')
+ prediction = response.prediction.decode("utf-8")
print(prediction)
except grpc.RpcError as e:
exit(1)
@@ -37,23 +38,23 @@ def infer(stub, model_name, model_input):
def register(stub, model_name, mar_set_str):
mar_set = set()
if mar_set_str:
- mar_set = set(mar_set_str.split(','))
+ mar_set = set(mar_set_str.split(","))
marfile = f"{model_name}.mar"
print(f"## Check {marfile} in mar_set :", mar_set)
if marfile not in mar_set:
marfile = "https://torchserve.s3.amazonaws.com/mar_files/{}.mar".format(
- model_name)
+ model_name
+ )
- print(f"## Register marfile:{marfile}\n")
+ print(f"## Register marfile: {marfile}\n")
params = {
- 'url': marfile,
- 'initial_workers': 1,
- 'synchronous': True,
- 'model_name': model_name
+ "url": marfile,
+ "initial_workers": 1,
+ "synchronous": True,
+ "model_name": model_name,
}
try:
- response = stub.RegisterModel(
- management_pb2.RegisterModelRequest(**params))
+ response = stub.RegisterModel(management_pb2.RegisterModelRequest(**params))
print(f"Model {model_name} registered successfully")
except grpc.RpcError as e:
print(f"Failed to register model {model_name}.")
@@ -64,7 +65,8 @@ def register(stub, model_name, mar_set_str):
def unregister(stub, model_name):
try:
response = stub.UnregisterModel(
- management_pb2.UnregisterModelRequest(model_name=model_name))
+ management_pb2.UnregisterModelRequest(model_name=model_name)
+ )
print(f"Model {model_name} unregistered successfully")
except grpc.RpcError as e:
print(f"Failed to unregister model {model_name}.")
@@ -72,17 +74,49 @@ def unregister(stub, model_name):
exit(1)
-if __name__ == '__main__':
- # args:
- # 1-> api name [infer, register, unregister]
- # 2-> model name
- # 3-> model input for prediction
- args = sys.argv[1:]
- if args[0] == "infer":
- infer(get_inference_stub(), args[1], args[2])
- else:
- api = globals()[args[0]]
- if args[0] == "register":
- api(get_management_stub(), args[1], args[2])
- else:
- api(get_management_stub(), args[1])
+if __name__ == "__main__":
+
+ parent_parser = argparse.ArgumentParser(add_help=False)
+ parent_parser.add_argument(
+ "model_name",
+ type=str,
+ default=None,
+ help="Name of the model used.",
+ )
+
+ parser = argparse.ArgumentParser(
+ description="TorchServe gRPC client",
+ formatter_class=argparse.RawTextHelpFormatter,
+ )
+ subparsers = parser.add_subparsers(help="Action", dest="action")
+
+ infer_action_parser = subparsers.add_parser(
+ "infer", parents=[parent_parser], add_help=False
+ )
+ register_action_parser = subparsers.add_parser(
+ "register", parents=[parent_parser], add_help=False
+ )
+ unregister_action_parser = subparsers.add_parser(
+ "unregister", parents=[parent_parser], add_help=False
+ )
+
+ infer_action_parser.add_argument(
+        "model_input", type=str, default=None, help="Input to the model for inference."
+ )
+
+ register_action_parser.add_argument(
+ "mar_set",
+ type=str,
+ default=None,
+ nargs="?",
+ help="Comma separated list of mar models to be loaded using [model_name=]model_location format.",
+ )
+
+ args = parser.parse_args()
+
+ if args.action == "infer":
+ infer(get_inference_stub(), args.model_name, args.model_input)
+ elif args.action == "register":
+ register(get_management_stub(), args.model_name, args.mar_set)
+ elif args.action == "unregister":
+ unregister(get_management_stub(), args.model_name)
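Since the client's CLI surface changes from positional `sys.argv` handling to argparse subcommands, a quick sanity check of the new interface may help reviewers. The sketch below rebuilds the parser the same way the `__main__` block above does and asserts how each action's arguments land on the parsed namespace; the `build_parser` helper is introduced here for illustration only and is not part of the PR.

```python
# Hypothetical sanity check (not part of the PR) for the new subcommand CLI.
import argparse


def build_parser() -> argparse.ArgumentParser:
    # Mirrors the parser construction in the __main__ block above.
    parent = argparse.ArgumentParser(add_help=False)
    parent.add_argument("model_name", type=str, help="Name of the model used.")
    parser = argparse.ArgumentParser(description="TorchServe gRPC client")
    sub = parser.add_subparsers(help="Action", dest="action")
    infer_p = sub.add_parser("infer", parents=[parent], add_help=False)
    infer_p.add_argument("model_input", type=str)
    register_p = sub.add_parser("register", parents=[parent], add_help=False)
    register_p.add_argument("mar_set", type=str, nargs="?", default=None)
    sub.add_parser("unregister", parents=[parent], add_help=False)
    return parser


args = build_parser().parse_args(
    ["infer", "densenet161", "examples/image_classifier/kitten.jpg"]
)
assert (args.action, args.model_name, args.model_input) == (
    "infer",
    "densenet161",
    "examples/image_classifier/kitten.jpg",
)

# "register" takes an optional comma-separated mar list; omitting it yields None.
args = build_parser().parse_args(["register", "densenet161"])
assert args.action == "register" and args.mar_set is None
```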