diff --git a/.github/workflows/regression_tests_gpu.yml b/.github/workflows/regression_tests_gpu.yml
index acc81d1e6b..6a16fce8f7 100644
--- a/.github/workflows/regression_tests_gpu.yml
+++ b/.github/workflows/regression_tests_gpu.yml
@@ -5,7 +5,7 @@ on: workflow_dispatch
 jobs:
   regression-gpu:
     # creates workflows for CUDA 11.6 & CUDA 11.7 on ubuntu
-    runs-on: [self-hosted, ci-gpu]
+    runs-on: [self-hosted, regression-test-gpu]
     strategy:
       fail-fast: false
       matrix:
diff --git a/README.md b/README.md
index ff1d4aa99e..b08f9c0323 100644
--- a/README.md
+++ b/README.md
@@ -76,7 +76,9 @@ Refer to [torchserve docker](docker/README.md) for details.
 * [Model parallel inference](examples/Huggingface_Transformers#model-parallelism)
 * [MultiModal models with MMF](https://github.com/pytorch/serve/tree/master/examples/MMF-activity-recognition) combining text, audio and video
 * [Dual Neural Machine Translation](examples/Workflows/nmt_transformers_pipeline) for a complex workflow DAG
-
+* [TorchServe Integrations](examples/README.md#torchserve-integrations)
+* [TorchServe Internals](examples/README.md#torchserve-internals)
+* [TorchServe UseCases](examples/README.md#usecases)
 
 For [more examples](examples/README.md)
 
diff --git a/docs/README.md b/docs/README.md
index 497ae39e1c..355a6e0268 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -33,9 +33,12 @@ TorchServe is a performant, flexible and easy to use tool for serving PyTorch ea
 * [HuggingFace Language Model](https://github.com/pytorch/serve/blob/master/examples/Huggingface_Transformers/Transformer_handler_generalized.py) - This handler takes an input sentence and can return sequence classifications, token classifications or Q&A answers
 * [Multi Modal Framework](https://github.com/pytorch/serve/blob/master/examples/MMF-activity-recognition/handler.py) - Build and deploy a classifier that combines text, audio and video input data
-* [Dual Translation Workflow](https://github.com/pytorch/serve/tree/master/examples/Workflows/nmt_transformers_pipeline) - 
+* [Dual Translation Workflow](https://github.com/pytorch/serve/tree/master/examples/Workflows/nmt_transformers_pipeline) -
 * [Model Zoo](model_zoo.md) - List of pre-trained model archives ready to be served for inference with TorchServe.
 * [Examples](https://github.com/pytorch/serve/tree/master/examples) - Many examples of how to package and deploy models with TorchServe
+  - [TorchServe Internals](../examples/README.md#torchserve-internals)
+  - [TorchServe Integrations](../examples/README.md#torchserve-integrations)
+  - [TorchServe UseCases](../examples/README.md#usecases)
 * [Workflow Examples](https://github.com/pytorch/serve/tree/master/examples/Workflows) - Examples of how to compose models in a workflow with TorchServe
 
 ## Advanced Features
diff --git a/docs/grpc_api.md b/docs/grpc_api.md
index 69f1d3ac4e..4583b23b72 100644
--- a/docs/grpc_api.md
+++ b/docs/grpc_api.md
@@ -28,7 +28,7 @@ Run following commands to Register, run inference and unregister, densenet161 mo
 
 - [Install TorchServe](../README.md)
 - Clone serve repo to run this example
-
+
 ```bash
 git clone https://github.com/pytorch/serve
 cd serve
@@ -43,24 +43,24 @@ pip install -U grpcio protobuf grpcio-tools
 - Start torchServe
 
 ```bash
-mkdir model_store
-torchserve --start
+mkdir models
+torchserve --start --model-store models/
 ```
 
 - Generate python gRPC client stub using the proto files
-
+
 ```bash
 python -m grpc_tools.protoc --proto_path=frontend/server/src/main/resources/proto/ --python_out=ts_scripts --grpc_python_out=ts_scripts frontend/server/src/main/resources/proto/inference.proto frontend/server/src/main/resources/proto/management.proto
 ```
 
 - Register densenet161 model
-
+
 ```bash
 python ts_scripts/torchserve_grpc_client.py register densenet161
 ```
 
-- Run inference using 
+- Run inference using
 
 ```bash
 python ts_scripts/torchserve_grpc_client.py infer densenet161 examples/image_classifier/kitten.jpg
 ```
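The updated `grpc_api.md` drives everything through `ts_scripts/torchserve_grpc_client.py`, but the same call can be made directly against the generated stubs. A minimal sketch, assuming the stubs were generated into the working directory with the `grpc_tools.protoc` command above, TorchServe is listening on its default gRPC inference port 7070, and `densenet161` is already registered:

```python
# Minimal direct gRPC inference call (mirrors infer() in
# ts_scripts/torchserve_grpc_client.py further down in this diff).
import grpc
import inference_pb2
import inference_pb2_grpc

channel = grpc.insecure_channel("localhost:7070")  # default inference gRPC port
stub = inference_pb2_grpc.InferenceAPIsServiceStub(channel)

with open("examples/image_classifier/kitten.jpg", "rb") as f:
    request = inference_pb2.PredictionsRequest(
        model_name="densenet161", input={"data": f.read()}
    )

print(stub.Predictions(request).prediction.decode("utf-8"))
```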
diff --git a/examples/README.md b/examples/README.md
index cd464941cc..1bafd53e58 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,22 +1,103 @@
-# Contents of this Document
+# Examples showcasing TorchServe Features and Integrations
+
+## TorchServe Internals
+
 * [Creating mar file for an eager mode model](#creating-mar-file-for-eager-mode-model)
 * [Creating mar file for torchscript mode model](#creating-mar-file-for-torchscript-mode-model)
-* [Serving torchvision image classification models](#serving-image-classification-models)
 * [Serving custom model with custom service handler](#serving-custom-model-with-custom-service-handler)
-* [Serving text classification model](#serving-text-classification-model)
-* [Serving text classification model with scriptable tokenizer](#serving-text-classification-model-with-scriptable-tokenizer)
-* [Serving object detection model](#serving-object-detection-model)
-* [Serving image segmentation model](#serving-image-segmentation-model)
-* [Serving huggingface transformers model](#serving-huggingface-transformers)
-* [Serving image generator model](#example-to-serve-gan-model)
-* [Serving machine translation model](#serving-neural-machine-translation)
-* [Serving waveglow text to speech synthesizer model](#serving-waveglow-text-to-speech-synthesizer)
-* [Serving multi modal framework model](#serving-multi-modal-model)
-* [Serving Image Classification Workflow](#serving-image-classification-workflow)
-* [Serving Neural Machine Translation Workflow](#serving-neural-machine-translation-workflow)
-* [Serving Torchrec DLRM (Recommender Model)](#serving-torchrec-dlrm-recommender-model)
-* [Serving Image Classifier model for on-premise near real-time video](#serving-image-classifier-model-for-on-premise-near-real-time-video)
-* [Serving Image Classifier model and loading image data using torchdata (datapipes)](#serving-image-classifier-model-with-torchdata-datapipes)
+* [Creating a Workflow](Workflows/dog_breed_classification)
+* [Custom Metrics](custom_metrics)
+* [Dynamic Batch Processing](image_classifier/resnet_152_batch)
+* [Dynamic Batched Async Requests](image_classifier/near_real_time_video)
+
+## TorchServe Integrations
+
+### Kubernetes
+
+* [Serving HuggingFace faster transformers model in K8s](../kubernetes/examples/FasterTransformer_HuggingFace_Bert.md)
+
+### KServe
+
+* [Serving HuggingFace BERT model using KServe](https://github.com/kserve/kserve/blob/master/docs/samples/v1beta1/custom/torchserve/bert-sample/hugging-face-bert-sample.md)
+
+### Hugging Face
+
+* [Serving HuggingFace transformers model](Huggingface_Transformers)
+
+### MLFlow
+
+* [Deploy models using `mlflow-torchserve` plugin](https://github.com/mlflow/mlflow-torchserve/tree/master/examples)
+
+### Captum
+
+* [Model Explainability with Captum](captum)
+
+### ONNX
+
+* [Example for ONNX Integration](../test/pytest/test_onnx.py)
+
+### TensorRT
+
+* [Support for TensorRT optimizations](../docs/performance_guide.md#tensorrt-and-nvfuser-support)
+
+### Microsoft DeepSpeed-MII
+
+* [HuggingFace Stable Diffusion Model with Microsoft DeepSpeed-MII](deepspeed_mii)
+
+### Prometheus and mtail
+
+* [Custom Metrics with mtail and Prometheus](custom_metrics)
+
+### Intel® Extension for PyTorch
+* [Boost Performance on Intel Hardware](intel_extension_for_pytorch)
+
+### TorchRec DLRM
+
+* [Serving Torchrec DLRM (Recommender Model)](torchrec_dlrm)
+
+### TorchData
+* [Serving Image Classifier model and loading image data using TorchData (datapipes)](image_classifier/mnist/torchdata)
+
+### PyTorch 2.0
+* [PyTorch 2.0 Integration](pt2)
+
+### Stable Diffusion
+* [Stable Diffusion using HuggingFace Diffusers](diffusers)
+
+### HuggingFace Large Models
+* [HuggingFace Large Models with constrained resources](Huggingface_Largemodels)
+
+## UseCases
+
+### Vision
+#### Image Classification
+* [Serving torchvision image classification models](image_classifier)
+* [Serving Image Classifier model for on-premise near real-time video](image_classifier/near_real_time_video)
+
+#### Object Detection
+* [Serving object detection model](object_detector)
+* [Serving image segmentation model](image_segmenter)
+
+#### GAN
+* [Serving image generator model](dcgan_fashiongen)
+
+### Text
+#### Neural Machine Translation
+
+* [Serving machine translation model](nmt_transformer)
+* [Serving Neural Machine Translation Workflow](Workflows/nmt_transformers_pipeline)
+
+#### Text Classification
+* [Serving text classification model](text_classification)
+* [Serving text classification model with scriptable tokenizer](text_classification_with_scriptable_tokenizer)
+
+#### Text to Speech
+* [Serving waveglow text to speech synthesizer model](text_to_speech_synthesizer)
+
+### MultiModal
+* [Serving multi modal framework model](MMF-activity-recognition)
 
 # TorchServe Examples
@@ -95,6 +176,12 @@ The following example demonstrates how to create and serve a pretrained transfor
 
 * [Hugging Face Transformers](Huggingface_Transformers)
 
+## Captum Integration
+
+The following example demonstrates TorchServe's integration with Captum, an open source, extensible library for model interpretability built on PyTorch.
+
+* [Captum](captum)
+
 ## Example to serve GAN model
 
 The following example demonstrates how to create and serve a pretrained DCGAN model from [facebookresearch/pytorch_GAN_zoo](https://github.com/facebookresearch/pytorch_GAN_zoo)
@@ -142,8 +229,8 @@ The following example demonstrates how to serve an image classification model wi
 
 * [Near Real-Time Video Batched Image Classification](image_classifier/near_real_time_video)
 
-## Serving Image Classifier Model with torchdata datapipes
+## Serving Image Classifier Model with TorchData datapipes
 
-The following example demonstrates how to integrate torchdata with torchserve
+The following example demonstrates how to integrate TorchData with TorchServe
 
 * [Torchdata integration with torchserve an image classification example](image_classifier/mnist/torchdata)
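Several entries in the reorganized TOC above (for example, "Serving custom model with custom service handler") center on custom handlers. For orientation, a handler plugs into TorchServe roughly as below; `BaseHandler` and the override points are TorchServe's real API, while the body of each method is an illustrative placeholder, not code from any example in this PR:

```python
# Illustrative custom handler sketch; preprocess/postprocess bodies are
# placeholders that assume each request carries a JSON list of floats.
import torch
from ts.torch_handler.base_handler import BaseHandler


class MyHandler(BaseHandler):
    def preprocess(self, data):
        # TorchServe delivers a batch; each item exposes "data" or "body".
        rows = [row.get("data") or row.get("body") for row in data]
        return torch.tensor(rows, dtype=torch.float32)

    def postprocess(self, inference_output):
        # One response entry is required per request in the batch.
        return inference_output.argmax(dim=1).tolist()
```

The inherited `initialize` and `inference` methods handle model loading and the forward pass, so a minimal handler only needs these input/output adapters.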
diff --git a/examples/images/captum_logo.svg b/examples/images/captum_logo.svg
new file mode 100644
index 0000000000..01a58ea307
diff --git a/examples/images/dlrm.png b/examples/images/dlrm.png
new file mode 100644
index 0000000000..0342102cc4
Binary files /dev/null and b/examples/images/dlrm.png differ
diff --git a/examples/images/huggingface_logo-noborder.svg b/examples/images/huggingface_logo-noborder.svg
new file mode 100644
index 0000000000..ef36d62b6c
diff --git a/examples/images/k8s.png b/examples/images/k8s.png
new file mode 100644
index 0000000000..f2712c7a4a
Binary files /dev/null and b/examples/images/k8s.png differ
diff --git a/examples/images/kserve.png b/examples/images/kserve.png
new file mode 100644
index 0000000000..4638f87385
Binary files /dev/null and b/examples/images/kserve.png differ
diff --git a/examples/images/mii-white.svg b/examples/images/mii-white.svg
new file mode 100644
index 0000000000..70b40f63e3
diff --git a/examples/images/mlflow.png b/examples/images/mlflow.png
new file mode 100644
index 0000000000..1b6d0e5276
Binary files /dev/null and b/examples/images/mlflow.png differ
diff --git a/examples/images/mtail.png b/examples/images/mtail.png
new file mode 100644
index 0000000000..9f8a1ebd63
Binary files /dev/null and b/examples/images/mtail.png differ
diff --git a/examples/images/onnx.png b/examples/images/onnx.png
new file mode 100644
index 0000000000..e93090bdd3
Binary files /dev/null and b/examples/images/onnx.png differ
diff --git a/examples/images/prometheus-logo.svg b/examples/images/prometheus-logo.svg
new file mode 100644
index 0000000000..026f9e5bcc
diff --git a/examples/images/seq2seq.png b/examples/images/seq2seq.png
new file mode 100644
index 0000000000..af09c74a1f
Binary files /dev/null and b/examples/images/seq2seq.png differ
diff --git a/serving-sdk/pom.xml b/serving-sdk/pom.xml
index 038121c617..93ed5ab070 100644
--- a/serving-sdk/pom.xml
+++ b/serving-sdk/pom.xml
@@ -37,7 +37,7 @@
     <dependency>
       <groupId>com.google.code.gson</groupId>
       <artifactId>gson</artifactId>
-      <version>2.10</version>
+      <version>2.10.1</version>
     </dependency>
     <dependency>
       <groupId>junit</groupId>
diff --git a/ts_scripts/spellcheck_conf/wordlist.txt b/ts_scripts/spellcheck_conf/wordlist.txt
index 73fbeb937b..75f2b3a669 100644
--- a/ts_scripts/spellcheck_conf/wordlist.txt
+++ b/ts_scripts/spellcheck_conf/wordlist.txt
@@ -1005,4 +1005,18 @@ MetricTypes
 MetricsCache
 TIMM
 backends
-inductor
\ No newline at end of file
+inductor
+Integrations
+integrations
+UseCases
+usecases
+Explainability
+TorchData
+px
+svg
+nvfuser
+noborder
+datapipes
+tensorrt
+vec
+torchdata
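The final file in this diff rewrites `ts_scripts/torchserve_grpc_client.py`, replacing positional `sys.argv` handling (including a fragile `globals()[args[0]]` dispatch) with `argparse` subcommands. The pattern the rewrite adopts, in a self-contained miniature:

```python
# Stand-alone miniature of the argparse subcommand pattern used below.
import argparse

parser = argparse.ArgumentParser(description="demo client")
subparsers = parser.add_subparsers(dest="action")

infer_parser = subparsers.add_parser("infer")
infer_parser.add_argument("model_name")
infer_parser.add_argument("model_input")

args = parser.parse_args(["infer", "densenet161", "kitten.jpg"])
print(args.action, args.model_name, args.model_input)
# prints: infer densenet161 kitten.jpg
```

Unlike the `globals()` lookup it replaces, an unknown action now fails with a usage message instead of a `KeyError`.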
diff --git a/ts_scripts/torchserve_grpc_client.py b/ts_scripts/torchserve_grpc_client.py
index ffcd4d6f2d..367f3a6036 100644
--- a/ts_scripts/torchserve_grpc_client.py
+++ b/ts_scripts/torchserve_grpc_client.py
@@ -1,34 +1,35 @@
+import argparse
+
 import grpc
 import inference_pb2
 import inference_pb2_grpc
 import management_pb2
 import management_pb2_grpc
-import sys
 
 
 def get_inference_stub():
-    channel = grpc.insecure_channel('localhost:7070')
+    channel = grpc.insecure_channel("localhost:7070")
     stub = inference_pb2_grpc.InferenceAPIsServiceStub(channel)
     return stub
 
 
 def get_management_stub():
-    channel = grpc.insecure_channel('localhost:7071')
+    channel = grpc.insecure_channel("localhost:7071")
     stub = management_pb2_grpc.ManagementAPIsServiceStub(channel)
     return stub
 
 
 def infer(stub, model_name, model_input):
-    with open(model_input, 'rb') as f:
+    with open(model_input, "rb") as f:
         data = f.read()
 
-    input_data = {'data': data}
+    input_data = {"data": data}
     response = stub.Predictions(
-        inference_pb2.PredictionsRequest(model_name=model_name,
-                                         input=input_data))
+        inference_pb2.PredictionsRequest(model_name=model_name, input=input_data)
+    )
 
     try:
-        prediction = response.prediction.decode('utf-8')
+        prediction = response.prediction.decode("utf-8")
         print(prediction)
     except grpc.RpcError as e:
         exit(1)
@@ -37,23 +38,23 @@ def infer(stub, model_name, model_input):
 def register(stub, model_name, mar_set_str):
     mar_set = set()
     if mar_set_str:
-        mar_set = set(mar_set_str.split(','))
+        mar_set = set(mar_set_str.split(","))
     marfile = f"{model_name}.mar"
     print(f"## Check {marfile} in mar_set :", mar_set)
     if marfile not in mar_set:
         marfile = "https://torchserve.s3.amazonaws.com/mar_files/{}.mar".format(
-            model_name)
+            model_name
+        )
 
-    print(f"## Register marfile:{marfile}\n")
+    print(f"## Register marfile: {marfile}\n")
     params = {
-        'url': marfile,
-        'initial_workers': 1,
-        'synchronous': True,
-        'model_name': model_name
+        "url": marfile,
+        "initial_workers": 1,
+        "synchronous": True,
+        "model_name": model_name,
     }
     try:
-        response = stub.RegisterModel(
-            management_pb2.RegisterModelRequest(**params))
+        response = stub.RegisterModel(management_pb2.RegisterModelRequest(**params))
         print(f"Model {model_name} registered successfully")
     except grpc.RpcError as e:
         print(f"Failed to register model {model_name}.")
@@ -64,7 +65,8 @@ def register(stub, model_name, mar_set_str):
 def unregister(stub, model_name):
     try:
         response = stub.UnregisterModel(
-            management_pb2.UnregisterModelRequest(model_name=model_name))
+            management_pb2.UnregisterModelRequest(model_name=model_name)
+        )
         print(f"Model {model_name} unregistered successfully")
     except grpc.RpcError as e:
         print(f"Failed to unregister model {model_name}.")
@@ -72,17 +74,49 @@ def unregister(stub, model_name):
         exit(1)
 
 
-if __name__ == '__main__':
-    # args:
-    #   1-> api name [infer, register, unregister]
-    #   2-> model name
-    #   3-> model input for prediction
-    args = sys.argv[1:]
-    if args[0] == "infer":
-        infer(get_inference_stub(), args[1], args[2])
-    else:
-        api = globals()[args[0]]
-        if args[0] == "register":
-            api(get_management_stub(), args[1], args[2])
-        else:
-            api(get_management_stub(), args[1])
+if __name__ == "__main__":
+
+    parent_parser = argparse.ArgumentParser(add_help=False)
+    parent_parser.add_argument(
+        "model_name",
+        type=str,
+        default=None,
+        help="Name of the model used.",
+    )
+
+    parser = argparse.ArgumentParser(
+        description="TorchServe gRPC client",
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+    subparsers = parser.add_subparsers(help="Action", dest="action")
+
+    infer_action_parser = subparsers.add_parser(
+        "infer", parents=[parent_parser], add_help=False
+    )
+    register_action_parser = subparsers.add_parser(
+        "register", parents=[parent_parser], add_help=False
+    )
+    unregister_action_parser = subparsers.add_parser(
+        "unregister", parents=[parent_parser], add_help=False
+    )
+
+    infer_action_parser.add_argument(
+        "model_input", type=str, default=None, help="Input for model for inferencing."
+    )
+
+    register_action_parser.add_argument(
+        "mar_set",
+        type=str,
+        default=None,
+        nargs="?",
+        help="Comma separated list of mar models to be loaded using [model_name=]model_location format.",
+    )
+
+    args = parser.parse_args()
+
+    if args.action == "infer":
+        infer(get_inference_stub(), args.model_name, args.model_input)
+    elif args.action == "register":
+        register(get_management_stub(), args.model_name, args.mar_set)
+    elif args.action == "unregister":
+        unregister(get_management_stub(), args.model_name)
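With this refactor, the client keeps the command-line shape shown in the updated `grpc_api.md` (`python ts_scripts/torchserve_grpc_client.py infer densenet161 examples/image_classifier/kitten.jpg`), and its functions can also be reused programmatically. A sketch, assuming it is run from inside `ts_scripts/` so that the generated `*_pb2` stub modules the client imports are resolvable:

```python
# Sketch: register, infer, and unregister via the refactored client's
# own functions (signatures as defined in the diff above).
from torchserve_grpc_client import (
    get_inference_stub,
    get_management_stub,
    infer,
    register,
    unregister,
)

# An empty mar_set_str makes register() fall back to the public S3 mar URL.
register(get_management_stub(), "densenet161", "")
infer(get_inference_stub(), "densenet161", "../examples/image_classifier/kitten.jpg")
unregister(get_management_stub(), "densenet161")
```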