Merge branch 'master' into issue_561
dhanainme authored Dec 11, 2020
2 parents 0cbae6f + 60e4c6c commit 0b85126
Showing 58 changed files with 2,522 additions and 4,096 deletions.
24 changes: 23 additions & 1 deletion README.md
@@ -140,7 +140,29 @@ After you execute the `torchserve` command above, TorchServe runs on your host,

### Get predictions from a model

To test the model server, send a request to the server's `predictions` API.
To test the model server, send a request to the server's `predictions` API. TorchServe supports all [inference](docs/inference_api.md) and [management](docs/management_api.md) APIs through both [gRPC](docs/grpc_api.md) and [HTTP/REST](docs/rest_api.md).

#### Using gRPC APIs through a Python client

- Install gRPC Python dependencies:

```bash
pip install -U grpcio protobuf grpcio-tools
```

- Generate the inference client using the proto files

```bash
python -m grpc_tools.protoc --proto_path=frontend/server/src/main/resources/proto/ --python_out=scripts --grpc_python_out=scripts frontend/server/src/main/resources/proto/inference.proto frontend/server/src/main/resources/proto/management.proto
```

- Run inference using the sample [gRPC Python client](scripts/torchserve_grpc_client.py)

```bash
python scripts/torchserve_grpc_client.py infer densenet161 examples/image_classifier/kitten.jpg
```
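
For reference, the same call can be made directly from Python through the generated stubs. The sketch below is a minimal example, assuming the stubs produced by the `protoc` command above (`inference_pb2`, `inference_pb2_grpc`) are importable and that TorchServe is listening on the default gRPC inference port 9090; request and response field names follow `inference.proto`.

```python
import grpc

# Stubs generated from inference.proto by the grpc_tools.protoc command above
import inference_pb2
import inference_pb2_grpc

# Connect to the default gRPC inference port
channel = grpc.insecure_channel("localhost:9090")
stub = inference_pb2_grpc.InferenceAPIsServiceStub(channel)

# Send the sample image as the "data" entry of the request's input map
with open("examples/image_classifier/kitten.jpg", "rb") as f:
    request = inference_pb2.PredictionsRequest(
        model_name="densenet161", input={"data": f.read()}
    )

response = stub.Predictions(request)
print(response.prediction.decode("utf-8"))
```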

#### Using REST APIs

Complete the following steps:

1 change: 1 addition & 0 deletions docs/README.md
@@ -7,6 +7,7 @@
* [Installation](../README.md#install-torchserve) - Installation procedures
* [Serving Models](server.md) - Explains how to use `torchserve`.
* [REST API](rest_api.md) - Specification on the API endpoint for TorchServe
* [gRPC API](grpc_api.md) - Specification on the gRPC API endpoint for TorchServe
* [Packaging Model Archive](../model-archiver/README.md) - Explains how to package model archive file, use `model-archiver`.
* [Logging](logging.md) - How to configure logging
* [Metrics](metrics.md) - How to configure metrics
24 changes: 24 additions & 0 deletions docs/configuration.md
@@ -97,6 +97,24 @@ inference_address=https://0.0.0.0:8443
inference_address=https://172.16.1.10:8080
```

### Configure TorchServe gRPC listening ports
By default, the inference gRPC API listens on port 9090 and the management gRPC API listens on port 9091.

To configure different ports, use the following properties:

* `grpc_inference_port`: Inference gRPC API binding port. Default: 9090
* `grpc_management_port`: Management gRPC API binding port. Default: 9091

Here are a couple of examples:

```properties
grpc_inference_port=8888
```

```properties
grpc_management_port=9999
```
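
A client then simply targets whichever port you configure. As a rough sketch (assuming the Python stubs described in [grpc_api.md](grpc_api.md) have already been generated), moving the inference gRPC API to port 8888 only changes the channel target:

```python
import grpc

import inference_pb2_grpc  # generated stub module, see grpc_api.md

# The target must match the grpc_inference_port value set above (8888 in this example)
channel = grpc.insecure_channel("localhost:8888")
stub = inference_pb2_grpc.InferenceAPIsServiceStub(channel)
```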

### Enable SSL

To enable HTTPs, you can change `inference_address`, `management_address` or `metrics_address` protocol from http to https. For example: `inference_address=https://127.0.0.1`.
@@ -201,6 +219,12 @@ By default, TorchServe uses all available GPUs for inference. Use `number_of_gpu
* `metrics_format` : Use this to specify the metric report format. At present, the only supported and default value for this is `prometheus`.
This is used in conjunction with the `enable_metrics_api` option above.

### Enable metrics API
* `enable_metrics_api` : Enable or disable the metric APIs, i.e. it can be either `true` or `false`. Default: `true` (enabled)
* `metrics_format` : Use this to specify the metric report format. At present, the only supported and default value for this is `prometheus`.
This is used in conjunction with the `enable_metrics_api` option above.


### Other properties

Most of the following properties are designed for performance tuning. Adjusting these numbers will impact scalability and throughput.
70 changes: 70 additions & 0 deletions docs/grpc_api.md
@@ -0,0 +1,70 @@
# TorchServe gRPC API

TorchServe also supports [gRPC APIs](../frontend/server/src/main/resources/proto) for both inference and management calls.

TorchServe provides the following gRPC APIs:

* [Inference API](../frontend/server/src/main/resources/proto/inference.proto)
- **Ping** : Gets the health status of the running server
- **Predictions** : Gets predictions from the served model

* [Management API](../frontend/server/src/main/resources/proto/management.proto)
    - **RegisterModel** : Serves a model/model-version on TorchServe
    - **UnregisterModel** : Frees up system resources by unregistering a specific version of a model from TorchServe
    - **ScaleWorker** : Dynamically adjusts the number of workers for any version of a model to better serve different inference request loads
    - **ListModels** : Queries the default versions of the currently registered models
    - **DescribeModel** : Gets the detailed runtime status of the default version of a model
    - **SetDefault** : Sets any registered version of a model as the default version

By default, TorchServe listens on port 9090 for the gRPC Inference API and 9091 for the gRPC Management API.
To configure the gRPC APIs on different ports, refer to the [configuration documentation](configuration.md).

## Python client example for gRPC APIs

Run the following commands to register, run inference on, and unregister the densenet161 model from the [TorchServe model zoo](model_zoo.md), using the [gRPC Python client](../scripts/torchserve_grpc_client.py).

- [Install TorchServe](../README.md#install-torchserve)

- Clone the serve repo to run this example

```bash
git clone https://github.com/pytorch/serve
cd serve
```

- Install gRPC Python dependencies

```bash
pip install -U grpcio protobuf grpcio-tools
```

- Start TorchServe

```bash
mkdir model_store
torchserve --start --model-store model_store
```

- Generate the Python gRPC client stubs using the proto files

```bash
python -m grpc_tools.protoc --proto_path=frontend/server/src/main/resources/proto/ --python_out=scripts --grpc_python_out=scripts frontend/server/src/main/resources/proto/inference.proto frontend/server/src/main/resources/proto/management.proto
```

- Register the densenet161 model

```bash
python scripts/torchserve_grpc_client.py register densenet161
```

- Run inference on a sample image

```bash
python scripts/torchserve_grpc_client.py infer densenet161 examples/image_classifier/kitten.jpg
```

- Unregister the densenet161 model

```bash
python scripts/torchserve_grpc_client.py unregister densenet161
```
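
The same register/infer/unregister flow can also be scripted directly against the generated stubs. The following is a minimal sketch, assuming the `management_pb2`/`management_pb2_grpc` modules produced by the `protoc` command above, the default gRPC management port 9091, and an illustrative model archive URL; field names follow `management.proto`.

```python
import grpc

# Stubs generated from management.proto by the grpc_tools.protoc command above
import management_pb2
import management_pb2_grpc

channel = grpc.insecure_channel("localhost:9091")
stub = management_pb2_grpc.ManagementAPIsServiceStub(channel)

# Register densenet161 from a model archive URL and start one worker synchronously.
# The URL below is illustrative; point it at wherever your .mar file is hosted.
response = stub.RegisterModel(
    management_pb2.RegisterModelRequest(
        url="https://torchserve.pytorch.org/mar_files/densenet161.mar",
        model_name="densenet161",
        initial_workers=1,
        synchronous=True,
    )
)
print(response.msg)

# Unregister the model when it is no longer needed
response = stub.UnregisterModel(
    management_pb2.UnregisterModelRequest(model_name="densenet161")
)
print(response.msg)
```
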
Binary file added docs/images/SLA.jpg
4 changes: 4 additions & 0 deletions docs/inference_api.md
@@ -22,6 +22,8 @@ The output is in OpenAPI 3.0.1 json format. You can use it to generate client code, se

## Health check API

This API follows the [InferenceAPIsService.Ping](../frontend/server/src/main/resources/proto/inference.proto) gRPC API. It returns the health status of the running server.

TorchServe supports a `ping` API that you can call to check the health status of a running TorchServe server:

```bash
@@ -38,6 +40,8 @@ If the server is running, the response is:

## Predictions API

This API follows the [InferenceAPIsService.Predictions](../frontend/server/src/main/resources/proto/inference.proto) gRPC API. It returns the prediction response from the served model.

To get predictions from the default version of each loaded model, make a REST call to `/predictions/{model_name}`:

* POST /predictions/{model_name}
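
For example, a minimal Python sketch of this call (assuming the `requests` package is installed, the sample kitten image from the serve repo, and a densenet161 model already registered and served on the default inference port 8080):

```python
import requests

# POST the raw image bytes to the predictions endpoint of the registered model
with open("examples/image_classifier/kitten.jpg", "rb") as f:
    response = requests.post(
        "http://localhost:8080/predictions/densenet161", data=f.read()
    )

print(response.json())
```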
12 changes: 12 additions & 0 deletions docs/management_api.md
@@ -15,6 +15,8 @@ Similar to the [Inference API](inference_api.md), the Management API provides a

## Register a model

This API follows the [ManagementAPIsService.RegisterModel](../frontend/server/src/main/resources/proto/management.proto) gRPC API.

`POST /models`

* `url` - Model archive download url. Supports the following locations:
@@ -74,6 +76,9 @@ curl -v -X POST "http://localhost:8081/models?initial_workers=1&synchronous=true

## Scale workers

This API follows the [ManagementAPIsService.ScaleWorker](../frontend/server/src/main/resources/proto/management.proto) gRPC API. It returns the status of a model in the ModelServer.


`PUT /models/{model_name}`

* `min_worker` - (optional) the minimum number of worker processes. TorchServe will try to maintain this minimum for the specified model. The default value is `1`.
@@ -139,6 +144,8 @@ curl -v -X PUT "http://localhost:8081/models/noop/2.0?min_worker=3&synchronous=t

## Describe model

This API follows the [ManagementAPIsService.DescribeModel](../frontend/server/src/main/resources/proto/management.proto) gRPC API. It returns the status of a model in the ModelServer.

`GET /models/{model_name}`

Use the Describe Model API to get the detailed runtime status of the default version of a model:
@@ -251,6 +258,8 @@ curl http://localhost:8081/models/noop/all

## Unregister a model

This API follows the [ManagementAPIsService.UnregisterModel](../frontend/server/src/main/resources/proto/management.proto) gRPC API. It returns the status of a model in the ModelServer.

`DELETE /models/{model_name}/{version}`

Use the Unregister Model API to free up system resources by unregistering a specific version of a model from TorchServe:
@@ -264,6 +273,7 @@ curl -X DELETE http://localhost:8081/models/noop/1.0
```

## List models
This API follows the [ManagementAPIsService.ListModels](../frontend/server/src/main/resources/proto/management.proto) gRPC API. It returns a list of the models registered in the ModelServer.

`GET /models`
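
As a quick sketch, the same listing can be done from Python (assuming the `requests` package, the default management port 8081, and the optional `limit` query parameter accepted by this endpoint):

```python
import requests

# Ask the management API for up to 10 registered models
response = requests.get("http://localhost:8081/models", params={"limit": 10})
print(response.json())
```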

@@ -320,6 +330,8 @@ Example outputs of the Inference and Management APIs:

## Set Default Version

This API follows the [ManagementAPIsService.SetDefault](../frontend/server/src/main/resources/proto/management.proto) gRPC API. It returns the status of a model in the ModelServer.

`PUT /models/{model_name}/{version}/set-default`

To set any registered version of a model as the default version, use:
14 changes: 13 additions & 1 deletion frontend/build.gradle
@@ -3,10 +3,15 @@ buildscript {
spotbugsVersion = '4.0.2'
toolVersion = '4.0.2'
}
dependencies {
classpath 'com.google.protobuf:protobuf-gradle-plugin:0.8.13'
}
}

plugins {
id 'com.github.spotbugs' version '4.0.2' apply false
id 'com.google.protobuf' version '0.8.13' apply false
id 'idea'
id 'com.github.spotbugs' version '4.0.2' apply false
}

allprojects {
@@ -25,6 +30,7 @@ allprojects {
}
}


def javaProjects() {
return subprojects.findAll();
}
@@ -63,6 +69,12 @@ configure(javaProjects()) {
minimum = 0.70
}
}
afterEvaluate {
classDirectories.setFrom(files(classDirectories.files.collect {
fileTree(dir: "${rootProject.projectDir}/server/src/main/java",
exclude: ['org/pytorch/serve/grpc**/**'])
}))
}
}
}
}
2 changes: 2 additions & 0 deletions frontend/gradle.properties
@@ -8,3 +8,5 @@ slf4j_api_version=1.7.25
slf4j_log4j12_version=1.7.25
testng_version=7.1.0
torchserve_sdk_version=0.0.3
grpc_version=1.31.1
protoc_version=3.13.0
1 change: 1 addition & 0 deletions frontend/server/build.gradle
@@ -8,6 +8,7 @@ dependencies {
testImplementation "org.testng:testng:${testng_version}"
}

apply from: file("${project.rootProject.projectDir}/tools/gradle/proto.gradle")
apply from: file("${project.rootProject.projectDir}/tools/gradle/launcher.gradle")

jar {
52 changes: 50 additions & 2 deletions frontend/server/src/main/java/org/pytorch/serve/ModelServer.java
@@ -1,5 +1,8 @@
package org.pytorch.serve;

import io.grpc.Server;
import io.grpc.ServerBuilder;
import io.grpc.ServerInterceptors;
import io.netty.bootstrap.ServerBootstrap;
import io.netty.channel.ChannelFuture;
import io.netty.channel.ChannelFutureListener;
@@ -31,6 +34,8 @@
import org.pytorch.serve.archive.ModelArchive;
import org.pytorch.serve.archive.ModelException;
import org.pytorch.serve.archive.ModelNotFoundException;
import org.pytorch.serve.grpcimpl.GRPCInterceptor;
import org.pytorch.serve.grpcimpl.GRPCServiceFactory;
import org.pytorch.serve.metrics.MetricManager;
import org.pytorch.serve.servingsdk.ModelServerEndpoint;
import org.pytorch.serve.servingsdk.annotations.Endpoint;
@@ -53,6 +58,8 @@ public class ModelServer {
private Logger logger = LoggerFactory.getLogger(ModelServer.class);

private ServerGroups serverGroups;
private Server inferencegRPCServer;
private Server managementgRPCServer;
private List<ChannelFuture> futures = new ArrayList<>(2);
private AtomicBoolean stopped = new AtomicBoolean(false);
private ConfigManager configManager;
@@ -104,7 +111,10 @@ public void startAndWait()
throws InterruptedException, IOException, GeneralSecurityException,
InvalidSnapshotException {
try {
List<ChannelFuture> channelFutures = start();
List<ChannelFuture> channelFutures = startRESTserver();

startGRPCServers();

// Create and schedule metrics manager
MetricManager.scheduleMetrics(configManager);
System.out.println("Model server started."); // NOPMD
@@ -305,7 +315,7 @@ public ChannelFuture initializeServer(
* @throws InterruptedException if interrupted
* @throws InvalidSnapshotException
*/
public List<ChannelFuture> start()
public List<ChannelFuture> startRESTserver()
throws InterruptedException, IOException, GeneralSecurityException,
InvalidSnapshotException {
stopped.set(false);
@@ -363,6 +373,30 @@ public List<ChannelFuture> start()
return futures;
}

    public void startGRPCServers() throws IOException {
        inferencegRPCServer = startGRPCServer(ConnectorType.INFERENCE_CONNECTOR);
        managementgRPCServer = startGRPCServer(ConnectorType.MANAGEMENT_CONNECTOR);
    }

    private Server startGRPCServer(ConnectorType connectorType) throws IOException {

        ServerBuilder<?> s =
                ServerBuilder.forPort(configManager.getGRPCPort(connectorType))
                        .addService(
                                ServerInterceptors.intercept(
                                        GRPCServiceFactory.getgRPCService(connectorType),
                                        new GRPCInterceptor()));

        if (configManager.isGRPCSSLEnabled()) {
            s.useTransportSecurity(
                    new File(configManager.getCertificateFile()),
                    new File(configManager.getPrivateKeyFile()));
        }
        Server server = s.build();
        server.start();
        return server;
    }

private boolean validEndpoint(Annotation a, EndpointTypes type) {
return a instanceof Endpoint
&& !((Endpoint) a).urlPattern().isEmpty()
@@ -388,6 +422,16 @@ public boolean isRunning() {
return !stopped.get();
}

    private void stopgRPCServer(Server server) {
        if (server != null) {
            try {
                server.shutdown().awaitTermination();
            } catch (InterruptedException e) {
                e.printStackTrace(); // NOPMD
            }
        }
    }

private void exitModelStore() throws ModelNotFoundException {
ModelManager modelMgr = ModelManager.getInstance();
Map<String, Model> defModels = modelMgr.getDefaultModels();
@@ -420,6 +464,10 @@ public void stop() {
}

stopped.set(true);

stopgRPCServer(inferencegRPCServer);
stopgRPCServer(managementgRPCServer);

for (ChannelFuture future : futures) {
try {
future.channel().close().sync();