From 3416ccfb3ad8705c4e381617ef33c2b30876f832 Mon Sep 17 00:00:00 2001 From: jagadeesh Date: Tue, 14 Mar 2023 00:10:29 +0530 Subject: [PATCH 01/10] feat: add KServe gRPC v2 support Signed-off-by: jagadeesh --- .../v2/mnist/mnist_v2_tensor_gprc.json | 11 ++ .../kserve_wrapper/TSModelRepository.py | 11 +- .../kserve/kserve_wrapper/TorchserveModel.py | 54 +++++++- kubernetes/kserve/kserve_wrapper/__main__.py | 64 ++++++--- .../kserve/kserve_wrapper/inference_pb2.py | 37 ++++++ .../kserve_wrapper/inference_pb2_grpc.py | 124 ++++++++++++++++++ 6 files changed, 283 insertions(+), 18 deletions(-) create mode 100644 kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_gprc.json create mode 100644 kubernetes/kserve/kserve_wrapper/inference_pb2.py create mode 100644 kubernetes/kserve/kserve_wrapper/inference_pb2_grpc.py diff --git a/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_gprc.json b/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_gprc.json new file mode 100644 index 0000000000..5bc4ddd89b --- /dev/null +++ b/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_gprc.json @@ -0,0 +1,11 @@ +{ + "model_name": "mnist", + "inputs": [{ + "name": "input-0", + "shape": [1, 28, 28], + "datatype": "FP32", + "contents": { + "fp32_contents": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.23919999599456787, 0.011800000444054604, 0.1647000014781952, 0.4627000093460083, 0.7569000124931335, 0.4627000093460083, 0.4627000093460083, 0.23919999599456787, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05490000173449516, 0.7020000219345093, 0.9607999920845032, 0.9254999756813049, 0.9490000009536743, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9607999920845032, 0.9215999841690063, 0.3294000029563904, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.592199981212616, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.8353000283241272, 0.7529000043869019, 0.6980000138282776, 0.6980000138282776, 0.7059000134468079, 0.9961000084877014, 0.9961000084877014, 0.9451000094413757, 0.18039999902248383, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.16859999299049377, 0.9215999841690063, 0.9961000084877014, 0.8863000273704529, 0.25099998712539673, 0.10980000346899033, 0.0471000000834465, 0.0, 0.0, 0.007799999788403511, 0.5019999742507935, 0.9882000088691711, 1.0, 0.6783999800682068, 0.06669999659061432, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21960000693798065, 0.9961000084877014, 0.9922000169754028, 0.4196000099182129, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5254999995231628, 0.980400025844574, 0.9961000084877014, 0.29409998655319214, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.24709999561309814, 0.9961000084877014, 0.6195999979972839, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8666999936103821, 0.9961000084877014, 
0.6157000064849854, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7608000040054321, 0.9961000084877014, 0.40389999747276306, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5881999731063843, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13330000638961792, 0.8626999855041504, 0.9373000264167786, 0.22750000655651093, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.49410000443458557, 0.9961000084877014, 0.6705999970436096, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8392000198364258, 0.9373000264167786, 0.2353000044822693, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8392000198364258, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.04309999942779541, 0.8587999939918518, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.38429999351501465, 0.9961000084877014, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6352999806404114, 0.9961000084877014, 0.819599986076355, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.38429999351501465, 0.9961000084877014, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.20000000298023224, 0.9333000183105469, 0.9961000084877014, 0.29409998655319214, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.38429999351501465, 0.9961000084877014, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.20000000298023224, 0.6470999717712402, 0.9961000084877014, 0.7646999955177307, 0.015699999406933784, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2587999999523163, 0.9451000094413757, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.011800000444054604, 0.6549000144004822, 0.9961000084877014, 0.8902000188827515, 0.21570000052452087, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8392000198364258, 0.8353000283241272, 0.07840000092983246, 0.0, 0.0, 0.0, 0.0, 0.0, 0.18039999902248383, 0.5960999727249146, 0.7922000288963318, 0.9961000084877014, 0.9961000084877014, 0.24709999561309814, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8392000198364258, 0.9961000084877014, 0.800000011920929, 0.7059000134468079, 0.7059000134468079, 0.7059000134468079, 0.7059000134468079, 0.7059000134468079, 0.9215999841690063, 0.9961000084877014, 0.9961000084877014, 0.9175999760627747, 0.6118000149726868, 0.03920000046491623, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3176000118255615, 0.8039000034332275, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9882000088691711, 0.9175999760627747, 0.4706000089645386, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.10199999809265137, 0.8234999775886536, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 
0.9961000084877014, 0.6000000238418579, 0.40779998898506165, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + } + }] +} diff --git a/kubernetes/kserve/kserve_wrapper/TSModelRepository.py b/kubernetes/kserve/kserve_wrapper/TSModelRepository.py index 0093e6cad3..91ffe249b0 100644 --- a/kubernetes/kserve/kserve_wrapper/TSModelRepository.py +++ b/kubernetes/kserve/kserve_wrapper/TSModelRepository.py @@ -14,7 +14,14 @@ class TSModelRepository(ModelRepository): as inputs to the TSModel Repository. """ - def __init__(self, inference_address: str, management_address: str, model_dir: str): + def __init__( + self, + inference_address: str, + management_address: str, + grpc_inference_address: str, + protocol: str, + model_dir: str, + ): """The Inference Address, Management Address and the Model Directory from the kserve side is initialized here. @@ -27,4 +34,6 @@ def __init__(self, inference_address: str, management_address: str, model_dir: s logging.info("TSModelRepo is initialized") self.inference_address = inference_address self.management_address = management_address + self.grpc_inference_address = grpc_inference_address + self.protocol = protocol self.model_dir = model_dir diff --git a/kubernetes/kserve/kserve_wrapper/TorchserveModel.py b/kubernetes/kserve/kserve_wrapper/TorchserveModel.py index aa28a50aa7..4e7f0c84f1 100644 --- a/kubernetes/kserve/kserve_wrapper/TorchserveModel.py +++ b/kubernetes/kserve/kserve_wrapper/TorchserveModel.py @@ -2,19 +2,39 @@ return a KServe side response """ import logging import pathlib +from enum import Enum +from typing import Dict, Union +import grpc +import inference_pb2 +import inference_pb2_grpc import kserve from kserve.errors import ModelMissingError from kserve.model import Model as Model +from kserve.protocol.grpc.grpc_predict_v2_pb2 import ( + ModelInferRequest, + ModelInferResponse, +) +from kserve.protocol.infer_type import InferRequest logging.basicConfig(level=kserve.constants.KSERVE_LOGLEVEL) PREDICTOR_URL_FORMAT = PREDICTOR_V2_URL_FORMAT = "http://{0}/predictions/{1}" -EXPLAINER_URL_FORMAT = EXPLAINER_V2_URL_FORMAT = "http://{0}/explanations/{1}" +EXPLAINER_URL_FORMAT = EXPLAINER_v2_URL_FORMAT = "http://{0}/explanations/{1}" REGISTER_URL_FORMAT = "{0}/models?initial_workers=1&url={1}" UNREGISTER_URL_FORMAT = "{0}/models/{1}" +class PredictorProtocol(Enum): + REST_V1 = "v1" + REST_V2 = "v2" + GRPC_V2 = "grpc-v2" + + +PREDICTOR_URL_FORMAT = "http://{0}/v1/models/{1}:predict" +EXPLAINER_URL_FORMAT = "http://{0}/v1/models/{1}:explain" + + class TorchserveModel(Model): """The torchserve side inference and explain end-points requests are handled to return a KServe side response @@ -24,7 +44,15 @@ class TorchserveModel(Model): side predict and explain http requests. 
""" - def __init__(self, name, inference_address, management_address, model_dir): + def __init__( + self, + name, + inference_address, + management_address, + grpc_inference_address, + protocol, + model_dir, + ): """The Model Name, Inference Address, Management Address and the model directory are specified. @@ -43,12 +71,34 @@ def __init__(self, name, inference_address, management_address, model_dir): self.inference_address = inference_address self.management_address = management_address + self.grpc_inference_address = grpc_inference_address self.model_dir = model_dir + self.protocol = protocol + + if self._grpc_client_stub == None: + self._channel = grpc.aio.insecure_channel(self.grpc_inference_address) + self._grpc_client_stub = inference_pb2_grpc.InferenceAPIsServiceStub( + self._channel + ) logging.info("Predict URL set to %s", self.predictor_host) self.explainer_host = self.predictor_host logging.info("Explain URL set to %s", self.explainer_host) + async def _grpc_predict( + self, + payload: Union[ModelInferRequest, InferRequest], + headers: Dict[str, str] = None, + ) -> ModelInferResponse: + if isinstance(payload, InferRequest): + payload = payload.to_grpc() + print(">payload", payload) + input_data = {"data": payload} + async_result = await self._grpc_client.Predictions( + inference_pb2.PredictionsRequest(model_name="mnist", input=input_data) + ) + return async_result + def load(self) -> bool: """This method validates model availabilty in the model directory and sets ready flag to true. diff --git a/kubernetes/kserve/kserve_wrapper/__main__.py b/kubernetes/kserve/kserve_wrapper/__main__.py index b31e3df375..f9631e6d9a 100644 --- a/kubernetes/kserve/kserve_wrapper/__main__.py +++ b/kubernetes/kserve/kserve_wrapper/__main__.py @@ -10,9 +10,8 @@ logging.basicConfig(level=kserve.constants.KSERVE_LOGLEVEL) DEFAULT_MODEL_NAME = "model" -DEFAULT_INFERENCE_ADDRESS = "http://127.0.0.1:8085" -INFERENCE_PORT = "8085" -DEFAULT_MANAGEMENT_ADDRESS = "http://127.0.0.1:8085" +DEFAULT_INFERENCE_ADDRESS = DEFAULT_MANAGEMENT_ADDRESS = "http://127.0.0.1:8085" +DEFAULT_GRPC_INFERENCE_PORT = "7070" DEFAULT_MODEL_STORE = "/mnt/models/model-store" CONFIG_PATH = "/mnt/models/config/config.properties" @@ -41,49 +40,80 @@ def parse_config(): keys[name.strip()] = value.strip() keys["model_snapshot"] = json.loads(keys["model_snapshot"]) - inference_address, management_address, model_store = ( + inference_address, management_address, grpc_inference_port, model_store = ( keys["inference_address"], keys["management_address"], + keys["grpc_inference_port"], keys["model_store"], ) models = keys["model_snapshot"]["models"] model_names = [] - # constructs inf address at a port other than 8080 as kfserver runs at 8080 - if inference_address: - inf_splits = inference_address.split(":") - inference_address = inf_splits[0] + inf_splits[1] + ":" + INFERENCE_PORT - else: - inference_address = DEFAULT_INFERENCE_ADDRESS + protocol = "grpc-v2" + # Get all the model_names for model, value in models.items(): model_names.append(model) + + if not inference_address: + inference_address = DEFAULT_INFERENCE_ADDRESS if not model_names: model_names = [DEFAULT_MODEL_NAME] + if not inference_address: + inference_address = DEFAULT_INFERENCE_ADDRESS if not management_address: management_address = DEFAULT_MANAGEMENT_ADDRESS + if not grpc_inference_port: + inf_splits = inference_address.split(":") + grpc_inference_address = inf_splits[1] + ":" + DEFAULT_GRPC_INFERENCE_PORT + else: + inf_splits = inference_address.split(":") + 
grpc_inference_address = inf_splits[1] + ":" + "7070" if not model_store: model_store = DEFAULT_MODEL_STORE + logging.info( - "Wrapper : Model names %s, inference address %s, management address %s, model store %s", + "Wrapper : Model names %s, inference address %s, management address %s, grpc_inference_address, %s, protocol %s, model store %s", model_names, inference_address, management_address, + grpc_inference_address, + protocol, model_store, ) - return model_names, inference_address, management_address, model_store + return ( + model_names, + inference_address, + management_address, + grpc_inference_address, + protocol, + model_store, + ) if __name__ == "__main__": - model_names, inference_address, management_address, model_dir = parse_config() + + ( + model_names, + inference_address, + management_address, + grpc_inference_address, + protocol, + model_dir, + ) = parse_config() models = [] for model_name in model_names: model = TorchserveModel( - model_name, inference_address, management_address, model_dir + model_name, + inference_address, + management_address, + grpc_inference_address, + protocol, + model_dir, ) # By default model.load() is called on first request. Enabling load all # model in TS config.properties, all models are loaded at start and the @@ -91,7 +121,11 @@ def parse_config(): model.load() models.append(model) registeredModels = TSModelRepository( - inference_address, management_address, model_dir + inference_address, + management_address, + grpc_inference_address, + protocol, + model_dir, ) ModelServer( registered_models=registeredModels, diff --git a/kubernetes/kserve/kserve_wrapper/inference_pb2.py b/kubernetes/kserve/kserve_wrapper/inference_pb2.py new file mode 100644 index 0000000000..c0b116092e --- /dev/null +++ b/kubernetes/kserve/kserve_wrapper/inference_pb2.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: inference.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x0finference.proto\x12 org.pytorch.serve.grpc.inference\x1a\x1bgoogle/protobuf/empty.proto"\xbd\x01\n\x12PredictionsRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12N\n\x05input\x18\x03 \x03(\x0b\x32?.org.pytorch.serve.grpc.inference.PredictionsRequest.InputEntry\x1a,\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01"(\n\x12PredictionResponse\x12\x12\n\nprediction\x18\x01 \x01(\x0c"*\n\x18TorchServeHealthResponse\x12\x0e\n\x06health\x18\x01 \x01(\t2\xf1\x01\n\x14InferenceAPIsService\x12\\\n\x04Ping\x12\x16.google.protobuf.Empty\x1a:.org.pytorch.serve.grpc.inference.TorchServeHealthResponse"\x00\x12{\n\x0bPredictions\x12\x34.org.pytorch.serve.grpc.inference.PredictionsRequest\x1a\x34.org.pytorch.serve.grpc.inference.PredictionResponse"\x00\x42\x02P\x01\x62\x06proto3' +) + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "inference_pb2", globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b"P\001" + _PREDICTIONSREQUEST_INPUTENTRY._options = None + _PREDICTIONSREQUEST_INPUTENTRY._serialized_options = b"8\001" + _PREDICTIONSREQUEST._serialized_start = 83 + _PREDICTIONSREQUEST._serialized_end = 272 + _PREDICTIONSREQUEST_INPUTENTRY._serialized_start = 228 + _PREDICTIONSREQUEST_INPUTENTRY._serialized_end = 272 + _PREDICTIONRESPONSE._serialized_start = 274 + _PREDICTIONRESPONSE._serialized_end = 314 + _TORCHSERVEHEALTHRESPONSE._serialized_start = 316 + _TORCHSERVEHEALTHRESPONSE._serialized_end = 358 + _INFERENCEAPISSERVICE._serialized_start = 361 + _INFERENCEAPISSERVICE._serialized_end = 602 +# @@protoc_insertion_point(module_scope) diff --git a/kubernetes/kserve/kserve_wrapper/inference_pb2_grpc.py b/kubernetes/kserve/kserve_wrapper/inference_pb2_grpc.py new file mode 100644 index 0000000000..dd74895e5a --- /dev/null +++ b/kubernetes/kserve/kserve_wrapper/inference_pb2_grpc.py @@ -0,0 +1,124 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc +import inference_pb2 as inference__pb2 +from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 + + +class InferenceAPIsServiceStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. 
+ """ + self.Ping = channel.unary_unary( + "/org.pytorch.serve.grpc.inference.InferenceAPIsService/Ping", + request_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + response_deserializer=inference__pb2.TorchServeHealthResponse.FromString, + ) + self.Predictions = channel.unary_unary( + "/org.pytorch.serve.grpc.inference.InferenceAPIsService/Predictions", + request_serializer=inference__pb2.PredictionsRequest.SerializeToString, + response_deserializer=inference__pb2.PredictionResponse.FromString, + ) + + +class InferenceAPIsServiceServicer(object): + """Missing associated documentation comment in .proto file.""" + + def Ping(self, request, context): + """Check health status of the TorchServe server.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def Predictions(self, request, context): + """Predictions entry point to get inference using default model version.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + +def add_InferenceAPIsServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + "Ping": grpc.unary_unary_rpc_method_handler( + servicer.Ping, + request_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, + response_serializer=inference__pb2.TorchServeHealthResponse.SerializeToString, + ), + "Predictions": grpc.unary_unary_rpc_method_handler( + servicer.Predictions, + request_deserializer=inference__pb2.PredictionsRequest.FromString, + response_serializer=inference__pb2.PredictionResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + "org.pytorch.serve.grpc.inference.InferenceAPIsService", rpc_method_handlers + ) + server.add_generic_rpc_handlers((generic_handler,)) + + +# This class is part of an EXPERIMENTAL API. 
+class InferenceAPIsService(object): + """Missing associated documentation comment in .proto file.""" + + @staticmethod + def Ping( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, + target, + "/org.pytorch.serve.grpc.inference.InferenceAPIsService/Ping", + google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, + inference__pb2.TorchServeHealthResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def Predictions( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, + target, + "/org.pytorch.serve.grpc.inference.InferenceAPIsService/Predictions", + inference__pb2.PredictionsRequest.SerializeToString, + inference__pb2.PredictionResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) From 88c21170bbedf3e93654c0aefd63939ca190475e Mon Sep 17 00:00:00 2001 From: jagadeesh Date: Tue, 14 Mar 2023 23:59:01 +0530 Subject: [PATCH 02/10] feat: add utils to convert kserve pb to ts pb Signed-off-by: jagadeesh --- .../kserve/kserve_wrapper/TorchserveModel.py | 16 +++----- kubernetes/kserve/kserve_wrapper/__main__.py | 6 +-- .../kserve/kserve_wrapper/gprc_utils.py | 37 +++++++++++++++++++ 3 files changed, 45 insertions(+), 14 deletions(-) create mode 100644 kubernetes/kserve/kserve_wrapper/gprc_utils.py diff --git a/kubernetes/kserve/kserve_wrapper/TorchserveModel.py b/kubernetes/kserve/kserve_wrapper/TorchserveModel.py index 4e7f0c84f1..2e5d87bdf3 100644 --- a/kubernetes/kserve/kserve_wrapper/TorchserveModel.py +++ b/kubernetes/kserve/kserve_wrapper/TorchserveModel.py @@ -6,9 +6,9 @@ from typing import Dict, Union import grpc -import inference_pb2 -import inference_pb2_grpc import kserve +from gprc_utils import to_ts_grpc +from inference_pb2_grpc import InferenceAPIsServiceStub from kserve.errors import ModelMissingError from kserve.model import Model as Model from kserve.protocol.grpc.grpc_predict_v2_pb2 import ( @@ -77,9 +77,7 @@ def __init__( if self._grpc_client_stub == None: self._channel = grpc.aio.insecure_channel(self.grpc_inference_address) - self._grpc_client_stub = inference_pb2_grpc.InferenceAPIsServiceStub( - self._channel - ) + self._grpc_client_stub = InferenceAPIsServiceStub(self._channel) logging.info("Predict URL set to %s", self.predictor_host) self.explainer_host = self.predictor_host @@ -91,12 +89,8 @@ async def _grpc_predict( headers: Dict[str, str] = None, ) -> ModelInferResponse: if isinstance(payload, InferRequest): - payload = payload.to_grpc() - print(">payload", payload) - input_data = {"data": payload} - async_result = await self._grpc_client.Predictions( - inference_pb2.PredictionsRequest(model_name="mnist", input=input_data) - ) + payload = to_ts_grpc(payload) + async_result = await self._grpc_client.Predictions(payload) return async_result def load(self) -> bool: diff --git a/kubernetes/kserve/kserve_wrapper/__main__.py b/kubernetes/kserve/kserve_wrapper/__main__.py index f9631e6d9a..7dd3369b29 100644 --- a/kubernetes/kserve/kserve_wrapper/__main__.py +++ b/kubernetes/kserve/kserve_wrapper/__main__.py 
@@ -64,12 +64,12 @@ def parse_config(): inference_address = DEFAULT_INFERENCE_ADDRESS if not management_address: management_address = DEFAULT_MANAGEMENT_ADDRESS + inf_splits = inference_address.split(":") if not grpc_inference_port: - inf_splits = inference_address.split(":") grpc_inference_address = inf_splits[1] + ":" + DEFAULT_GRPC_INFERENCE_PORT else: - inf_splits = inference_address.split(":") - grpc_inference_address = inf_splits[1] + ":" + "7070" + grpc_inference_address = inf_splits[1] + ":" + grpc_inference_port + grpc_inference_address = grpc_inference_address.replace("/", "") if not model_store: model_store = DEFAULT_MODEL_STORE diff --git a/kubernetes/kserve/kserve_wrapper/gprc_utils.py b/kubernetes/kserve/kserve_wrapper/gprc_utils.py new file mode 100644 index 0000000000..189b3edd42 --- /dev/null +++ b/kubernetes/kserve/kserve_wrapper/gprc_utils.py @@ -0,0 +1,37 @@ +import inference_pb2 +import numpy +from kserve.protocol.infer_type import InferRequest + + +def to_ts_grpc(data: InferRequest) -> inference_pb2.PredictionsRequest: + """Converts the InferRequest object to Torchserve gRPC PredictionsRequest message""" + infer_inputs = [] + model_name = data.model_name + for infer_input in data.inputs: + infer_input_dict = { + "name": infer_input.name, + "shape": infer_input.shape, + "datatype": infer_input.datatype, + } + if isinstance(infer_input.data, numpy.ndarray): + infer_input.set_data_from_numpy(infer_input.data, binary_data=False) + infer_input_dict["data"] = infer_input.data + else: + infer_input_dict["data"] = infer_input.data + infer_inputs.append(infer_input.data) + input_data = {"data": infer_inputs[0][0]} + # infer_request = {} + # infer_request["inputs"] = infer_inputs + return inference_pb2.PredictionsRequest(model_name=model_name, input=input_data) + + # infer_inputs = [] + # model_name = data.model_name + # for infer_input in data.inputs: + # if isinstance(infer_input.data, numpy.ndarray): + # infer_input.set_data_from_numpy(infer_input.data, binary_data=True) + # infer_input_dict = {} + # if not isinstance(infer_input.data, List): + # raise InvalidInput("input data is not a List") + # infer_input_dict["data"] = infer_input.data + # infer_inputs.append(infer_input_dict) + # return inference_pb2.PredictionsRequest(model_name= model_name, inputs=infer_inputs) From ae00b78e2ea27e23bdb665b81c210e4d1aa8d1e5 Mon Sep 17 00:00:00 2001 From: jagadeesh Date: Thu, 16 Mar 2023 00:50:01 +0530 Subject: [PATCH 03/10] add ts pb to kserve pb conversion method Signed-off-by: jagadeesh --- .../v2/mnist/mnist_v2_bytes.json | 3 +- .../v2/mnist/mnist_v2_bytes_grpc.json | 11 ++ .../v2/mnist/mnist_v2_tensor_gprc.json | 19 ++-- .../kserve_wrapper/TSModelRepository.py | 4 - .../kserve/kserve_wrapper/TorchserveModel.py | 53 ++++++--- kubernetes/kserve/kserve_wrapper/__main__.py | 3 +- .../kserve/kserve_wrapper/gprc_utils.py | 102 ++++++++++++------ ts/torch_handler/request_envelope/kservev2.py | 24 +++-- 8 files changed, 150 insertions(+), 69 deletions(-) create mode 100644 kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_bytes_grpc.json diff --git a/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_bytes.json b/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_bytes.json index 683ada7b73..096c555598 100644 --- a/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_bytes.json +++ b/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_bytes.json @@ -1,9 +1,10 @@ { + "id": "d3b15cad-50a2-4eaf-80ce-8b0a428bd298", "inputs": [ { "data": 
["iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAAA10lEQVR4nGNgGFhgy6xVdrCszBaLFN/mr28+/QOCr69DMCSnA8WvHti0acu/fx/10OS0X/975CDDw8DA1PDn/1pBVEmLf3+zocy2X/+8USXt/82Ds+/+m4sqeehfOpw97d9VFDmlO++t4JwQNMm6f6sZcEpee2+DR/I4A05J7tt4JJP+IUsu+ncRp6TxO9RAQJY0XvrvMAuypNNHuCTz8n+PzVEcy3DtqgiY1ptx6t8/ewY0yX9ntoDA63//Xs3hQpMMPPsPAv68qmDAAFKXwHIzMzCl6AoAxXp0QujtP+8AAAAASUVORK5CYII="], "datatype": "BYTES", - "name": "e8d5afed-0a56-4deb-ac9c-352663f51b93", + "name": "312a4eb0-0ca7-4803-a101-a6d2c18486fe", "shape": [-1] } ] diff --git a/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_bytes_grpc.json b/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_bytes_grpc.json new file mode 100644 index 0000000000..44e25e9fc9 --- /dev/null +++ b/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_bytes_grpc.json @@ -0,0 +1,11 @@ +{ + "model_name": "mnist", + "inputs": [{ + "name": "312a4eb0-0ca7-4803-a101-a6d2c18486fe", + "shape": [-1], + "datatype": "BYTES", + "contents": { + "bytes_contents": ["iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAAA10lEQVR4nGNgGFhgy6xVdrCszBaLFN/mr28+/QOCr69DMCSnA8WvHti0acu/fx/10OS0X/975CDDw8DA1PDn/1pBVEmLf3+zocy2X/+8USXt/82Ds+/+m4sqeehfOpw97d9VFDmlO++t4JwQNMm6f6sZcEpee2+DR/I4A05J7tt4JJP+IUsu+ncRp6TxO9RAQJY0XvrvMAuypNNHuCTz8n+PzVEcy3DtqgiY1ptx6t8/ewY0yX9ntoDA63//Xs3hQpMMPPsPAv68qmDAAFKXwHIzMzCl6AoAxXp0QujtP+8AAAAASUVORK5CYII="] + } + }] +} diff --git a/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_gprc.json b/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_gprc.json index 5bc4ddd89b..3fd601005e 100644 --- a/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_gprc.json +++ b/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_gprc.json @@ -1,11 +1,12 @@ { - "model_name": "mnist", - "inputs": [{ - "name": "input-0", - "shape": [1, 28, 28], - "datatype": "FP32", - "contents": { - "fp32_contents": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.23919999599456787, 0.011800000444054604, 0.1647000014781952, 0.4627000093460083, 0.7569000124931335, 0.4627000093460083, 0.4627000093460083, 0.23919999599456787, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05490000173449516, 0.7020000219345093, 0.9607999920845032, 0.9254999756813049, 0.9490000009536743, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9607999920845032, 0.9215999841690063, 0.3294000029563904, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.592199981212616, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.8353000283241272, 0.7529000043869019, 0.6980000138282776, 0.6980000138282776, 0.7059000134468079, 0.9961000084877014, 0.9961000084877014, 0.9451000094413757, 0.18039999902248383, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.16859999299049377, 0.9215999841690063, 0.9961000084877014, 0.8863000273704529, 0.25099998712539673, 0.10980000346899033, 0.0471000000834465, 0.0, 0.0, 
0.007799999788403511, 0.5019999742507935, 0.9882000088691711, 1.0, 0.6783999800682068, 0.06669999659061432, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21960000693798065, 0.9961000084877014, 0.9922000169754028, 0.4196000099182129, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5254999995231628, 0.980400025844574, 0.9961000084877014, 0.29409998655319214, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.24709999561309814, 0.9961000084877014, 0.6195999979972839, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8666999936103821, 0.9961000084877014, 0.6157000064849854, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7608000040054321, 0.9961000084877014, 0.40389999747276306, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5881999731063843, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13330000638961792, 0.8626999855041504, 0.9373000264167786, 0.22750000655651093, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.49410000443458557, 0.9961000084877014, 0.6705999970436096, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8392000198364258, 0.9373000264167786, 0.2353000044822693, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8392000198364258, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.04309999942779541, 0.8587999939918518, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.38429999351501465, 0.9961000084877014, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6352999806404114, 0.9961000084877014, 0.819599986076355, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.38429999351501465, 0.9961000084877014, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.20000000298023224, 0.9333000183105469, 0.9961000084877014, 0.29409998655319214, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.38429999351501465, 0.9961000084877014, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.20000000298023224, 0.6470999717712402, 0.9961000084877014, 0.7646999955177307, 0.015699999406933784, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2587999999523163, 0.9451000094413757, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.011800000444054604, 0.6549000144004822, 0.9961000084877014, 0.8902000188827515, 0.21570000052452087, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8392000198364258, 0.8353000283241272, 0.07840000092983246, 0.0, 0.0, 0.0, 0.0, 0.0, 0.18039999902248383, 0.5960999727249146, 0.7922000288963318, 0.9961000084877014, 0.9961000084877014, 0.24709999561309814, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8392000198364258, 0.9961000084877014, 0.800000011920929, 0.7059000134468079, 0.7059000134468079, 0.7059000134468079, 0.7059000134468079, 0.7059000134468079, 0.9215999841690063, 0.9961000084877014, 0.9961000084877014, 
0.9175999760627747, 0.6118000149726868, 0.03920000046491623, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3176000118255615, 0.8039000034332275, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9882000088691711, 0.9175999760627747, 0.4706000089645386, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.10199999809265137, 0.8234999775886536, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.6000000238418579, 0.40779998898506165, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] - } - }] + "id": "d3b15cad-50a2-4eaf-80ce-8b0a428bd298", + "model_name": "mnist", + "inputs": [{ + "name": "input-0", + "shape": [1, 28, 28], + "datatype": "FP32", + "contents": { + "fp32_contents": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.23919999599456787, 0.011800000444054604, 0.1647000014781952, 0.4627000093460083, 0.7569000124931335, 0.4627000093460083, 0.4627000093460083, 0.23919999599456787, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05490000173449516, 0.7020000219345093, 0.9607999920845032, 0.9254999756813049, 0.9490000009536743, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9607999920845032, 0.9215999841690063, 0.3294000029563904, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.592199981212616, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.8353000283241272, 0.7529000043869019, 0.6980000138282776, 0.6980000138282776, 0.7059000134468079, 0.9961000084877014, 0.9961000084877014, 0.9451000094413757, 0.18039999902248383, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.16859999299049377, 0.9215999841690063, 0.9961000084877014, 0.8863000273704529, 0.25099998712539673, 0.10980000346899033, 0.0471000000834465, 0.0, 0.0, 0.007799999788403511, 0.5019999742507935, 0.9882000088691711, 1.0, 0.6783999800682068, 0.06669999659061432, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.21960000693798065, 0.9961000084877014, 0.9922000169754028, 0.4196000099182129, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5254999995231628, 0.980400025844574, 0.9961000084877014, 0.29409998655319214, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.24709999561309814, 0.9961000084877014, 0.6195999979972839, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8666999936103821, 0.9961000084877014, 0.6157000064849854, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7608000040054321, 0.9961000084877014, 0.40389999747276306, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5881999731063843, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13330000638961792, 0.8626999855041504, 0.9373000264167786, 0.22750000655651093, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.49410000443458557, 0.9961000084877014, 0.6705999970436096, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8392000198364258, 0.9373000264167786, 0.2353000044822693, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8392000198364258, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.04309999942779541, 0.8587999939918518, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3294000029563904, 0.9961000084877014, 0.8353000283241272, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.38429999351501465, 0.9961000084877014, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6352999806404114, 0.9961000084877014, 0.819599986076355, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.38429999351501465, 0.9961000084877014, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.20000000298023224, 0.9333000183105469, 0.9961000084877014, 0.29409998655319214, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.38429999351501465, 0.9961000084877014, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.20000000298023224, 0.6470999717712402, 0.9961000084877014, 0.7646999955177307, 0.015699999406933784, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2587999999523163, 0.9451000094413757, 0.7803999781608582, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.011800000444054604, 0.6549000144004822, 0.9961000084877014, 0.8902000188827515, 0.21570000052452087, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8392000198364258, 0.8353000283241272, 0.07840000092983246, 0.0, 0.0, 0.0, 0.0, 0.0, 0.18039999902248383, 0.5960999727249146, 0.7922000288963318, 0.9961000084877014, 0.9961000084877014, 0.24709999561309814, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8392000198364258, 0.9961000084877014, 0.800000011920929, 0.7059000134468079, 0.7059000134468079, 0.7059000134468079, 0.7059000134468079, 0.7059000134468079, 0.9215999841690063, 0.9961000084877014, 0.9961000084877014, 0.9175999760627747, 0.6118000149726868, 0.03920000046491623, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3176000118255615, 0.8039000034332275, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9882000088691711, 0.9175999760627747, 0.4706000089645386, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.10199999809265137, 0.8234999775886536, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.9961000084877014, 0.6000000238418579, 0.40779998898506165, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + } + }] } diff --git a/kubernetes/kserve/kserve_wrapper/TSModelRepository.py b/kubernetes/kserve/kserve_wrapper/TSModelRepository.py index 91ffe249b0..4cb0fe47f5 100644 --- a/kubernetes/kserve/kserve_wrapper/TSModelRepository.py +++ b/kubernetes/kserve/kserve_wrapper/TSModelRepository.py @@ -18,8 +18,6 @@ def __init__( self, inference_address: str, management_address: str, - grpc_inference_address: str, - protocol: str, model_dir: str, ): """The Inference Address, Management Address and the Model Directory from the kserve @@ -34,6 +32,4 @@ def __init__( logging.info("TSModelRepo is initialized") self.inference_address = inference_address self.management_address = management_address - self.grpc_inference_address = grpc_inference_address - self.protocol = protocol self.model_dir = model_dir diff --git a/kubernetes/kserve/kserve_wrapper/TorchserveModel.py b/kubernetes/kserve/kserve_wrapper/TorchserveModel.py index 2e5d87bdf3..4daabb8240 100644 --- a/kubernetes/kserve/kserve_wrapper/TorchserveModel.py +++ b/kubernetes/kserve/kserve_wrapper/TorchserveModel.py @@ -6,16 +6,17 @@ from typing import Dict, Union import grpc +import inference_pb2_grpc import kserve -from gprc_utils import to_ts_grpc -from inference_pb2_grpc import InferenceAPIsServiceStub +from gprc_utils import from_ts_grpc, to_ts_grpc +from inference_pb2 import PredictionResponse from kserve.errors import ModelMissingError from kserve.model import Model as Model from kserve.protocol.grpc.grpc_predict_v2_pb2 import ( ModelInferRequest, ModelInferResponse, ) -from kserve.protocol.infer_type import InferRequest +from kserve.protocol.infer_type import InferRequest, InferResponse logging.basicConfig(level=kserve.constants.KSERVE_LOGLEVEL) @@ -31,10 +32,6 @@ class PredictorProtocol(Enum): GRPC_V2 = "grpc-v2" -PREDICTOR_URL_FORMAT = "http://{0}/v1/models/{1}:predict" -EXPLAINER_URL_FORMAT = "http://{0}/v1/models/{1}:explain" - - class TorchserveModel(Model): """The torchserve side inference and explain end-points requests are handled to return a KServe side response @@ -71,28 +68,54 @@ def __init__( self.inference_address = inference_address self.management_address = management_address - self.grpc_inference_address = grpc_inference_address self.model_dir = model_dir self.protocol = protocol - if self._grpc_client_stub == None: - self._channel = grpc.aio.insecure_channel(self.grpc_inference_address) - self._grpc_client_stub = InferenceAPIsServiceStub(self._channel) + if self.protocol == PredictorProtocol.GRPC_V2.value: + self.predictor_host = grpc_inference_address logging.info("Predict URL set to %s", self.predictor_host) - self.explainer_host = self.predictor_host logging.info("Explain URL set to %s", self.explainer_host) + def 
grpc_client(self):
+        if getattr(self, "_grpc_client_stub", None) is None:
+            self.channel = grpc.aio.insecure_channel(self.predictor_host)
+            self._grpc_client_stub = inference_pb2_grpc.InferenceAPIsServiceStub(
+                self.channel
+            )
+        return self._grpc_client_stub
+
     async def _grpc_predict(
         self,
         payload: Union[ModelInferRequest, InferRequest],
         headers: Dict[str, str] = None,
     ) -> ModelInferResponse:
-        if isinstance(payload, InferRequest):
-            payload = to_ts_grpc(payload)
-        async_result = await self._grpc_client.Predictions(payload)
+        payload = to_ts_grpc(payload)
+        grpc_stub = self.grpc_client()
+        async_result = await grpc_stub.Predictions(payload)
         return async_result

+    def postprocess(
+        self,
+        response: Union[Dict, InferResponse, ModelInferResponse, PredictionResponse],
+        headers: Dict[str, str] = None,
+    ) -> Union[Dict, ModelInferResponse]:
+        if headers:
+            if "grpc" in headers.get("user-agent", ""):
+                if isinstance(response, ModelInferResponse):
+                    return response
+                elif isinstance(response, InferResponse):
+                    return response.to_grpc()
+                elif isinstance(response, PredictionResponse):
+                    return from_ts_grpc(response)
+            if "application/json" in headers.get("content-type", ""):
+                # If the original request is REST, convert the gRPC predict response to dict
+                if isinstance(response, ModelInferResponse):
+                    return InferResponse.from_grpc(response).to_rest()
+                elif isinstance(response, InferResponse):
+                    return response.to_rest()
+        return response
+
     def load(self) -> bool:
         """This method validates model availabilty in the model directory
         and sets ready flag to true.
diff --git a/kubernetes/kserve/kserve_wrapper/__main__.py b/kubernetes/kserve/kserve_wrapper/__main__.py
index 7dd3369b29..0ae110a8f8 100644
--- a/kubernetes/kserve/kserve_wrapper/__main__.py
+++ b/kubernetes/kserve/kserve_wrapper/__main__.py
@@ -51,6 +51,7 @@ def parse_config():
     model_names = []

     protocol = "grpc-v2"
+    # protocol = "v2"

     # Get all the model_names
     for model, value in models.items():
@@ -123,8 +124,6 @@ def parse_config():
     registeredModels = TSModelRepository(
         inference_address,
         management_address,
-        grpc_inference_address,
-        protocol,
         model_dir,
     )
     ModelServer(
diff --git a/kubernetes/kserve/kserve_wrapper/gprc_utils.py b/kubernetes/kserve/kserve_wrapper/gprc_utils.py
index 189b3edd42..c2693d3bc7 100644
--- a/kubernetes/kserve/kserve_wrapper/gprc_utils.py
+++ b/kubernetes/kserve/kserve_wrapper/gprc_utils.py
@@ -1,37 +1,76 @@
-import inference_pb2
-import numpy
-from kserve.protocol.infer_type import InferRequest
+import base64
+import json
+from typing import Union

+from inference_pb2 import PredictionResponse, PredictionsRequest
+from kserve.errors import InvalidInput
+from kserve.protocol.grpc.grpc_predict_v2_pb2 import (
+    InferTensorContents,
+    ModelInferRequest,
+    ModelInferResponse,
+)
+from kserve.protocol.infer_type import InferOutput, InferRequest, InferResponse

-def to_ts_grpc(data: InferRequest) -> inference_pb2.PredictionsRequest:
+
+def get_content(datatype: str, data: InferTensorContents):
+    if datatype == "BOOL":
+        return list(data.bool_contents)
+    elif datatype in ["UINT8", "UINT16", "UINT32"]:
+        return list(data.uint_contents)
+    elif datatype == "UINT64":
+        return list(data.uint64_contents)
+    elif datatype in ["INT8", "INT16", "INT32"]:
+        return list(data.int_contents)
+    elif datatype == "INT64":
+        return list(data.int64_contents)
+    elif datatype == "FP32":
+        return list(data.fp32_contents)
+    elif datatype == "FP64":
+        return list(data.fp64_contents)
+    elif datatype == "BYTES":
+        return [base64.b64encode(data.bytes_contents[0]).decode("utf-8")]
+    else:
+        raise InvalidInput("invalid content type")
+
+
+def to_ts_grpc(data: Union[ModelInferRequest, InferRequest]) -> PredictionsRequest:
     """Converts the InferRequest object to Torchserve gRPC PredictionsRequest message"""
-    infer_inputs = []
+    if isinstance(data, InferRequest):
+        data = data.to_grpc()
+    infer_request = {}
     model_name = data.model_name
-    for infer_input in data.inputs:
-        infer_input_dict = {
-            "name": infer_input.name,
-            "shape": infer_input.shape,
-            "datatype": infer_input.datatype,
-        }
-        if isinstance(infer_input.data, numpy.ndarray):
-            infer_input.set_data_from_numpy(infer_input.data, binary_data=False)
-            infer_input_dict["data"] = infer_input.data
-        else:
-            infer_input_dict["data"] = infer_input.data
-        infer_inputs.append(infer_input.data)
-    input_data = {"data": infer_inputs[0][0]}
-    # infer_request = {}
-    # infer_request["inputs"] = infer_inputs
-    return inference_pb2.PredictionsRequest(model_name=model_name, input=input_data)
+    infer_inputs = [
+        dict(
+            name=input_tensor.name,
+            shape=list(input_tensor.shape),
+            datatype=input_tensor.datatype,
+            data=get_content(input_tensor.datatype, input_tensor.contents),
+        )
+        for input_tensor in data.inputs
+    ]
+    infer_request["id"] = data.id
+    infer_request["inputs"] = infer_inputs
+    ts_grpc_input = {"data": json.dumps(infer_request).encode("utf-8")}
+    return PredictionsRequest(model_name=model_name, input=ts_grpc_input)
+

-    # infer_inputs = []
-    # model_name = data.model_name
-    # for infer_input in data.inputs:
-    #     if isinstance(infer_input.data, numpy.ndarray):
-    #         infer_input.set_data_from_numpy(infer_input.data, binary_data=True)
-    #     infer_input_dict = {}
-    #     if not isinstance(infer_input.data, List):
-    #         raise InvalidInput("input data is not a List")
-    #     infer_input_dict["data"] = infer_input.data
-    #     infer_inputs.append(infer_input_dict)
-    #     return inference_pb2.PredictionsRequest(model_name= model_name, inputs=infer_inputs)
+def from_ts_grpc(data: PredictionResponse) -> ModelInferResponse:
+    """Converts the TorchServe gRPC PredictionResponse to a KServe gRPC ModelInferResponse message"""
+    decoded_data = json.loads(data.prediction.decode("utf-8"))
+    infer_outputs = [
+        InferOutput(
+            name=output["name"],
+            shape=list(output["shape"]),
+            datatype=output["datatype"],
+            data=output["data"],
+        )
+        for output in decoded_data["outputs"]
+    ]
+    response_id = decoded_data.get("id")
+    infer_response = InferResponse(
+        model_name=decoded_data["model_name"],
+        response_id=response_id,
+        infer_outputs=infer_outputs,
+        from_grpc=True,
+    )
+    return infer_response.to_grpc()
diff --git a/ts/torch_handler/request_envelope/kservev2.py b/ts/torch_handler/request_envelope/kservev2.py
index 5a88e9497d..76f0f05ab4 100644
--- a/ts/torch_handler/request_envelope/kservev2.py
+++ b/ts/torch_handler/request_envelope/kservev2.py
@@ -99,20 +99,28 @@ def _from_json(self, body_list):
         """
         Extracts the data from the JSON object
         """
-        # If the KF Transformer and Explainer sends in data as bytesarray
         if isinstance(body_list[0], (bytes, bytearray)):
-            body_list = [json.loads(body.decode()) for body in body_list]
+            body_list = [json.loads(body.decode("utf8")) for body in body_list]
             logger.debug("Bytes array is %s", body_list)

         input_names = []
         for index, input in enumerate(body_list[0]["inputs"]):
             if input["datatype"] == "BYTES":
                 body_list[0]["inputs"][index]["data"] = input["data"][0]
+            else:
+                body_list[0]["inputs"][index]["data"] = (
+                    np.array(input["data"]).reshape(tuple(input["shape"])).tolist()
+                )
             input_names.append(input["name"])
         setattr(self.context, 
"input_names", input_names) logger.debug("Bytes array is %s", body_list) - if body_list[0].get("id") is not None: + id = body_list[0].get("id") + if id and id.strip(): setattr(self.context, "input_request_id", body_list[0]["id"]) + # TODO: Add parameters support + # parameters = body_list[0].get("parameters") + # if parameters: + # setattr(self.context, "input_parameters", body_list[0]["parameters"]) data_list = [inputs_list.get("inputs") for inputs_list in body_list][0] return data_list @@ -143,6 +151,10 @@ def format_output(self, data): delattr(self.context, "input_request_id") else: response["id"] = self.context.get_request_id(0) + # TODO: Add parameters support + # if hasattr(self.context, "input_parameters"): + # response["parameters"] = getattr(self.context, "input_parameters") + # delattr(self.context, "input_parameters") response["model_name"] = self.context.manifest.get("model").get("modelName") response["model_version"] = self.context.manifest.get("model").get( "modelVersion" @@ -166,9 +178,9 @@ def _to_json(self, data, input_name): Constructs JSON object from data """ output_data = {} - data_ndarray = np.array(data) + data_ndarray = np.array(data).flatten() output_data["name"] = input_name - output_data["shape"] = list(data_ndarray.shape) output_data["datatype"] = _to_datatype(data_ndarray.dtype) - output_data["data"] = data_ndarray.flatten().tolist() + output_data["data"] = data_ndarray.tolist() + output_data["shape"] = data_ndarray.shape return output_data From 0c0fd22ccee213eeaffe2beb207dba1bb39f2f2f Mon Sep 17 00:00:00 2001 From: jagadeesh Date: Thu, 16 Mar 2023 22:33:39 +0530 Subject: [PATCH 04/10] Add pb python file generation step at docker build Signed-off-by: jagadeesh --- kubernetes/kserve/Dockerfile | 15 ++- kubernetes/kserve/Dockerfile.dev | 4 +- kubernetes/kserve/README.md | 4 +- kubernetes/kserve/build_image.sh | 9 +- .../kserve/kserve_wrapper/TorchserveModel.py | 22 +++- .../kserve/kserve_wrapper/inference_pb2.py | 37 ------ .../kserve_wrapper/inference_pb2_grpc.py | 124 ------------------ kubernetes/kserve/requirements.txt | 3 + 8 files changed, 49 insertions(+), 169 deletions(-) delete mode 100644 kubernetes/kserve/kserve_wrapper/inference_pb2.py delete mode 100644 kubernetes/kserve/kserve_wrapper/inference_pb2_grpc.py diff --git a/kubernetes/kserve/Dockerfile b/kubernetes/kserve/Dockerfile index 95ea649a8e..eb32f579bc 100644 --- a/kubernetes/kserve/Dockerfile +++ b/kubernetes/kserve/Dockerfile @@ -1,13 +1,13 @@ # syntax = docker/dockerfile:experimental # # Following comments have been shamelessly copied from https://github.com/pytorch/pytorch/blob/master/Dockerfile -# +# # NOTE: To build this you will need a docker version > 18.06 with # experimental enabled and DOCKER_BUILDKIT=1 # # If you do not use buildkit you are not going to have a good time # -# For reference: +# For reference: # https://docs.docker.com/develop/develop-images/build_enhancements ARG BASE_IMAGE=pytorch/torchserve:latest @@ -24,9 +24,18 @@ RUN pip install -r requirements.txt COPY dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh COPY kserve_wrapper kserve_wrapper + +COPY ./*.proto ./kserve_wrapper/ + +RUN python -m grpc_tools.protoc \ + --proto_path=./kserve_wrapper \ + --python_out=./kserve_wrapper \ + --grpc_python_out=./kserve_wrapper \ + ./kserve_wrapper/inference.proto \ + ./kserve_wrapper/management.proto + COPY config.properties config.properties USER model-server ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"] - diff 
--git a/kubernetes/kserve/Dockerfile.dev b/kubernetes/kserve/Dockerfile.dev index 4e7970c11e..5ae86fd8d6 100644 --- a/kubernetes/kserve/Dockerfile.dev +++ b/kubernetes/kserve/Dockerfile.dev @@ -62,7 +62,7 @@ RUN if [ "$MACHINE_TYPE" = "gpu" ]; then export USE_CUDA=1; fi \ && git checkout ${BRANCH_NAME} \ && if [ -z "$CUDA_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev; else python ts_scripts/install_dependencies.py --environment=dev --cuda $CUDA_VERSION; fi \ && python ts_scripts/install_from_src.py \ - && python -m pip install captum transformers kserve \ + && python -m pip install captum transformers kserve grpcio protobuf grpcio-tools \ && python -m pip install . \ && useradd -m model-server \ && mkdir -p /home/model-server/tmp \ @@ -70,6 +70,8 @@ RUN if [ "$MACHINE_TYPE" = "gpu" ]; then export USE_CUDA=1; fi \ && chmod +x /usr/local/bin/dockerd-entrypoint.sh \ && chown -R model-server /home/model-server \ && cp -R kubernetes/kserve/kserve_wrapper /home/model-server/kserve_wrapper \ + && cp frontend/server/src/main/resources/proto/*.proto /home/model-serve/kserve_wrapper \ + && python -m grpc_tools.protoc --proto_path=/home/model-server/kserve_wrapper --python_out=/home/model-server/kserve_wrapper --grpc_python_out=/home/model-server/kserve_wrapper /home/model-server/kserve_wrapper/inference.proto /home/model-server/kserve_wrapper/management.proto \ && cp kubernetes/kserve/config.properties /home/model-server/config.properties \ && mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store diff --git a/kubernetes/kserve/README.md b/kubernetes/kserve/README.md index c35cd2cabf..cf54a6ce73 100644 --- a/kubernetes/kserve/README.md +++ b/kubernetes/kserve/README.md @@ -30,10 +30,10 @@ Currently, KServe supports the Inference API for all the existing models but tex ./build_image.sh -g -t /: ``` -### Docker Image Dev Build +- To create dev image ```bash -DOCKER_BUILDKIT=1 docker build -f Dockerfile.dev -t pytorch/torchserve-kfs:latest-dev . +./build_image.sh -g -d -t /: ``` ## Running Torchserve inference service in KServe cluster diff --git a/kubernetes/kserve/build_image.sh b/kubernetes/kserve/build_image.sh index ea7b587327..2f17596ee5 100755 --- a/kubernetes/kserve/build_image.sh +++ b/kubernetes/kserve/build_image.sh @@ -2,6 +2,7 @@ DOCKER_TAG="pytorch/torchserve-kfs:latest" BASE_IMAGE="pytorch/torchserve:latest" +DOCKER_FILE="Dockerfile" for arg in "$@" do @@ -18,6 +19,10 @@ do BASE_IMAGE="pytorch/torchserve:latest-gpu" shift ;; + -d|--dev) + DOCKER_FILE="Dockerfile.dev" + shift + ;; -t|--tag) DOCKER_TAG="$2" shift @@ -26,4 +31,6 @@ do esac done -DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE=$BASE_IMAGE -t "$DOCKER_TAG" . +cp ../../frontend/server/src/main/resources/proto/*.proto . + +DOCKER_BUILDKIT=1 docker build --file "$DOCKER_FILE" --build-arg BASE_IMAGE=$BASE_IMAGE -t "$DOCKER_TAG" . diff --git a/kubernetes/kserve/kserve_wrapper/TorchserveModel.py b/kubernetes/kserve/kserve_wrapper/TorchserveModel.py index 4daabb8240..47cabcc3e6 100644 --- a/kubernetes/kserve/kserve_wrapper/TorchserveModel.py +++ b/kubernetes/kserve/kserve_wrapper/TorchserveModel.py @@ -88,8 +88,16 @@ def grpc_client(self): async def _grpc_predict( self, payload: Union[ModelInferRequest, InferRequest], - headers: Dict[str, str] = None, ) -> ModelInferResponse: + """Overrides the `_grpc_predict` method in Model class. 
diff --git a/kubernetes/kserve/Dockerfile.dev b/kubernetes/kserve/Dockerfile.dev
index 4e7970c11e..5ae86fd8d6 100644
--- a/kubernetes/kserve/Dockerfile.dev
+++ b/kubernetes/kserve/Dockerfile.dev
@@ -62,7 +62,7 @@ RUN if [ "$MACHINE_TYPE" = "gpu" ]; then export USE_CUDA=1; fi \
     && git checkout ${BRANCH_NAME} \
     && if [ -z "$CUDA_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev; else python ts_scripts/install_dependencies.py --environment=dev --cuda $CUDA_VERSION; fi \
     && python ts_scripts/install_from_src.py \
-    && python -m pip install captum transformers kserve \
+    && python -m pip install captum transformers kserve grpcio protobuf grpcio-tools \
     && python -m pip install . \
     && useradd -m model-server \
     && mkdir -p /home/model-server/tmp \
@@ -70,6 +70,8 @@ RUN if [ "$MACHINE_TYPE" = "gpu" ]; then export USE_CUDA=1; fi \
     && chmod +x /usr/local/bin/dockerd-entrypoint.sh \
     && chown -R model-server /home/model-server \
     && cp -R kubernetes/kserve/kserve_wrapper /home/model-server/kserve_wrapper \
+    && cp frontend/server/src/main/resources/proto/*.proto /home/model-server/kserve_wrapper \
+    && python -m grpc_tools.protoc --proto_path=/home/model-server/kserve_wrapper --python_out=/home/model-server/kserve_wrapper --grpc_python_out=/home/model-server/kserve_wrapper /home/model-server/kserve_wrapper/inference.proto /home/model-server/kserve_wrapper/management.proto \
     && cp kubernetes/kserve/config.properties /home/model-server/config.properties \
     && mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store

diff --git a/kubernetes/kserve/README.md b/kubernetes/kserve/README.md
index c35cd2cabf..cf54a6ce73 100644
--- a/kubernetes/kserve/README.md
+++ b/kubernetes/kserve/README.md
@@ -30,10 +30,10 @@ Currently, KServe supports the Inference API for all the existing models but tex
 ./build_image.sh -g -t <repository>/<image>:<tag>
 ```

-### Docker Image Dev Build
+- To create the dev image

 ```bash
-DOCKER_BUILDKIT=1 docker build -f Dockerfile.dev -t pytorch/torchserve-kfs:latest-dev .
+./build_image.sh -g -d -t <repository>/<image>:<tag>
 ```

 ## Running Torchserve inference service in KServe cluster

diff --git a/kubernetes/kserve/build_image.sh b/kubernetes/kserve/build_image.sh
index ea7b587327..2f17596ee5 100755
--- a/kubernetes/kserve/build_image.sh
+++ b/kubernetes/kserve/build_image.sh
@@ -2,6 +2,7 @@

 DOCKER_TAG="pytorch/torchserve-kfs:latest"
 BASE_IMAGE="pytorch/torchserve:latest"
+DOCKER_FILE="Dockerfile"

 for arg in "$@"
 do
@@ -18,6 +19,10 @@ do
           BASE_IMAGE="pytorch/torchserve:latest-gpu"
           shift
           ;;
+        -d|--dev)
+          DOCKER_FILE="Dockerfile.dev"
+          shift
+          ;;
         -t|--tag)
           DOCKER_TAG="$2"
           shift
@@ -26,4 +31,6 @@ do
     esac
 done

-DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE=$BASE_IMAGE -t "$DOCKER_TAG" .
+cp ../../frontend/server/src/main/resources/proto/*.proto .
+
+DOCKER_BUILDKIT=1 docker build --file "$DOCKER_FILE" --build-arg BASE_IMAGE=$BASE_IMAGE -t "$DOCKER_TAG" .

diff --git a/kubernetes/kserve/kserve_wrapper/TorchserveModel.py b/kubernetes/kserve/kserve_wrapper/TorchserveModel.py
index 4daabb8240..47cabcc3e6 100644
--- a/kubernetes/kserve/kserve_wrapper/TorchserveModel.py
+++ b/kubernetes/kserve/kserve_wrapper/TorchserveModel.py
@@ -88,8 +88,16 @@ def grpc_client(self):
     async def _grpc_predict(
         self,
         payload: Union[ModelInferRequest, InferRequest],
-        headers: Dict[str, str] = None,
     ) -> ModelInferResponse:
+        """Overrides the `_grpc_predict` method in Model class. The predict method calls
+        the `_grpc_predict` method if self.protocol is "grpc-v2".
+
+        Args:
+            payload (Dict|InferRequest|ModelInferRequest): The request payload passed from the ``predict`` handler.
+
+        Returns:
+            ModelInferResponse: The TorchServe gRPC response.
+        """
         payload = to_ts_grpc(payload)
         grpc_stub = self.grpc_client()
         async_result = await grpc_stub.Predictions(payload)
@@ -100,6 +108,18 @@ def postprocess(
         response: Union[Dict, InferResponse, ModelInferResponse, PredictionResponse],
         headers: Dict[str, str] = None,
     ) -> Union[Dict, ModelInferResponse]:
+        """This method converts the v2 infer response types to gRPC or REST.
+        For a gRPC request it converts an InferResponse to a gRPC message, directly
+        returns the ModelInferResponse from the predictor call, or converts a
+        TorchServe PredictionResponse to a ModelInferResponse.
+        For a REST request it converts a ModelInferResponse to a Dict, or directly
+        returns the predictor call's output.
+
+        Args:
+            response (Dict|InferResponse|ModelInferResponse|PredictionResponse): The response passed from the ``predict`` handler.
+            headers (Dict): Request headers.
+
+        Returns:
+            Dict|ModelInferResponse: The post-processed response.
+        """
         if headers:
             if "grpc" in headers.get("user-agent", ""):
                 if isinstance(response, ModelInferResponse):
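The two generated modules deleted below are exactly what the new protoc build step regenerates at image build time. For orientation, a minimal client sketch against the generated stub (hedged: the sample image path and a reachable TorchServe gRPC inference port, 7070 by default, are assumptions):

```python
import grpc
from google.protobuf import empty_pb2

import inference_pb2
import inference_pb2_grpc

channel = grpc.insecure_channel("localhost:7070")
stub = inference_pb2_grpc.InferenceAPIsServiceStub(channel)

# Health check: Ping takes google.protobuf.Empty, returns TorchServeHealthResponse.
print(stub.Ping(empty_pb2.Empty()).health)

# Prediction: the request carries a map<string, bytes> field named "input".
request = inference_pb2.PredictionsRequest(
    model_name="mnist",
    input={"data": open("0.png", "rb").read()},  # sample image path assumed
)
print(stub.Predictions(request).prediction)  # raw bytes from the handler
```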
diff --git a/kubernetes/kserve/kserve_wrapper/inference_pb2.py b/kubernetes/kserve/kserve_wrapper/inference_pb2.py
deleted file mode 100644
index c0b116092e..0000000000
--- a/kubernetes/kserve/kserve_wrapper/inference_pb2.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: inference.proto
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import descriptor_pool as _descriptor_pool
-from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
-
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x0finference.proto\x12 org.pytorch.serve.grpc.inference\x1a\x1bgoogle/protobuf/empty.proto"\xbd\x01\n\x12PredictionsRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12N\n\x05input\x18\x03 \x03(\x0b\x32?.org.pytorch.serve.grpc.inference.PredictionsRequest.InputEntry\x1a,\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01"(\n\x12PredictionResponse\x12\x12\n\nprediction\x18\x01 \x01(\x0c"*\n\x18TorchServeHealthResponse\x12\x0e\n\x06health\x18\x01 \x01(\t2\xf1\x01\n\x14InferenceAPIsService\x12\\\n\x04Ping\x12\x16.google.protobuf.Empty\x1a:.org.pytorch.serve.grpc.inference.TorchServeHealthResponse"\x00\x12{\n\x0bPredictions\x12\x34.org.pytorch.serve.grpc.inference.PredictionsRequest\x1a\x34.org.pytorch.serve.grpc.inference.PredictionResponse"\x00\x42\x02P\x01\x62\x06proto3'
-)
-
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "inference_pb2", globals())
-if _descriptor._USE_C_DESCRIPTORS == False:
-
-    DESCRIPTOR._options = None
-    DESCRIPTOR._serialized_options = b"P\001"
-    _PREDICTIONSREQUEST_INPUTENTRY._options = None
-    _PREDICTIONSREQUEST_INPUTENTRY._serialized_options = b"8\001"
-    _PREDICTIONSREQUEST._serialized_start = 83
-    _PREDICTIONSREQUEST._serialized_end = 272
-    _PREDICTIONSREQUEST_INPUTENTRY._serialized_start = 228
-    _PREDICTIONSREQUEST_INPUTENTRY._serialized_end = 272
-    _PREDICTIONRESPONSE._serialized_start = 274
-    _PREDICTIONRESPONSE._serialized_end = 314
-    _TORCHSERVEHEALTHRESPONSE._serialized_start = 316
-    _TORCHSERVEHEALTHRESPONSE._serialized_end = 358
-    _INFERENCEAPISSERVICE._serialized_start = 361
-    _INFERENCEAPISSERVICE._serialized_end = 602
-# @@protoc_insertion_point(module_scope)

diff --git a/kubernetes/kserve/kserve_wrapper/inference_pb2_grpc.py b/kubernetes/kserve/kserve_wrapper/inference_pb2_grpc.py
deleted file mode 100644
index dd74895e5a..0000000000
--- a/kubernetes/kserve/kserve_wrapper/inference_pb2_grpc.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
-"""Client and server classes corresponding to protobuf-defined services."""
-import grpc
-import inference_pb2 as inference__pb2
-from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2
-
-
-class InferenceAPIsServiceStub(object):
-    """Missing associated documentation comment in .proto file."""
-
-    def __init__(self, channel):
-        """Constructor.
-
-        Args:
-            channel: A grpc.Channel.
-        """
-        self.Ping = channel.unary_unary(
-            "/org.pytorch.serve.grpc.inference.InferenceAPIsService/Ping",
-            request_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString,
-            response_deserializer=inference__pb2.TorchServeHealthResponse.FromString,
-        )
-        self.Predictions = channel.unary_unary(
-            "/org.pytorch.serve.grpc.inference.InferenceAPIsService/Predictions",
-            request_serializer=inference__pb2.PredictionsRequest.SerializeToString,
-            response_deserializer=inference__pb2.PredictionResponse.FromString,
-        )
-
-
-class InferenceAPIsServiceServicer(object):
-    """Missing associated documentation comment in .proto file."""
-
-    def Ping(self, request, context):
-        """Check health status of the TorchServe server."""
-        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
-        context.set_details("Method not implemented!")
-        raise NotImplementedError("Method not implemented!")
-
-    def Predictions(self, request, context):
-        """Predictions entry point to get inference using default model version."""
-        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
-        context.set_details("Method not implemented!")
-        raise NotImplementedError("Method not implemented!")
-
-
-def add_InferenceAPIsServiceServicer_to_server(servicer, server):
-    rpc_method_handlers = {
-        "Ping": grpc.unary_unary_rpc_method_handler(
-            servicer.Ping,
-            request_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString,
-            response_serializer=inference__pb2.TorchServeHealthResponse.SerializeToString,
-        ),
-        "Predictions": grpc.unary_unary_rpc_method_handler(
-            servicer.Predictions,
-            request_deserializer=inference__pb2.PredictionsRequest.FromString,
-            response_serializer=inference__pb2.PredictionResponse.SerializeToString,
-        ),
-    }
-    generic_handler = grpc.method_handlers_generic_handler(
-        "org.pytorch.serve.grpc.inference.InferenceAPIsService", rpc_method_handlers
-    )
-    server.add_generic_rpc_handlers((generic_handler,))
-
-
-# This class is part of an EXPERIMENTAL API.
-class InferenceAPIsService(object):
-    """Missing associated documentation comment in .proto file."""
-
-    @staticmethod
-    def Ping(
-        request,
-        target,
-        options=(),
-        channel_credentials=None,
-        call_credentials=None,
-        insecure=False,
-        compression=None,
-        wait_for_ready=None,
-        timeout=None,
-        metadata=None,
-    ):
-        return grpc.experimental.unary_unary(
-            request,
-            target,
-            "/org.pytorch.serve.grpc.inference.InferenceAPIsService/Ping",
-            google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString,
-            inference__pb2.TorchServeHealthResponse.FromString,
-            options,
-            channel_credentials,
-            insecure,
-            call_credentials,
-            compression,
-            wait_for_ready,
-            timeout,
-            metadata,
-        )
-
-    @staticmethod
-    def Predictions(
-        request,
-        target,
-        options=(),
-        channel_credentials=None,
-        call_credentials=None,
-        insecure=False,
-        compression=None,
-        wait_for_ready=None,
-        timeout=None,
-        metadata=None,
-    ):
-        return grpc.experimental.unary_unary(
-            request,
-            target,
-            "/org.pytorch.serve.grpc.inference.InferenceAPIsService/Predictions",
-            inference__pb2.PredictionsRequest.SerializeToString,
-            inference__pb2.PredictionResponse.FromString,
-            options,
-            channel_credentials,
-            insecure,
-            call_credentials,
-            compression,
-            wait_for_ready,
-            timeout,
-            metadata,
-        )

diff --git a/kubernetes/kserve/requirements.txt b/kubernetes/kserve/requirements.txt
index 81199e737f..0734e6d292 100644
--- a/kubernetes/kserve/requirements.txt
+++ b/kubernetes/kserve/requirements.txt
@@ -1,3 +1,6 @@
 kserve>=0.9.0
 transformers
 captum
+grpcio
+protobuf
+grpcio-tools

From 456d4765267d4bc95b2ff7efc6f5ef03ea3d9234 Mon Sep 17 00:00:00 2001
From: jagadeesh
Date: Wed, 22 Mar 2023 21:04:55 +0530
Subject: [PATCH 05/10] fix: readme doc - add logs

Signed-off-by: jagadeesh
---
 ...or_gprc.json => mnist_v2_tensor_grpc.json} |  0
 kubernetes/kserve/kserve_wrapper/README.md    | 37 +++++++++++++++++--
 .../kserve/kserve_wrapper/TorchserveModel.py  |  2 +
 kubernetes/kserve/kserve_wrapper/__main__.py  | 11 ++----
 4 files changed, 39 insertions(+), 11 deletions(-)
 rename kubernetes/kserve/kf_request_json/v2/mnist/{mnist_v2_tensor_gprc.json => mnist_v2_tensor_grpc.json} (100%)

diff --git a/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_gprc.json b/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_grpc.json
similarity index 100%
rename from kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_gprc.json
rename to kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_grpc.json
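The README changes below document the new `PROTOCOL_VERSION` switch that replaces the previously hardcoded protocol. A sketch of the consuming side as wired by this series (the validation is illustrative, not part of the diff):

```python
import os

# Valid settings per the README: "v1", "v2", or "grpc-v2"; unset means the
# wrapper falls back to kserve's default behavior.
protocol = os.environ.get("PROTOCOL_VERSION")
if protocol not in (None, "v1", "v2", "grpc-v2"):
    raise ValueError(f"Unsupported PROTOCOL_VERSION: {protocol!r}")
```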
diff --git a/kubernetes/kserve/kserve_wrapper/README.md b/kubernetes/kserve/kserve_wrapper/README.md
index 54837b945d..6264c58166 100644
--- a/kubernetes/kserve/kserve_wrapper/README.md
+++ b/kubernetes/kserve/kserve_wrapper/README.md
@@ -59,11 +59,11 @@ sudo mkdir -p /mnt/models/model-store

 For v1 protocol

-``export TS_SERVICE_ENVELOPE=kserve`
+`export TS_SERVICE_ENVELOPE=kserve`

 For v2 protocol

-``export TS_SERVICE_ENVELOPE=kservev2`
+`export TS_SERVICE_ENVELOPE=kservev2`

 - Step 10: Move the config.properties to /mnt/models/config/.
 The config.properties file is as below:
@@ -93,6 +93,20 @@ torchserve --start --ts-config /mnt/models/config/config.properties

 - Step 12: Run the below command to start the KFServer

+- Step 13: Set the protocol version
+
+For v1 protocol
+
+`export PROTOCOL_VERSION=v1`
+
+For v2 protocol
+
+`export PROTOCOL_VERSION=v2`
+
+For the gRPC v2 protocol, set
+
+`export PROTOCOL_VERSION=grpc-v2`
+
 ```bash
 python3 serve/kubernetes/kserve/kserve_wrapper/__main__.py
 ```
@@ -127,7 +141,7 @@ Output:

 The curl request for explain is as below:

-```
+```bash
 curl -H "Content-Type: application/json" --data @serve/kubernetes/kserve/kf_request_json/v1/mnist.json http://0.0.0.0:8080/v1/models/mnist:explain
 ```

@@ -169,7 +183,7 @@ Response:

 The curl request for explain is as below:

-```
+```bash
 curl -H "Content-Type: application/json" --data @serve/kubernetes/kserve/kf_request_json/v1/mnist.json http://0.0.0.0:8080/v2/models/mnist/explain
 ```

@@ -192,6 +206,21 @@ Response:
 }
 ```

+For grpc-v2 protocol
+
+- Download the proto file
+
+```bash
+curl -O https://raw.githubusercontent.com/kserve/kserve/master/docs/predict-api/v2/grpc_predict_v2.proto
+```
+- Download [grpcurl](https://github.com/fullstorydev/grpcurl)
+
+Make a gRPC request
+
+```bash
+grpcurl -vv -plaintext -proto grpc_predict_v2.proto -d @ localhost:8081 inference.GRPCInferenceService.ModelInfer <<< $(cat "serve/kubernetes/kserve/kf_request_json/v2/mnist_tensor_bytes_grpc.json")
+```
+
 ## KServe Wrapper Testing in Local for BERT

 - Step 1: Follow the same steps from 1 to 10 as what was done for MNIST.

diff --git a/kubernetes/kserve/kserve_wrapper/TorchserveModel.py b/kubernetes/kserve/kserve_wrapper/TorchserveModel.py
index 47cabcc3e6..5f953fd770 100644
--- a/kubernetes/kserve/kserve_wrapper/TorchserveModel.py
+++ b/kubernetes/kserve/kserve_wrapper/TorchserveModel.py
@@ -76,6 +76,7 @@ def __init__(

         logging.info("Predict URL set to %s", self.predictor_host)
         logging.info("Explain URL set to %s", self.explainer_host)
+        logging.info("Protocol version is %s", self.protocol)

     def grpc_client(self):
         if self._grpc_client_stub is None:
@@ -88,6 +89,7 @@ def grpc_client(self):
     async def _grpc_predict(
         self,
         payload: Union[ModelInferRequest, InferRequest],
+        headers: Dict[str, str] = None,
     ) -> ModelInferResponse:
         """Overrides the `_grpc_predict` method in Model class. The predict method calls
         the `_grpc_predict` method if self.protocol is "grpc-v2".

diff --git a/kubernetes/kserve/kserve_wrapper/__main__.py b/kubernetes/kserve/kserve_wrapper/__main__.py
index 0ae110a8f8..e4c8508b41 100644
--- a/kubernetes/kserve/kserve_wrapper/__main__.py
+++ b/kubernetes/kserve/kserve_wrapper/__main__.py
@@ -1,6 +1,7 @@
 """ KServe wrapper to handle inference in the kserve_predictor """
 import json
 import logging
+import os

 import kserve
 from kserve.model_server import ModelServer
@@ -50,9 +51,6 @@ def parse_config():
     models = keys["model_snapshot"]["models"]
     model_names = []

-    protocol = "grpc-v2"
-    # protocol = "v2"
-
     # Get all the model_names
     for model, value in models.items():
         model_names.append(model)
@@ -75,12 +73,11 @@ def parse_config():
         model_store = DEFAULT_MODEL_STORE

     logging.info(
-        "Wrapper : Model names %s, inference address %s, management address %s, grpc_inference_address, %s, protocol %s, model store %s",
+        "Wrapper : Model names %s, inference address %s, management address %s, grpc_inference_address %s, model store %s",
         model_names,
         inference_address,
         management_address,
         grpc_inference_address,
-        protocol,
         model_store,
     )

@@ -89,7 +86,6 @@ def parse_config():
         inference_address,
         management_address,
         grpc_inference_address,
-        protocol,
         model_store,
     )

@@ -101,10 +97,11 @@ def parse_config():
         inference_address,
         management_address,
         grpc_inference_address,
-        protocol,
         model_dir,
     ) = parse_config()

+    protocol = os.environ.get("PROTOCOL_VERSION")
+
     models = []

     for model_name in model_names:

From ef09c5f8bc54ab972fdc59a5f14b5dbfd808faa1 Mon Sep 17 00:00:00 2001
From: jagadeesh
Date: Fri, 4 Aug 2023 14:21:27 +0530
Subject: [PATCH 06/10] update readme

Signed-off-by: jagadeesh
---
 kubernetes/kserve/kserve_wrapper/README.md | 38 ++++++----------------
 1 file changed, 10 insertions(+), 28 deletions(-)

diff --git a/kubernetes/kserve/kserve_wrapper/README.md b/kubernetes/kserve/kserve_wrapper/README.md
index 6264c58166..dc86ca95f5 100644
--- a/kubernetes/kserve/kserve_wrapper/README.md
+++ b/kubernetes/kserve/kserve_wrapper/README.md
@@ -26,7 +26,7 @@ Follow the below steps to serve the MNIST Model:
 - Step 2: Install KServe as below:

 ```bash
-pip install kserve>=0.9.0
+pip install "kserve>=0.9.0" grpcio protobuf grpcio-tools
 ```

 - Step 4: Run the Install Dependencies script
@@ -107,6 +107,12 @@ For the gRPC v2 protocol, set

 `export PROTOCOL_VERSION=grpc-v2`

+- Generate the python gRPC client stub using the proto files
+
+```bash
+python -m grpc_tools.protoc --proto_path=frontend/server/src/main/resources/proto/ --python_out=ts_scripts --grpc_python_out=ts_scripts frontend/server/src/main/resources/proto/inference.proto frontend/server/src/main/resources/proto/management.proto
+```
+
 ```bash
 python3 serve/kubernetes/kserve/kserve_wrapper/__main__.py
 ```
@@ -160,7 +166,7 @@ For v2 protocol
 The curl request for inference is as below:

 ```bash
-curl -H "Content-Type: application/json" --data @serve/kubernetes/kserve/kf_request_json/mnist_v2.json http://0.0.0.0:8080/v2/models/mnist/infer
+curl -H "Content-Type: application/json" --data @serve/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor.json http://0.0.0.0:8080/v2/models/mnist/infer
 ```

 Response:
@@ -181,31 +187,6 @@ Response:
 }
 ```

-The curl request for explain is as below:
-
-```bash
-curl -H "Content-Type: application/json" --data @serve/kubernetes/kserve/kf_request_json/v1/mnist.json http://0.0.0.0:8080/v2/models/mnist/explain
-```
-
-Response:
-
-```json
-{
-  "id": "3482b766-0483-40e9-84b0-8ce8d4d1576e",
-  "model_name": "mnist",
-  "model_version": "1.0",
-  "outputs": [{
-    "name": "explain",
-    "shape": [1, 28, 28],
-    "datatype": "FP64",
-    "data": [-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, 0.0, -0.0, 0.0
-    ...
-    ...
-    ]
-  }]
-}
-
 For grpc-v2 protocol

 - Download the proto file

@@ -213,12 +194,13 @@ For grpc-v2 protocol
 ```bash
 curl -O https://raw.githubusercontent.com/kserve/kserve/master/docs/predict-api/v2/grpc_predict_v2.proto
 ```
+
 - Download [grpcurl](https://github.com/fullstorydev/grpcurl)

 Make a gRPC request

 ```bash
-grpcurl -vv -plaintext -proto grpc_predict_v2.proto -d @ localhost:8081 inference.GRPCInferenceService.ModelInfer <<< $(cat "serve/kubernetes/kserve/kf_request_json/v2/mnist_tensor_bytes_grpc.json")
+grpcurl -vv -plaintext -proto grpc_predict_v2.proto -d @ localhost:8081 inference.GRPCInferenceService.ModelInfer <<< $(cat "serve/kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor_grpc.json")
 ```

 ## KServe Wrapper Testing in Local for BERT

From f8a800f72263d29b83e8dce667a5586fa3579f6b Mon Sep 17 00:00:00 2001
From: jagadeesh
Date: Fri, 4 Aug 2023 15:29:16 +0530
Subject: [PATCH 07/10] fix lint errors

---
 kubernetes/kserve/kserve_wrapper/__main__.py | 2 +-
 ts_scripts/spellcheck_conf/wordlist.txt      | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/kubernetes/kserve/kserve_wrapper/__main__.py b/kubernetes/kserve/kserve_wrapper/__main__.py
index e4c8508b41..f67e6de107 100644
--- a/kubernetes/kserve/kserve_wrapper/__main__.py
+++ b/kubernetes/kserve/kserve_wrapper/__main__.py
@@ -91,7 +91,6 @@ def parse_config():

 if __name__ == "__main__":
-
     (
         model_names,
         inference_address,
         management_address,
         grpc_inference_address,
         model_dir,
     ) = parse_config()
@@ -118,6 +117,7 @@ def parse_config():
         # below method sets status to true for the models.
model.load() models.append(model) + registeredModels = TSModelRepository( inference_address, management_address, diff --git a/ts_scripts/spellcheck_conf/wordlist.txt b/ts_scripts/spellcheck_conf/wordlist.txt index 902439747a..2c070cf076 100644 --- a/ts_scripts/spellcheck_conf/wordlist.txt +++ b/ts_scripts/spellcheck_conf/wordlist.txt @@ -1068,3 +1068,4 @@ chatGPT baseimage cuDNN Xformer +grpcurl From 73a9f536c083e47d5ac96848246e8e4d151bbccf Mon Sep 17 00:00:00 2001 From: jagadeesh Date: Wed, 9 Aug 2023 10:56:58 +0530 Subject: [PATCH 08/10] fix kserve_v2 service envelop and test data Signed-off-by: jagadeesh --- test/postman/kfv2_inference_data.json | 2 +- ts/torch_handler/request_envelope/kservev2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/postman/kfv2_inference_data.json b/test/postman/kfv2_inference_data.json index e00c715450..2a763876e9 100644 --- a/test/postman/kfv2_inference_data.json +++ b/test/postman/kfv2_inference_data.json @@ -6,7 +6,7 @@ "file": "../kubernetes/kserve/kf_request_json/v2/mnist/mnist_v2_tensor.json", "content-type": "application/json", "expected": { - "id":"d3b15cad-50a2-4eaf-80ce-8b0a428bd298","model_name":"mnist","model_version":"1.0","outputs":[{"name":"input-0","shape":[],"datatype":"INT64","data":[1]}] + "id":"d3b15cad-50a2-4eaf-80ce-8b0a428bd298","model_name":"mnist","model_version":"1.0","outputs":[{"name":"input-0","shape":[1],"datatype":"INT64","data":[1]}] }, "expected_explain": { "id":"d3b15cad-50a2-4eaf-80ce-8b0a428bd298","model_name":"mnist","model_version":"1.0","outputs":[{"name":"input-0","shape":[1,28,28],"datatype":"FP64","data":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.0040547527881586954,-0.00022612877132135935,-0.00012734132068921815,0.005648369123934234,0.00890478344415316,0.002638536746843638,0.0026802459473054567,-0.002657801646198628,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0002446577521584037,0.0008218454252870746,0.015285916556975589,0.007512832032495784,0.007094984582680408,0.003405668414819342,-0.0020919248349481525,-0.0007800296083653554,0.022995877395463753,0.019004328861537745,-0.0012529557611487667,-0.0014666116853554992,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.005298396299742967,-0.0007901602589731957,0.00390606628994132,0.02317408192562863,0.01723791734244863,0.010867034230381416,0.003001563449593484,0.006224217749113618,0.006120711993702211,0.016736329208148985,0.005674718979287411,0.0043441351074201716,-0.0012328422456581033,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0006867354470939666,0.009772898561731134,-0.003875493029617137,0.0017986933105143274,0.00130754408083684,-0.0024510981201440493,-0.0008806773035242951,0,0,-0.00014277890938077845,-0.009322312923101268,0.020608317831970053,0.0043513950202448085,-0.0007875567959471073,-0.0009075897498983682,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00022247236805959426,-0.0007829029576392685,0.0026663695298724034,0.000973336645392922,0,0,0,0,0,0,0,0.0004323206544010433,0.023657171718451487,0.010694845123018274,-0.0023759529649896504,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.002074797027562978,-0.0023201009712006193,-0.0012899209165390638,0,0,0,0,0,0,0,0,0,0.007629679307476711,0.010448627340902272,0.00025032896574585353,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.0003770835815454417,-0.005156369326824804,0.0012477581647151723,0,0,0,0,0,0,0,0,0,-0.00004442522927758585,0.010248046478304183,0.0009971132925931643,0,0
,0,0,0,0,0,0,0,0,0,0,0.0004501049686186689,-0.001963053861562753,-0.0006664790954766908,0.0020157404181157298,0,0,0,0,0,0,0,0,0,-0.0022144570001665495,0.008361584182210209,0.0031401945811928064,0,0,0,0,0,0,0,0,0,0,0,0,-0.0028943546389954404,-0.0031301382952544582,0.002113252627152244,0,0,0,0,0,0,0,0,0,0,-0.0010321050313140568,0.008905753962245818,0.0028464382842652274,0,0,0,0,0,0,0,0,0,0,0,0,-0.005305289160784239,-0.001927110161077484,0.0012090041616218117,0,0,0,0,0,0,0,0,0,0,-0.0011945155110826835,0.005654443253323257,0.0020132074296893847,0,0,0,0,0,0,0,0,0,0,0,0,-0.0014689358191145255,0.00107434126494373,0,0,0,0,0,0,0,0,0,0,0,-0.0017047979656755515,0.002906605326916773,-0.0007805868832212293,0,0,0,0,0,0,0,0,0,0,0,0.000055417251836277426,0.0014516115955483288,0.0002827699382308426,0,0,0,0,0,0,0,0,0,0,0,-0.0014401406798288333,0.002381249994012627,0.002146825485493657,0,0,0,0,0,0,0,0,0,0,0,0.0011500530011764514,0.00028650115062629793,0.0029798149728837,0,0,0,0,0,0,0,0,0,0,0,-0.0017750294246144378,0.0008339858039134471,-0.0037707389974128264,0,0,0,0,0,0,0,0,0,0,0,-0.0006093176702196316,-0.0004690580448827246,0.0034053215399203448,0,0,0,0,0,0,0,0,0,0,-0.0007450010561445004,0.0012987672807208413,-0.00849924754154327,-0.00006145174356975924,0,0,0,0,0,0,0,0,0,0,0,0.0011809727047705845,-0.0018384766530189604,0.005411106767295053,0,0,0,0,0,0,0,0,0,-0.0021392342405935397,0.0003259162378301207,-0.005276118419877435,-0.001950983939698961,-9.545680860124795e-7,0,0,0,0,0,0,0,0,0,0,0,0.000777240560389088,-0.00015179538793786839,0.006481484638650515,0,0,0,0,0,0,0,0,0.00008098065166629173,-0.0024904261335704243,-0.0020718616274916063,-0.00005341157801587443,-0.00045564727357325394,0,0,0,0,0,0,0,0,0,0,0,0,0,0.002275098238597264,0.0017164058060623701,0.00032213445581197173,0,0,0,0,0,-0.001556028266851665,0.0000910724863950236,0.0008772840524484654,0.000650298006504863,-0.004128780934527031,0.0006030386677594234,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0013959957755626813,0.00267915270212672,0.0023995009632858484,-0.0004496094979322396,0.003101832911668704,0.007494535603697501,0.002864118701309854,-0.003052590375330078,0.003420222741405451,0.001492401842506996,-0.0009357391552120744,0.0007856228750089005,-0.00184339736789655,0.00001603187900317098,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.0006999018662842894,0.004382251035718981,-0.0035419315151426845,-0.002889674705246964,-0.000487345313107622,-0.006087344960098864,0.0003882250941768635,0.0025336419028892817,-0.004352836272916637,-0.0006079418201851047,-0.003810133084711927,-0.0008284412435870998,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0010901530193446261,-0.013135007265412056,0.000473452169279359,0.002050423312678761,-0.00660945214953636,0.00236478632058849,0.004678920566995346,-0.0018122525188342855,0.002137538293354298,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}] diff --git a/ts/torch_handler/request_envelope/kservev2.py b/ts/torch_handler/request_envelope/kservev2.py index 76f0f05ab4..d975c1a946 100644 --- a/ts/torch_handler/request_envelope/kservev2.py +++ b/ts/torch_handler/request_envelope/kservev2.py @@ -182,5 +182,5 @@ def _to_json(self, data, input_name): output_data["name"] = input_name output_data["datatype"] = _to_datatype(data_ndarray.dtype) output_data["data"] = data_ndarray.tolist() - output_data["shape"] = data_ndarray.shape + output_data["shape"] = 
data_ndarray.flatten().shape
         return output_data

From a838ad7382e14692275c072fa17022ea04ca406b Mon Sep 17 00:00:00 2001
From: jagadeesh
Date: Wed, 16 Aug 2023 09:38:21 +0530
Subject: [PATCH 09/10] re-test

Signed-off-by: jagadeesh

From fef669274dfdc18ea708c9655771b302cb3348d8 Mon Sep 17 00:00:00 2001
From: jagadeesh
Date: Tue, 22 Aug 2023 07:39:26 +0530
Subject: [PATCH 10/10] re-test

Signed-off-by: jagadeesh
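The `flatten()` before `.shape` in PATCH 08 matters for scalar predictions such as a single class index, which is also why the expected shape in `kfv2_inference_data.json` changes from `[]` to `[1]`. A small self-checking sketch:

```python
import numpy as np

prediction = 1  # a scalar prediction, e.g. one MNIST class index
assert np.array(prediction).shape == ()               # serializes as "shape": []
assert np.array(prediction).flatten().shape == (1,)   # serializes as "shape": [1]
```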