
Commit

Merge branch 'master' into doc-improvement/kserve
chauhang authored Sep 22, 2022
2 parents faf36e3 + 4f7caa3 commit 3c95d4a
Showing 5 changed files with 74 additions and 14 deletions.
27 changes: 27 additions & 0 deletions docker/build_image.sh
@@ -8,6 +8,7 @@ DOCKER_FILE="Dockerfile"
BASE_IMAGE="ubuntu:18.04"
CUSTOM_TAG=false
CUDA_VERSION=""
UBUNTU_VERSION="ubuntu:18.04"
USE_LOCAL_SERVE_FOLDER=false
BUILD_WITH_IPEX=false

@@ -21,6 +22,7 @@ do
echo "-g, --gpu specify to use gpu"
echo "-bt, --buildtype specify to created image for codebuild. Possible values: production, dev, codebuild."
echo "-cv, --cudaversion specify to cuda version to use"
echo "-ub, --ubuntu specify ubuntu version. Possible values: ubuntu:18.04, ubuntu 20.04"
echo "-t, --tag specify tag name for docker image"
echo "-lf, --use-local-serve-folder specify this option for the benchmark image if the current 'serve' folder should be used during automated benchmarks"
echo "-ipex, --build-with-ipex specify to build with intel_extension_for_pytorch"
@@ -63,6 +65,7 @@ do
BUILD_WITH_IPEX=true
shift
;;
# Assumes the default Ubuntu version, 18.04
-cv|--cudaversion)
CUDA_VERSION="$2"
if [ $CUDA_VERSION == "cu116" ];
@@ -90,6 +93,30 @@ do
shift
shift
;;
# CUDA 10 is not supported on Ubuntu 20.04
-ub|--ubuntu)
UBUNTU_VERSION="$2"
if [[ $CUDA_VERSION == "cu116" && $UBUNTU_VERSION == "ubuntu20.04" ]];
then
BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu20.04"
elif [[ $CUDA_VERSION == "cu113" && $UBUNTU_VERSION == "ubuntu20.04" ]];
then
BASE_IMAGE="nvidia/cuda:11.3.0-cudnn8-runtime-ubuntu20.04"
elif [[ $CUDA_VERSION == "cu111" && $UBUNTU_VERSION == "ubuntu20.04" ]];
then
BASE_IMAGE="nvidia/cuda:11.1.0-cudnn8-runtime-ubuntu20.04"
elif [[ $UBUNTU_VERSION == "ubuntu20.04" ]];
then
echo "Using CPU image"
BASE_IMAGE="ubuntu:20.04"
else
echo "Ubuntu and CUDA version combination is not supported"
echo $UBUNTU_VERSION
echo $CUDA_VERSION
exit 1
fi
shift
;;
esac
done
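
For reference, a hypothetical invocation of the updated script exercising the new flag might look like this (the image tag is illustrative; flags behave as defined by the case handlers above, with -cv given before -ub so that CUDA_VERSION is set first):

./docker/build_image.sh -g -cv cu113 -ub ubuntu20.04 -t torchserve:gpu-ubuntu20.04

With this combination the script selects nvidia/cuda:11.3.0-cudnn8-runtime-ubuntu20.04 as the base image.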

1 change: 1 addition & 0 deletions kubernetes/kserve/config.properties
@@ -11,5 +11,6 @@ metrics_format=prometheus
NUM_WORKERS=1
number_of_netty_threads=4
job_queue_size=10
load_models=all
model_store=/home/model-server/shared/model-store
model_snapshot={"name":"startup.cfg","modelCount":1,"models":{"mnist":{"1.0":{"defaultVersion":true,"marName":"mnist.mar","minWorkers":1,"maxWorkers":5,"batchSize":5,"maxBatchDelay":200,"responseTimeout":60}}}}
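
For readability, the single-line model_snapshot value above expands to the JSON below (line breaks added for illustration only; in config.properties the value must remain on one line):

{
  "name": "startup.cfg",
  "modelCount": 1,
  "models": {
    "mnist": {
      "1.0": {
        "defaultVersion": true,
        "marName": "mnist.mar",
        "minWorkers": 1,
        "maxWorkers": 5,
        "batchSize": 5,
        "maxBatchDelay": 200,
        "responseTimeout": 60
      }
    }
  }
}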
8 changes: 5 additions & 3 deletions kubernetes/kserve/kserve_wrapper/TSModelRepository.py
@@ -1,23 +1,25 @@
""" The repository to serve the Torchserve Models in the kserve side"""
import logging
from importlib.metadata import version

import kserve

if version("kserve") >= "0.8.0":
from kserve.model_repository import ModelRepository as ModelRepository
else:
from kserve.kfmodel_repository import KFModelRepository as ModelRepository

logging.basicConfig(level=kserve.constants.KSERVE_LOGLEVEL)


class TSModelRepository(ModelRepository):
"""A repository of kserve KFModels
Args:
KFModelRepository (object): The parameters from the KFModelRepository are passed
as inputs to the TSModelRepository.
"""

def __init__(self, inference_address: str, management_address: str, model_dir: str):
"""The Inference Address, Management Address and the Model Directory from the kserve
side are initialized here.
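A caveat worth flagging about the version gate used in these wrapper files: version("kserve") returns a plain string, and string comparison is lexicographic, so for example "0.10.0" >= "0.8.0" evaluates to False. A minimal sketch of a PEP 440-aware alternative, assuming the third-party packaging library is available (this is not part of the commit):

from importlib.metadata import version
from packaging.version import Version

# Version() parses release segments numerically, so "0.10.0" compares as expected.
if Version(version("kserve")) >= Version("0.8.0"):
    from kserve.model_repository import ModelRepository
else:
    from kserve.kfmodel_repository import KFModelRepository as ModelRepository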
24 changes: 22 additions & 2 deletions kubernetes/kserve/kserve_wrapper/TorchserveModel.py
@@ -1,13 +1,16 @@
""" The torchserve side inference end-points request are handled to
return a KServe side response """
import json
import logging
import pathlib
from importlib.metadata import version
from typing import Dict

import kserve
import tornado.web
from kserve.model import ModelMissingError

if version("kserve") >= "0.8.0":
from kserve.model import Model as Model
else:
from kserve.kfmodel import KFModel as Model
@@ -121,3 +124,20 @@ async def explain(self, request: Dict) -> Dict:
if response.code != 200:
raise tornado.web.HTTPError(status_code=response.code, reason=response.body)
return json.loads(response.body)

def load(self) -> bool:
"""This method validates model availabilty in the model directory
and sets ready flag to true.
"""
model_path = pathlib.Path(kserve.Storage.download(self.model_dir))
paths = list(pathlib.Path(model_path).glob("*.mar"))
existing_paths = [path for path in paths if path.exists()]
if len(existing_paths) == 0:
raise ModelMissingError(model_path)
elif len(existing_paths) > 1:
raise RuntimeError(
"More than one model file is detected, "
f"Only one is allowed within model_dir: {existing_paths}"
)
self.ready = True
return self.ready
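
As the new load() method enforces, the directory resolved from model_dir must contain exactly one .mar archive; otherwise ModelMissingError or RuntimeError is raised. With the mnist example from config.properties, a valid layout would be (illustrative):

/home/model-server/shared/model-store/
└── mnist.mar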
28 changes: 19 additions & 9 deletions kubernetes/kserve/kserve_wrapper/__main__.py
@@ -2,12 +2,12 @@
import json
import logging
from importlib.metadata import version

import kserve
from TorchserveModel import TorchserveModel
from TSModelRepository import TSModelRepository

if version("kserve") >= "0.8.0":
from kserve.model_server import ModelServer
else:
from kserve.kfserver import KFServer as ModelServer
@@ -74,26 +74,36 @@ def parse_config():
model_store = DEFAULT_MODEL_STORE
logging.info(
"Wrapper : Model names %s, inference address %s, management address %s, model store %s",
model_names,
inference_address,
management_address,
model_store,
)

return model_names, inference_address, management_address, model_store


if __name__ == "__main__":

model_names, inference_address, management_address, model_dir = parse_config()

models = []

for model_name in model_names:

model = TorchserveModel(
model_name, inference_address, management_address, model_dir
)
# By default model.load() is called on the first request. With load_models=all
# set in the TorchServe config.properties, all models are loaded at startup and
# the call below marks them as ready.
model.load()
models.append(model)
registeredModels = TSModelRepository(
inference_address, management_address, model_dir
)
ModelServer(
registered_models=registeredModels,
http_port=8080,
grpc_port=7070,
).start(models)
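
Once the wrapper is running, the models marked ready at startup can be queried over the KServe v1 protocol on the HTTP port configured above; a hypothetical request against the mnist model from config.properties (input.json stands in for a real payload):

curl http://localhost:8080/v1/models/mnist:predict -d @input.json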
