Add codebase and update README with instructions for deploying the model to Google Cloud Vertex AI #19

Open · wants to merge 9 commits into `main`
59 changes: 59 additions & 0 deletions README.md
@@ -50,6 +50,65 @@ Here command is for single node, 4 gpu. Tested only for single node.
### OR
- Inference in colab from here [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1EhEy3uQh-5oOSagUotVOJAf8m7Vqn0D6?usp=sharing)

# Deploy on Vertex AI with TorchServe
- Download the pretrained model in `.pt` format (169 MB) from this [link](https://drive.google.com/file/d/1Ee4igrf5axte9nV1KvcgtEnO7KPKaVm6/view?usp=sharing) into the `deploy` folder.
- Use `deploy/convert_to_torchScript.py` to convert the trained checkpoint file to TorchScript format.
- Build the Docker image from the Dockerfile inside the `deploy` folder with the following command (replace `<project-id>` with the ID of your Google Cloud project):
```
docker build -t gcr.io/<project-id>/pytorch_predict_cloth_seg .
```
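- (Optional) Sanity-check the image locally before pushing. This is a minimal sketch: port `7080` comes from the TorchServe config written in the `deploy/Dockerfile`, and `/ping` is the TorchServe health route referenced by the model upload command below:
```
docker run -d -p 7080:7080 -p 7081:7081 gcr.io/<project-id>/pytorch_predict_cloth_seg
curl http://localhost:7080/ping
```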
- Configure Docker to authenticate with Google Container Registry using the Google Cloud SDK:
```
gcloud auth configure-docker
```
- Push your Docker image to Google Container Registry using the command:
```
docker push gcr.io/<project-id>/pytorch_predict_cloth_seg
```
- Create a new custom model on Vertex AI using this command in the Google Cloud SDK (replace `<location>` and `<project-id>`):
```
gcloud ai models upload \
--container-ports=7080 \
--container-predict-route="/predictions/cloth_seg" \
--container-health-route="/ping" \
--region=<location> \
--display-name=cloth_seg \
--container-image-uri=gcr.io/<project-id>/pytorch_predict_cloth_seg
```
- Create an endpoint using this command in the Google Cloud SDK (replace `<location>` and `<project-id>`):
```
gcloud ai endpoints create \
--project=<project-id> \
--region=<location> \
--display-name=cloth_seg
```
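- If you did not note the model and endpoint IDs printed by the previous commands, you can look them up with the standard `gcloud ai` listing commands (you will need both IDs for the deploy step below):
```
gcloud ai models list --region=<location> --project=<project-id>
gcloud ai endpoints list --region=<location> --project=<project-id>
```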
- Deploy the model to the endpoint created above using the following command in the Google Cloud SDK (replace `<endpoint-id>` and `<model-id>` with the IDs of the endpoint and model created in the previous steps, and fill in `<location>`, `<project-id>` and `<machine-type>`):
```
gcloud ai endpoints deploy-model <endpoint-id> \
--project=<project-id> \
--region=<location> \
--model=<model-id> \
--traffic-split=0=100 \
--machine-type="<machine-type>" \
--display-name=cloth_seg
```
- Install the libraries required for inference with `pip install pillow google-cloud-aiplatform`.
- Test the deployed model using the `deployed_infer.py` file:
```
python deployed_infer.py --input <path/to/input/image> --output <path/to/output/folder> --project <project-id> --location <location> --project_number <project-number> --endpoint_id <endpoint-id>
```
Example:
```
python deployed_infer.py --input image.jpg --output output/ --project demo-project-374930 --location asia-south1 --project_number 63********42 --endpoint_id 2129************216
```
To get info about the arguments:
```
python deployed_infer.py --help
```
**Note:**
* The endpoint, the model and the deployed model must all be in the same location.
* The input image format for inference on Vertex AI should be JPEG, JPG or PNG.
* If you run into errors, try uncommenting the `print` statements in `handler.py` to debug, and check the logs in GCP.
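* For reference, `deployed_infer.py` sends the image as a base64-encoded string in the following JSON structure, which `handler.py` decodes (the base64 value is shortened here):
```
{"instances": [{"instance_key": "<base64-encoded image bytes>"}]}
```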
# Acknowledgements
- The U2-Net model is from the original [u2net repo](https://github.com/xuebinqin/U-2-Net). Thanks to Xuebin Qin for the amazing repo.
- The overall repo follows the structure of the [Pix2pixHD repo](https://github.com/NVIDIA/pix2pixHD).
33 changes: 33 additions & 0 deletions deploy/Dockerfile
@@ -0,0 +1,33 @@
# pull the latest TorchServe CPU image
FROM pytorch/torchserve:latest-cpu

# copy model artifacts, custom handler and other dependencies
COPY ./handler.py /home/model-server/
COPY ./torchscript.pt /home/model-server/

# create torchserve configuration file
USER root
RUN printf "\ninference_address=http://0.0.0.0:7080" >> /home/model-server/config.properties
RUN printf "\nmanagement_address=http://0.0.0.0:7081" >> /home/model-server/config.properties
USER model-server

# expose health and prediction listener ports from the image
EXPOSE 7080
EXPOSE 7081

# create model archive file packaging model artifacts and dependencies
RUN torch-model-archiver -f \
--model-name=cloth_seg \
--version=1.0 \
--serialized-file=/home/model-server/torchscript.pt \
--handler=/home/model-server/handler.py \
--export-path=/home/model-server/model-store

# run the TorchServe HTTP server to respond to prediction requests
CMD ["torchserve", \
"--start", \
"--ts-config=/home/model-server/config.properties", \
"--models", \
"cloth_seg=cloth_seg.mar", \
"--model-store", \
"/home/model-server/model-store"]
42 changes: 42 additions & 0 deletions deploy/convert_to_torchScript.py
@@ -0,0 +1,42 @@
# Import necessary libraries
from networks import U2NET
import torch
import torchvision.transforms as transforms
from collections import OrderedDict
from PIL import Image

# Define a helper function to normalize the image tensor
# (defined before the transform pipeline so the Lambda below can call it)
def normalize_image(image_tensor, mean, std):
    assert isinstance(mean, float)
    assert isinstance(std, float)

    if image_tensor.shape[0] == 1:
        normalize = transforms.Normalize([mean], [std])
    elif image_tensor.shape[0] == 3:
        normalize = transforms.Normalize([mean] * 3, [std] * 3)
    elif image_tensor.shape[0] == 18:
        normalize = transforms.Normalize([mean] * 18, [std] * 18)
    else:
        raise ValueError(f"Unsupported tensor shape {image_tensor.shape}")

    return normalize(image_tensor)

# Define the image transformation pipeline
transforms_list = []
transforms_list += [transforms.ToTensor()]
transforms_list += [transforms.Lambda(lambda x: normalize_image(x, 0.5, 0.5))]  # Normalize the image tensor
transform_rgb = transforms.Compose(transforms_list)

# Load the pre-trained U2NET model, stripping the "module." prefix that
# DataParallel adds to the state-dict keys
model = U2NET(in_ch=3, out_ch=4)
model_state_dict = torch.load('trained_checkpoint/cloth_segm_u2net_latest.pth', map_location=torch.device("cpu"))
new_state_dict = OrderedDict()
for k, v in model_state_dict.items():
    name = k[7:]
    new_state_dict[name] = v

model.load_state_dict(new_state_dict)
model.eval()

# Load the input image and apply the transformation pipeline
image = Image.open('image.jpg')
image_tensor = transform_rgb(image)
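
# A minimal sketch of the final conversion step (an assumption, not shown above):
# trace the model with the example input and save it as torchscript.pt, the file
# that deploy/Dockerfile copies into the serving image.
traced_model = torch.jit.trace(model, image_tensor.unsqueeze(0))
traced_model.save('torchscript.pt')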
143 changes: 143 additions & 0 deletions deploy/handler.py
@@ -0,0 +1,143 @@
import os
import io
from PIL import Image
import subprocess
import numpy as np
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
import time
import base64
import json
import datetime

from ts.torch_handler.base_handler import BaseHandler


class SegmentationHandler(BaseHandler):
def __init__(self):
super().__init__()
self.ctx = None
self.initialized = False
self.net = None
self.device = None

def initialize(self, ctx):
# Initialize the handler with the given context
self.manifest = ctx.manifest
properties = ctx.system_properties
model_dir = properties.get("model_dir")
self.device = torch.device("cuda:" + str(properties.get("gpu_id")) if torch.cuda.is_available() else "cpu")

# Load the PyTorch model from the serialized file
serialized_file = self.manifest['model']['serializedFile']
model_pt_path = os.path.join(model_dir, serialized_file)
if not os.path.isfile(model_pt_path):
raise RuntimeError("Missing the model.pt file")
self.net = torch.jit.load(model_pt_path)

# Define the transforms to apply to the input image
self.transforms_list = []
self.transforms_list += [transforms.ToTensor()]
self.transforms_list += [transforms.Lambda(lambda x: normalize_image(x, 0.5, 0.5))]
self.transform_rgb = transforms.Compose(self.transforms_list)
self.do_palette = True

# Define the color palette to use for the output mask
self.palette = get_palette(4)
self.initialized = True


def preprocess(self, data):
# Preprocess the input data by converting the image to a tensor
inputs = []
#print(data)
input_path = data[0].get('body')
#print(input_path)
if type(input_path)==bytearray:
# If the input is in JSON format, extract the image data and decode it
json_string = input_path.decode()
data_dict = json.loads(json_string)
image = Image.open(io.BytesIO(base64.b64decode(data_dict['instances'][0]['instance_key'])))
else:
# If the input is already a decoded image, extract the image data
final_data = input_path['instances'][0]['instance_key']
#print(final_data)
image = Image.open(io.BytesIO(base64.b64decode(final_data)))

# Preprocess image
image_tensor = self.transform_rgb(image)
inputs.append(image_tensor)

return inputs


def inference(self, data):
# Predict output mask
image_tensor = data.unsqueeze(0)
image_tensor = image_tensor.to(self.device)
with torch.no_grad():
output_tensor = self.net.forward(image_tensor)
output_tensor = F.log_softmax(output_tensor[0], dim=1)
output_tensor = torch.max(output_tensor, dim=1, keepdim=True)[1]
output_tensor = torch.squeeze(output_tensor, dim=0)
output_tensor = torch.squeeze(output_tensor, dim=0)
output_arr = output_tensor.cpu().numpy()

output_img = Image.fromarray(output_arr.astype("uint8"), mode="L")
if self.do_palette:
output_img.putpalette(self.palette)

return output_img

def handle(self, data, context):
# Preprocess input
inputs = self.preprocess(data)
# Perform inference
outputs = []
for idx, d in enumerate(inputs):
start_time = time.time()
output_img = self.inference(d)
end_time = time.time()
print(f'Time taken for inference {idx}: {end_time - start_time:.4f} seconds')

timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

img_bytes = io.BytesIO()
output_img.save(img_bytes, format='PNG')
outputs.append({"body":base64.b64encode(img_bytes.getvalue()).decode('utf-8'), "content_type": "image/png"})

return outputs

def get_palette(num_cls):
n = num_cls
palette = [0] * (n * 3)
for j in range(0, n):
lab = j
palette[j * 3 + 0] = 0
palette[j * 3 + 1] = 0
palette[j * 3 + 2] = 0
i = 0
while lab:
palette[j * 3 + 0] |= ((lab >> 0) & 1) << (7 - i)
palette[j * 3 + 1] |= ((lab >> 1) & 1) << (7 - i)
palette[j * 3 + 2] |= ((lab >> 2) & 1) << (7 - i)
i += 1
lab >>= 3
return palette

def normalize_image(image_tensor, mean, std):

assert isinstance(mean, float)
assert isinstance(std, float)

if image_tensor.shape[0] == 1:
normalize = transforms.Normalize([mean], [std])
elif image_tensor.shape[0] == 3:
normalize = transforms.Normalize([mean] * 3, [std] * 3)
elif image_tensor.shape[0] == 18:
normalize = transforms.Normalize([mean] * 18, [std] * 18)
else:
raise ValueError(f"Unsupported tensor shape {image_tensor.shape}")

return normalize(image_tensor)
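For reference, each element the handler returns has the following shape (see the `handle` method above; the base64 value is shortened here):
```
{"body": "<base64-encoded PNG mask>", "content_type": "image/png"}
```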
65 changes: 65 additions & 0 deletions deployed_infer.py
@@ -0,0 +1,65 @@
import argparse
import base64
import datetime
import io
import json
from google.api import httpbody_pb2
from google.cloud import aiplatform
from google.cloud import aiplatform_v1
from PIL import Image

parser = argparse.ArgumentParser(description='Make cloth segmentation predictions on an image using Google Cloud Vertex AI Platform.')

parser.add_argument('-i', '--input', type=str, required=True, help='path to the input image file (jpg, jpeg or png)')
parser.add_argument('-o', '--output', type=str, required=True, help='folder for the output PNG image file')
parser.add_argument('-p', '--project', type=str, required=True, help='Google Cloud project ID where the model is deployed')
parser.add_argument('-l', '--location', type=str, required=True, help='location (region) of the endpoint')
parser.add_argument('-p_no', '--project_number', type=str, required=True, help='the project number on Google Cloud')
parser.add_argument('-e_id', '--endpoint_id', type=str, required=True, help='endpoint ID on which the model is deployed')

args = parser.parse_args()

project = args.project
location = args.location

# initialize the Vertex AI SDK
aiplatform.init(project=project, location=location)

# process the input image and encode it as base64
input_img = Image.open(args.input)
img_bytes = io.BytesIO()
if (args.input[-3:].lower() == 'jpg') or (args.input[-4:].lower() == 'jpeg'):
    input_img.save(img_bytes, format='JPEG')
elif args.input[-3:].lower() == 'png':
    input_img.save(img_bytes, format='PNG')
else:
    raise ValueError('Input image must be a .jpg, .jpeg or .png file')
output = base64.b64encode(img_bytes.getvalue()).decode('utf-8')

instances = {"instances":[{"instance_key": output}]}

endpoint = aiplatform.Endpoint(f"projects/{args.project_number}/locations/{location}/endpoints/{args.endpoint_id}")

prediction_client = aiplatform_v1.PredictionServiceClient(
client_options={"api_endpoint": f"{location}-aiplatform.googleapis.com"}
)

http_body = httpbody_pb2.HttpBody(
data=json.dumps(instances).encode("utf-8"),
content_type="application/json",
)

ENDPOINT_RESOURCE_NAME = endpoint.resource_name

request = aiplatform_v1.RawPredictRequest(
endpoint=ENDPOINT_RESOURCE_NAME,
http_body=http_body,
)

# perform inference and strip the JSON wrapper around the base64-encoded PNG
# returned by the handler (brittle: the slice offsets depend on the exact
# response format, so adjust them if the handler's output changes)
check1 = prediction_client.raw_predict(request=request)
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
body = check1.data[13:-34].decode()

# saving the output
output_path = args.output + f"output-{timestamp}.png"
Image.open(io.BytesIO(base64.b64decode(body))).save(output_path, format='PNG')
print(f'Successfully saved the PNG image to {output_path}')