From ff5fdbddc0c3caebb33a9faf92b1377e3f9ada09 Mon Sep 17 00:00:00 2001
From: Dhanasekar Karuppasamy <dhanak@amazon.com>
Date: Thu, 23 Apr 2020 22:47:06 +0000
Subject: [PATCH] Fixes #205 - Docker install not finding GPUs

    Invokes docker with --runtime=nvidia for GPU
    Added the option to specify the GPU / specific GPU ids in start.sh
    Fixed the documentation for start.sh script
    Fixed the JDK version in Dockerfile.gpu
---
 README.md             |  9 +++++++++
 build_image.sh        |  4 +++-
 docker/Dockerfile.gpu |  4 ++--
 docker/README.md      |  5 +++++
 start.sh              | 34 +++++++++++++++++++++++++++++++---
 5 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index b2c997d3a5..27b4ed1370 100644
--- a/README.md
+++ b/README.md
@@ -90,6 +90,7 @@ pip install -e .
 
 * To upgrade TorchServe or model archiver from source code and make changes executable, run:
 
+For CPU run the following command:
 ```bash
 pip install -U -e .
 ```
@@ -249,6 +250,14 @@ To run your TorchServe Docker image and start TorchServe inside the container wi
 ```bash
 ./start.sh
 ```
+For GPU, run the following command:
+```bash
+./start.sh --gpu
+```
+To use specific GPU device ids, run the following command:
+```bash
+./start.sh --gpu_devices 1,2,3
+```
 
 ## Learn More
 
diff --git a/build_image.sh b/build_image.sh
index dfc155e7c7..2320b5711d 100755
--- a/build_image.sh
+++ b/build_image.sh
@@ -2,6 +2,7 @@
 
 MACHINE=cpu
 BRANCH_NAME="master"
+DOCKER_TAG="pytorch/torchserve:latest"
 
 for arg in "$@"
 do
@@ -26,6 +27,7 @@ do
           ;;
         -g|--gpu)
           MACHINE=gpu
+          DOCKER_TAG="pytorch/torchserve:latest-gpu"
           shift
           ;;
     esac
@@ -37,4 +39,4 @@ git clone https://github.com/pytorch/serve.git
 cd serve
 git checkout $BRANCH_NAME
 cd ..
-docker build --file Dockerfile.$MACHINE -t torchserve:1.0 .
+docker build --file Dockerfile.$MACHINE -t $DOCKER_TAG .
diff --git a/docker/Dockerfile.gpu b/docker/Dockerfile.gpu
index e4e50d8eb1..aade3ada6c 100644
--- a/docker/Dockerfile.gpu
+++ b/docker/Dockerfile.gpu
@@ -9,7 +9,7 @@ RUN apt-get update && \
     dpkg-dev \
     g++ \
     python3-dev \
-    openjdk-8-jdk-headless \
+    openjdk-11-jdk-headless \
     curl \
     vim \
     && rm -rf /var/lib/apt/lists/* \
@@ -45,4 +45,4 @@ ENV TEMP=/home/model-server/tmp
 ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
 CMD ["serve"]
 
-LABEL maintainer="wongale@amazon.com"
\ No newline at end of file
+LABEL maintainer="wongale@amazon.com"
diff --git a/docker/README.md b/docker/README.md
index 2b8c9af649..716f18031e 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -25,6 +25,11 @@ For specific versions you can pass in the specific tag to use (ex: 0.1-cpu):
 ```bash
 docker run --rm -it -p 8080:8080 -p 8081:8081 pytorch/torchserve:0.1-cpu
 ```
+For the GPU-based image:
+
+```bash
+docker run --rm -it --gpus '"device=1,2"' -p 8080:8080 -p 8081:8081 pytorch/torchserve:latest-gpu
+```
 
 For the latest version, you can use the `latest` tag:
 docker run --rm -it -p 8080:8080 -p 8081:8081 pytorch/torchserve:latest
diff --git a/start.sh b/start.sh
index c841ebc2f2..3371e21b67 100755
--- a/start.sh
+++ b/start.sh
@@ -1,9 +1,37 @@
 #!/bin/bash
-IMAGE_NAME="torchserve:1.0"
+IMAGE_NAME="pytorch/torchserve:latest"
 
-echo "Starting torchserve:1.0 docker image"
+while test $# -gt 0  # walk the positional parameters so $1/$2 always refer to the current flag and its value
+do
+    case "$1" in
+        -h|--help)
+          echo "options:"
+          echo "-h, --help  show brief help"
+          echo "-g, --gpu specify to use gpu"
+          echo "-d, --gpu_devices to use specific gpu device ids"
+          exit 0
+          ;;
+        -g|--gpu)
+          DOCKER_RUNTIME="--runtime=nvidia"
+          IMAGE_NAME="pytorch/torchserve:latest-gpu"
+          ;;
+        -d|--gpu_devices)
+          # Only switch to the GPU image when a device id list actually follows the flag.
+          if test -n "$2"
+          then
+            DOCKER_RUNTIME="--runtime=nvidia"
+            IMAGE_NAME="pytorch/torchserve:latest-gpu"
+            GPU_DEVICES="-e NVIDIA_VISIBLE_DEVICES=$2"
+            shift  # consume the flag here; the trailing shift then consumes its value
+          fi
+          ;;
+    esac
+    shift  # drop the argument just handled (unrecognised arguments are ignored)
+done
+echo "Starting $IMAGE_NAME docker image"
+
+docker run $DOCKER_RUNTIME $GPU_DEVICES -d --rm -it -p 8080:8080 -p 8081:8081 $IMAGE_NAME > /dev/null 2>&1
 
-docker run -d --rm -it -p 8080:8080 -p 8081:8081 torchserve:1.0 > /dev/null 2>&1
 container_id=$(docker ps --filter="ancestor=$IMAGE_NAME" -q | xargs)
 
 sleep 30