Fixes #205 - Docker install not finding GPUs (#262)

Invokes docker with --runtime=nvidia for GPU.
Added the option to specify the GPU / specific GPU ids in start.sh.
Fixed the documentation for the start.sh script.
Fixed the JDK version in Dockerfile.gpu.
Co-authored-by: Aaqib <maaquib@gmail.com>
pytorch · Apr 30, 2020 · e8a2c75 · chauhang · May 10, 2020 · maaquib
1 parent 9e06b12
commit e8a2c75
Show file tree

Hide file tree

Showing 5 changed files with 50 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -90,6 +90,7 @@ pip install -e .
 
 * To upgrade TorchServe or model archiver from source code and make changes executable, run:
 
+For CPU run the following command:
 ```bash
 pip install -U -e .
 ```
@@ -249,6 +250,14 @@ To run your TorchServe Docker image and start TorchServe inside the container wi
 ```bash
 ./start.sh
 ```
+For GPU run the following command:
+```bash
+./start.sh --gpu
+```
+For GPU with specific GPU device ids run the following command:
+```bash
+./start.sh --gpu_devices 1,2,3
+```
 
 ## Learn More
 

diff --git a/build_image.sh b/build_image.sh
@@ -2,6 +2,7 @@
 
 MACHINE=cpu
 BRANCH_NAME="master"
+DOCKER_TAG="pytorch/torchserve:latest"
 
 for arg in "$@"
 do
@@ -26,6 +27,7 @@ do
           ;;
         -g|--gpu)
           MACHINE=gpu
+          DOCKER_TAG="pytorch/torchserve:latest-gpu"
           shift
           ;;
     esac
@@ -37,4 +39,4 @@ git clone https://github.com/pytorch/serve.git
 cd serve
 git checkout $BRANCH_NAME
 cd ..
-docker build --file Dockerfile.$MACHINE -t torchserve:1.0 .
+docker build --file Dockerfile.$MACHINE -t $DOCKER_TAG .
diff --git a/docker/Dockerfile.gpu b/docker/Dockerfile.gpu
@@ -9,7 +9,7 @@ RUN apt-get update && \
     dpkg-dev \
     g++ \
     python3-dev \
-    openjdk-8-jdk-headless \
+    openjdk-11-jdk-headless \
     curl \
     vim \
     && rm -rf /var/lib/apt/lists/* \
@@ -45,4 +45,4 @@ ENV TEMP=/home/model-server/tmp
 ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
 CMD ["serve"]
 
-LABEL maintainer="wongale@amazon.com"
+LABEL maintainer="wongale@amazon.com"
diff --git a/docker/README.md b/docker/README.md
@@ -25,6 +25,11 @@ For specific versions you can pass in the specific tag to use (ex: 0.1-cpu):
 ```bash
 docker run --rm -it -p 8080:8080 -p 8081:8081 pytorch/torchserve:0.1-cpu
 ```
+For a GPU-based image, pass the GPU devices to use via `--gpus`:
+
+```bash
+docker run --rm -it --gpus '"device=1,2"' -p 8080:8080 -p 8081:8081 torchserve:1.0
+```
 
 For the latest version, you can use the `latest` tag:
 docker run --rm -it -p 8080:8080 -p 8081:8081 pytorch/torchserve:latest

diff --git a/start.sh b/start.sh
@@ -1,9 +1,37 @@
 #!/bin/bash
-IMAGE_NAME="torchserve:1.0"
+IMAGE_NAME="pytorch/torchserve:latest"
 
-echo "Starting torchserve:1.0 docker image"
+for arg in "$@"
+do
+    case $arg in
+        -h|--help)
+          echo "options:"
+          echo "-h, --help  show brief help"
+          echo "-g, --gpu specify to use gpu"
+          echo "-d, --gpu_devices to use specific gpu device ids"
+          exit 0
+          ;;
+        -g|--gpu)
+          DOCKER_RUNTIME="--runtime=nvidia"
+          IMAGE_NAME="pytorch/torchserve:latest-gpu"
+          shift
+          ;;
+	-d|--gpu_devices)
+          if test $
+          then
+            DOCKER_RUNTIME="--runtime=nvidia"
+            IMAGE_NAME="pytorch/torchserve:latest-gpu"
+            GPU_DEVICES="-e NVIDIA_VISIBLE_DEVICES=$2"
+            shift
+          fi
+          shift
+          ;;
+    esac
+done
+echo "Starting $IMAGE_NAME docker image"
+
+docker run $DOCKER_RUNTIME $GPU_DEVICES -d --rm -it -p 8080:8080 -p 8081:8081 $IMAGE_NAME > /dev/null 2>&1
 
-docker run -d --rm -it -p 8080:8080 -p 8081:8081 torchserve:1.0 > /dev/null 2>&1
 container_id=$(docker ps --filter="ancestor=$IMAGE_NAME" -q | xargs)
 
 sleep 30