Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to PyTorch 1.13 #1980

Merged
merged 23 commits into from
Nov 21, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
40ce815
Changes to support PyTorch 1.13
agunapal Nov 16, 2022
cc414e1
Merge branch 'master' into upgrade_pytorch_113
agunapal Nov 17, 2022
08b4274
review comments
agunapal Nov 17, 2022
a50335d
Updated default CUDA version for docker to cu116
agunapal Nov 17, 2022
9eac44d
Updated default CUDA version for docker to cu116
agunapal Nov 17, 2022
6e15536
Updated default ubuntu version to be 20.04 wherever applicable
agunapal Nov 17, 2022
97f29ca
Updated default CUDA version to CUDA 11.6
agunapal Nov 17, 2022
795c769
Updated docker to CUDA 11.7 as default
agunapal Nov 18, 2022
e8b59ad
Removed ubuntu arg from docker build
agunapal Nov 18, 2022
169665b
Added github action for cpu regression tests
agunapal Nov 18, 2022
483bb5b
Added github action for cpu regression tests
agunapal Nov 18, 2022
932e00c
Added github action for cpu regression tests
agunapal Nov 18, 2022
48211bb
Added github action for cpu regression tests
agunapal Nov 18, 2022
6d9c1d4
Added gpu regression tests action
agunapal Nov 18, 2022
6c2accf
Added gpu regression tests action
agunapal Nov 18, 2022
bfabe0f
Merge branch 'master' into upgrade_pytorch_113
msaroufim Nov 18, 2022
ab09c41
change runner
agunapal Nov 18, 2022
97413a0
Merge branch 'upgrade_pytorch_113' of https://github.com/pytorch/serv…
agunapal Nov 18, 2022
a6e30a2
added java 17 to github actions
agunapal Nov 18, 2022
cd1f098
update git version
agunapal Nov 18, 2022
c73137e
Verified GPU regression tests to be working
agunapal Nov 18, 2022
ba6b4d0
Skipping regression tests on windows
agunapal Nov 18, 2022
23fb33b
Skipping regression tests on windows
agunapal Nov 18, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Your contributions will fall into two categories:
```bash
python ts_scripts/install_dependencies.py --environment=dev --cuda=cu102
```
> Supported cuda versions as cu116, cu113, cu111, cu102, cu101, cu92
> Supported cuda versions are cu117, cu116, cu113, cu111, cu102, cu101, cu92
- Install `pre-commit` to your Git flow:
```bash
pre-commit install
Expand Down
132 changes: 83 additions & 49 deletions benchmarks/auto_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,31 @@
import argparse
import datetime
import os
import ruamel.yaml
import shutil
from subprocess import Popen
from utils import gen_model_config_json
from utils import gen_md_report
from utils import gen_metrics_json

import ruamel.yaml
from utils import gen_md_report, gen_metrics_json, gen_model_config_json

CWD = os.getcwd()
MODEL_JSON_CONFIG_PATH = CWD + '/model_json_config'
BENCHMARK_TMP_PATH = '/tmp/benchmark'
BENCHMARK_REPORT_PATH = '/tmp/ts_benchmark'
TS_LOGS_PATH = CWD + '/logs'
MODEL_STORE = '/tmp/model_store'
WF_STORE = '/tmp/wf_store'
MODEL_JSON_CONFIG_PATH = CWD + "/model_json_config"
BENCHMARK_TMP_PATH = "/tmp/benchmark"
BENCHMARK_REPORT_PATH = "/tmp/ts_benchmark"
TS_LOGS_PATH = CWD + "/logs"
MODEL_STORE = "/tmp/model_store"
WF_STORE = "/tmp/wf_store"


class BenchmarkConfig:
def __init__(self, yaml_dict, skip_ts_install):
self.yaml_dict = yaml_dict
self.skip_ts_install = skip_ts_install
self.bm_config = {}
yesterday = datetime.date.today() - datetime.timedelta(days=1)
self.bm_config["version"] = \
"torchserve-nightly=={}.{}.{}".format(yesterday.year, yesterday.month, yesterday.day)
self.bm_config["hardware"] = 'cpu'
self.bm_config["version"] = "torchserve-nightly=={}.{}.{}".format(
yesterday.year, yesterday.month, yesterday.day
)
self.bm_config["hardware"] = "cpu"

def ts_version(self, version):
for k, v in version.items():
Expand All @@ -48,15 +49,15 @@ def metrics_cmd(self, cmd):
for k, v in key_value.items():
if k == "cmd":
cmd_options.append(v)
elif k == '--namespace':
elif k == "--namespace":
cmd_options.append(k)
cmd_options.append(''.join(v))
cmd_options.append("".join(v))
else:
cmd_options.append(k)
cmd_options.append(v)
break

self.bm_config["metrics_cmd"] = ' '.join(cmd_options)
self.bm_config["metrics_cmd"] = " ".join(cmd_options)

def report_cmd(self, cmd):
cmd_options = []
Expand All @@ -70,12 +71,14 @@ def report_cmd(self, cmd):
today = datetime.date.today()
v[i] = "{}-{}-{}".format(today.year, today.month, today.day)
break
cmd_options.append('{}/{}'.format('/'.join(v), self.bm_config["version"]))
cmd_options.append(
"{}/{}".format("/".join(v), self.bm_config["version"])
)
else:
cmd_options.append(v)
break

self.bm_config["report_cmd"] = ' '.join(cmd_options)
self.bm_config["report_cmd"] = " ".join(cmd_options)

def load_config(self):
report_cmd = None
Expand All @@ -91,10 +94,11 @@ def load_config(self):
elif k == "report_cmd":
report_cmd = v

self.bm_config["model_config_path"] = \
'{}/cpu'.format(MODEL_JSON_CONFIG_PATH) \
if self.bm_config["hardware"] == 'cpu' \
else '{}/gpu'.format(MODEL_JSON_CONFIG_PATH)
self.bm_config["model_config_path"] = (
"{}/cpu".format(MODEL_JSON_CONFIG_PATH)
if self.bm_config["hardware"] == "cpu"
else "{}/gpu".format(MODEL_JSON_CONFIG_PATH)
)

if self.skip_ts_install:
self.bm_config["version"] = get_torchserve_version()
Expand All @@ -105,67 +109,75 @@ def load_config(self):
for k, v in self.bm_config.items():
print("{}={}".format(k, v))


def load_benchmark_config(bm_config_path, skip_ts_install):
yaml = ruamel.yaml.YAML()
with open(bm_config_path, 'r') as f:
with open(bm_config_path, "r") as f:
yaml_dict = yaml.load(f)

benchmark_config = BenchmarkConfig(yaml_dict, skip_ts_install)
benchmark_config.load_config()

return benchmark_config.bm_config


def benchmark_env_setup(bm_config, skip_ts_install):
install_torchserve(skip_ts_install, bm_config["hardware"], bm_config["version"])
setup_benchmark_path(bm_config["model_config_path"])
build_model_json_config(bm_config["models"])


def install_torchserve(skip_ts_install, hw, ts_version):
if skip_ts_install:
return

# git checkout branch if it is needed
cmd = 'git checkout master && git reset --hard && git clean -dffx . && git pull --rebase'
cmd = "git checkout master && git reset --hard && git clean -dffx . && git pull --rebase"
execute(cmd, wait=True)
print("successfully reset git")

ts_install_cmd = None
if ts_version.startswith("torchserve==") or ts_version.startswith("torchserve-nightly=="):
ts_install_cmd = 'pip install {}'.format(ts_version)
if ts_version.startswith("torchserve==") or ts_version.startswith(
"torchserve-nightly=="
):
ts_install_cmd = "pip install {}".format(ts_version)
else:
cmd = 'git checkout {}'.format(ts_version)
cmd = "git checkout {}".format(ts_version)
execute(cmd, wait=True)

# install_dependencies.py
if hw == 'gpu':
cmd = 'python ts_scripts/install_dependencies.py --environment dev --cuda cu102'
if hw == "gpu":
cmd = "python ts_scripts/install_dependencies.py --environment dev --cuda cu116"
else:
cmd = 'python ts_scripts/install_dependencies.py --environment dev'
cmd = "python ts_scripts/install_dependencies.py --environment dev"
execute(cmd, wait=True)
print("successfully install install_dependencies.py")

# install torchserve
if ts_install_cmd is None:
ts_install_cmd = 'python ts_scripts/install_from_src.py'
ts_install_cmd = "python ts_scripts/install_from_src.py"
execute(ts_install_cmd, wait=True)
print("successfully install torchserve")


def setup_benchmark_path(model_config_path):
benchmark_path_list = [BENCHMARK_TMP_PATH, BENCHMARK_REPORT_PATH, model_config_path]
for benchmark_path in benchmark_path_list:
shutil.rmtree(benchmark_path, ignore_errors=True)
os.makedirs(benchmark_path, exist_ok=True)

print('successfully setup benchmark_path={}'.format(benchmark_path))
print("successfully setup benchmark_path={}".format(benchmark_path))


def build_model_json_config(models):
for model in models:
if model.startswith('/'):
if model.startswith("/"):
input_file = model
else:
input_file = CWD + '/benchmarks/models_config/{}'.format(model)
input_file = CWD + "/benchmarks/models_config/{}".format(model)
gen_model_config_json.convert_yaml_to_json(input_file, MODEL_JSON_CONFIG_PATH)


def run_benchmark(bm_config):
files = os.listdir(bm_config["model_config_path"])
files.sort()
Expand All @@ -174,67 +186,84 @@ def run_benchmark(bm_config):
# call benchmark-ab.py
shutil.rmtree(TS_LOGS_PATH, ignore_errors=True)
shutil.rmtree(BENCHMARK_TMP_PATH, ignore_errors=True)
cmd = 'python ./benchmarks/benchmark-ab.py --tmp_dir /tmp --report_location /tmp --config_properties ' \
'./benchmarks/config.properties --config {}/{}'\
.format(bm_config["model_config_path"], model_json_config)
cmd = (
"python ./benchmarks/benchmark-ab.py --tmp_dir /tmp --report_location /tmp --config_properties "
"./benchmarks/config.properties --config {}/{}".format(
bm_config["model_config_path"], model_json_config
)
)
execute(cmd, wait=True)

# generate stats metrics from ab_report.csv
bm_model = model_json_config[0: -len('.json')]
bm_model = model_json_config[0 : -len(".json")]

gen_metrics_json.gen_metric(
'{}/ab_report.csv'.format(BENCHMARK_TMP_PATH),
'{}/logs/stats_metrics.json'.format(BENCHMARK_TMP_PATH)
"{}/ab_report.csv".format(BENCHMARK_TMP_PATH),
"{}/logs/stats_metrics.json".format(BENCHMARK_TMP_PATH),
)

# load stats metrics to remote metrics storage
if "metrics_cmd" in bm_config:
execute(bm_config["metrics_cmd"], wait=True)

# cp benchmark logs to local
bm_model_log_path = '{}/{}'.format(BENCHMARK_REPORT_PATH, bm_model)
bm_model_log_path = "{}/{}".format(BENCHMARK_REPORT_PATH, bm_model)
os.makedirs(bm_model_log_path, exist_ok=True)
csv_file = '{}/ab_report.csv'.format(BENCHMARK_TMP_PATH)
csv_file = "{}/ab_report.csv".format(BENCHMARK_TMP_PATH)
if os.path.exists(csv_file):
shutil.move(csv_file, bm_model_log_path)
cmd = 'tar -cvzf {}/benchmark.tar.gz {}'.format(bm_model_log_path, BENCHMARK_TMP_PATH)
cmd = "tar -cvzf {}/benchmark.tar.gz {}".format(
bm_model_log_path, BENCHMARK_TMP_PATH
)
execute(cmd, wait=True)

cmd = 'tar -cvzf {}/logs.tar.gz {}'.format(bm_model_log_path, TS_LOGS_PATH)
cmd = "tar -cvzf {}/logs.tar.gz {}".format(bm_model_log_path, TS_LOGS_PATH)
execute(cmd, wait=True)
print("finish benchmark {}".format(bm_model))

# generate final report
gen_md_report.iterate_subdir(
BENCHMARK_REPORT_PATH,
'{}/report.md'.format(BENCHMARK_REPORT_PATH),
"{}/report.md".format(BENCHMARK_REPORT_PATH),
bm_config["hardware"],
bm_config["version"])
bm_config["version"],
)
print("report.md is generated")

# load logs to remote storage
if "report_cmd" in bm_config:
execute(bm_config["report_cmd"], wait=True)


def clean_up_benchmark_env(bm_config):
shutil.rmtree(BENCHMARK_TMP_PATH, ignore_errors=True)
shutil.rmtree(MODEL_JSON_CONFIG_PATH, ignore_errors=True)
shutil.rmtree(MODEL_STORE, ignore_errors=True)
shutil.rmtree(WF_STORE, ignore_errors=True)


def execute(command, wait=False, stdout=None, stderr=None, shell=True):
print("execute: {}".format(command))
cmd = Popen(command, shell=shell, close_fds=True, stdout=stdout, stderr=stderr, universal_newlines=True)
cmd = Popen(
command,
shell=shell,
close_fds=True,
stdout=stdout,
stderr=stderr,
universal_newlines=True,
)
if wait:
cmd.wait()
return cmd


def get_torchserve_version():
# fetch the torchserve version from version.txt file
with open(os.path.join(CWD, 'ts', 'version.txt'), 'r') as file:
with open(os.path.join(CWD, "ts", "version.txt"), "r") as file:
version = file.readline().rstrip()
return version


def main():
parser = argparse.ArgumentParser()

Expand All @@ -250,12 +279,17 @@ def main():
)

arguments = parser.parse_args()
skip_ts_config = False if arguments.skip is not None and arguments.skip.lower() == "false" else True
skip_ts_config = (
False
if arguments.skip is not None and arguments.skip.lower() == "false"
else True
)
bm_config = load_benchmark_config(arguments.input, skip_ts_config)
benchmark_env_setup(bm_config, skip_ts_config)
run_benchmark(bm_config)
clean_up_benchmark_env(bm_config)
print("benchmark_serving.sh finished successfully.")


if __name__ == "__main__":
main()
4 changes: 2 additions & 2 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ Use `build_image.sh` script to build the docker images. The script builds the `p
|-g, --gpu|Build image with GPU based ubuntu base image|
|-bt, --buildtype|Which type of docker image to build. Can be one of : production, dev, codebuild|
|-t, --tag|Tag name for image. If not specified, script uses torchserve default tag names.|
|-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`. Default `cu102`|
|-cv, --cudaversion| Specify the cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`. Default `cu116`|
|-ipex, --build-with-ipex| Specify to build with intel_extension_for_pytorch. If not specified, script builds without intel_extension_for_pytorch.|
|--codebuild| Set if you need [AWS CodeBuild](https://aws.amazon.com/codebuild/)|

Expand All @@ -51,7 +51,7 @@ Creates a docker image with publicly available `torchserve` and `torch-model-arc
./build_image.sh
```

- To create a GPU based image with cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`
- To create a GPU based image with cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`

```bash
./build_image.sh -g -cv cu102
Expand Down
22 changes: 14 additions & 8 deletions docker/build_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ do
-g|--gpu)
MACHINE=gpu
DOCKER_TAG="pytorch/torchserve:latest-gpu"
BASE_IMAGE="nvidia/cuda:10.2-cudnn8-runtime-ubuntu18.04"
CUDA_VERSION="cu102"
BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu20.04"
CUDA_VERSION="cu116"
shift
;;
-bt|--buildtype)
Expand All @@ -65,18 +65,21 @@ do
BUILD_WITH_IPEX=true
shift
;;
# With default ubuntu version 18.04
# With default ubuntu version 20.04
-cv|--cudaversion)
CUDA_VERSION="$2"
if [ $CUDA_VERSION == "cu116" ];
if [ $CUDA_VERSION == "cu117" ];
then
BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu18.04"
BASE_IMAGE="nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04"
elif [ $CUDA_VERSION == "cu116" ];
then
BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu20.04"
elif [ $CUDA_VERSION == "cu113" ];
then
BASE_IMAGE="nvidia/cuda:11.3.0-cudnn8-runtime-ubuntu18.04"
BASE_IMAGE="nvidia/cuda:11.3.0-cudnn8-runtime-ubuntu20.04"
elif [ $CUDA_VERSION == "cu111" ];
then
BASE_IMAGE="nvidia/cuda:11.1.1-cudnn8-runtime-ubuntu18.04"
BASE_IMAGE="nvidia/cuda:11.1.1-cudnn8-runtime-ubuntu20.04"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NVIDIA doesn't always name their containers consistently, need to manually verify each one of these and make sure they exist

elif [ $CUDA_VERSION == "cu102" ];
then
BASE_IMAGE="nvidia/cuda:10.2-cudnn8-runtime-ubuntu18.04"
Expand All @@ -96,7 +99,10 @@ do
# CUDA 10 is not supported on Ubuntu 20.04
-ub|--ubuntu)
agunapal marked this conversation as resolved.
Show resolved Hide resolved
UBUNTU_VERSION="$2"
if [[ $CUDA_VERSION == "cu116" && $UBUNTU_VERSION == "ubuntu20.04" ]];
if [[ $CUDA_VERSION == "cu117" && $UBUNTU_VERSION == "ubuntu20.04" ]];
then
BASE_IMAGE="nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04"
elif [[ $CUDA_VERSION == "cu116" && $UBUNTU_VERSION == "ubuntu20.04" ]];
then
BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu20.04"
elif [[ $CUDA_VERSION == "cu113" && $UBUNTU_VERSION == "ubuntu20.04" ]];
Expand Down
2 changes: 1 addition & 1 deletion docker/docker_nightly.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
# Build Nightly images and append the date in the name
try_and_handle(f"./build_image.sh -bt dev -t {organization}/{cpu_version}", dry_run)
try_and_handle(
f"./build_image.sh -bt dev -g -cv cu102 -t {organization}/{gpu_version}",
f"./build_image.sh -bt dev -g -cv cu116 -t {organization}/{gpu_version}",
dry_run,
)

Expand Down
Loading