From 40ce8159c7772806b9354ad3b5826a085fd46b32 Mon Sep 17 00:00:00 2001 From: agunapal Date: Wed, 16 Nov 2022 00:09:17 +0000 Subject: [PATCH 01/20] Changes to support PyTorch 1.13 --- CONTRIBUTING.md | 2 +- docker/README.md | 4 ++-- docker/build_image.sh | 5 ++++- docs/code_coverage.md | 8 ++++---- docs/getting_started.md | 20 +++++++++----------- requirements/torch_cu116_linux.txt | 8 ++++---- requirements/torch_cu116_windows.txt | 8 ++++---- requirements/torch_cu117_linux.txt | 9 +++++++++ requirements/torch_cu117_windows.txt | 6 ++++++ requirements/torch_darwin.txt | 8 ++++---- requirements/torch_linux.txt | 8 ++++---- requirements/torch_windows.txt | 10 +++++----- ts_scripts/install_dependencies.py | 2 +- 13 files changed, 57 insertions(+), 41 deletions(-) create mode 100644 requirements/torch_cu117_linux.txt create mode 100644 requirements/torch_cu117_windows.txt diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a58827ad99..a78a5860d6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,7 +22,7 @@ Your contributions will fall into two categories: ```bash python ts_scripts/install_dependencies.py --environment=dev --cuda=cu102 ``` - > Supported cuda versions as cu116, cu113, cu111, cu102, cu101, cu92 + > Supported cuda versions as cu117, cu116, cu113, cu111, cu102, cu101, cu92 - Install `pre-commit` to your Git flow: ```bash pre-commit install diff --git a/docker/README.md b/docker/README.md index 51e1e783ca..8a3fe233aa 100644 --- a/docker/README.md +++ b/docker/README.md @@ -36,7 +36,7 @@ Use `build_image.sh` script to build the docker images. The script builds the `p |-g, --gpu|Build image with GPU based ubuntu base image| |-bt, --buildtype|Which type of docker image to build. Can be one of : production, dev, codebuild| |-t, --tag|Tag name for image. If not specified, script uses torchserve default tag names.| -|-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`. 
Default `cu102`| +|-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`. Default `cu102`| |-ipex, --build-with-ipex| Specify to build with intel_extension_for_pytorch. If not specified, script builds without intel_extension_for_pytorch.| |--codebuild| Set if you need [AWS CodeBuild](https://aws.amazon.com/codebuild/)| @@ -51,7 +51,7 @@ Creates a docker image with publicly available `torchserve` and `torch-model-arc ./build_image.sh ``` - - To create a GPU based image with cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116` + - To create a GPU based image with cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117` ```bash ./build_image.sh -g -cv cu102 diff --git a/docker/build_image.sh b/docker/build_image.sh index fe3c747239..846847d63c 100755 --- a/docker/build_image.sh +++ b/docker/build_image.sh @@ -68,7 +68,10 @@ do # With default ubuntu version 18.04 -cv|--cudaversion) CUDA_VERSION="$2" - if [ $CUDA_VERSION == "cu116" ]; + if [ $CUDA_VERSION == "cu117" ]; + then + BASE_IMAGE="nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu18.04" + elif [ $CUDA_VERSION == "cu116" ]; then BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu18.04" elif [ $CUDA_VERSION == "cu113" ]; diff --git a/docs/code_coverage.md b/docs/code_coverage.md index 36e8fd6971..6f8a746bc3 100644 --- a/docs/code_coverage.md +++ b/docs/code_coverage.md @@ -6,14 +6,14 @@ ```bash python ts_scripts/install_dependencies.py --environment=dev ``` - + For GPU - Install dependencies (if not already installed) ```bash python ts_scripts/install_dependencies.py --environment=dev --cuda=cu102 ``` - > Supported cuda versions as cu116, cu113, cu111, cu102, cu101, cu92 - + > Supported cuda versions as cu117, cu116, cu113, cu111, cu102, cu101, cu92 + - Execute sanity suite ```bash python ./torchserve_sanity.py @@ -31,7 +31,7 @@ TorchServe frontend build suite consists of : * findbugs * PMD * UT - + 
The reports are generated at following path : `frontend/server/build/reports` ## To run backend pytest suite run following command diff --git a/docs/getting_started.md b/docs/getting_started.md index 7c2aa47820..3362dbb2d3 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -7,20 +7,20 @@ Note: For Conda, Python 3.8 is required to run Torchserve. #### For Debian Based Systems/ MacOS - + - For CPU ```bash python ./ts_scripts/install_dependencies.py ``` - - - For GPU with Cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116` + + - For GPU with Cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117` ```bash python ./ts_scripts/install_dependencies.py --cuda=cu102 ``` - - Note: PyTorch 1.9+ will not support cu92 and cu101. So TorchServe only supports cu92 and cu101 up to PyTorch 1.8.1. + + Note: PyTorch 1.9+ will not support cu92 and cu101. So TorchServe only supports cu92 and cu101 up to PyTorch 1.8.1. #### For Windows @@ -28,12 +28,12 @@ 2. Install torchserve, torch-model-archiver and torch-workflow-archiver - For [Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install) + For [Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install) Note: Conda packages are not supported for Windows. Refer to the documentation [here](./torchserve_on_win_native.md). ``` conda install torchserve torch-model-archiver torch-workflow-archiver -c pytorch ``` - + For Pip ``` pip install torchserve torch-model-archiver torch-workflow-archiver @@ -98,7 +98,7 @@ To test the model server, send a request to the server's `predictions` API. 
Torc #### Using GRPC APIs through python client - Install grpc python dependencies : - + ```bash pip install -U grpcio protobuf grpcio-tools ``` @@ -125,7 +125,7 @@ As an example we'll download the below cute kitten with curl -O https://raw.githubusercontent.com/pytorch/serve/master/docs/images/kitten_small.jpg ``` -And then call the prediction endpoint +And then call the prediction endpoint ```bash curl http://127.0.0.1:8080/predictions/densenet161 -T kitten_small.jpg @@ -195,5 +195,3 @@ Use `--cuda` flag with `install_dependencies.py` for installing cuda version spe Refer to the documentation [here](./torchserve_on_win_native.md). For information about the model archiver, see [detailed documentation](https://github.com/pytorch/serve/tree/master/model-archiver/README.md). - - diff --git a/requirements/torch_cu116_linux.txt b/requirements/torch_cu116_linux.txt index 0f7a2b6e58..ede7fb9f08 100644 --- a/requirements/torch_cu116_linux.txt +++ b/requirements/torch_cu116_linux.txt @@ -3,7 +3,7 @@ cython wheel pillow==9.0.1 -torch==1.12.0+cu116; sys_platform == 'linux' -torchvision==0.13.0+cu116; sys_platform == 'linux' -torchtext==0.13.0; sys_platform == 'linux' -torchaudio==0.12.0+cu116; sys_platform == 'linux' +torch==1.13.0+cu116; sys_platform == 'linux' +torchvision==0.14.0+cu116; sys_platform == 'linux' +torchtext==0.14.0; sys_platform == 'linux' +torchaudio==0.13.0+cu116; sys_platform == 'linux' diff --git a/requirements/torch_cu116_windows.txt b/requirements/torch_cu116_windows.txt index 85818b0640..3ec34a6b42 100644 --- a/requirements/torch_cu116_windows.txt +++ b/requirements/torch_cu116_windows.txt @@ -1,6 +1,6 @@ #pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 --extra-index-url https://download.pytorch.org/whl/cu116 -torch==1.12.0+cu116; sys_platform == 'win32' -torchvision==0.13.0+cu116; sys_platform == 'win32' -torchtext==0.13.0; sys_platform == 'win32' -torchaudio==0.12.0+cu116; sys_platform == 'win32' 
+torch==1.13.0+cu116; sys_platform == 'win32' +torchvision==0.14.0+cu116; sys_platform == 'win32' +torchtext==0.14.0; sys_platform == 'win32' +torchaudio==0.13.0+cu116; sys_platform == 'win32' diff --git a/requirements/torch_cu117_linux.txt b/requirements/torch_cu117_linux.txt new file mode 100644 index 0000000000..7c5baf1065 --- /dev/null +++ b/requirements/torch_cu117_linux.txt @@ -0,0 +1,9 @@ +#pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 +--extra-index-url https://download.pytorch.org/whl/cu116 +cython +wheel +pillow==9.0.1 +torch==1.13.0+cu117; sys_platform == 'linux' +torchvision==0.14.0+cu117; sys_platform == 'linux' +torchtext==0.14.0; sys_platform == 'linux' +torchaudio==0.13.0+cu117; sys_platform == 'linux' diff --git a/requirements/torch_cu117_windows.txt b/requirements/torch_cu117_windows.txt new file mode 100644 index 0000000000..c785fc53b7 --- /dev/null +++ b/requirements/torch_cu117_windows.txt @@ -0,0 +1,6 @@ +#pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 +--extra-index-url https://download.pytorch.org/whl/cu116 +torch==1.13.0+cu117; sys_platform == 'win32' +torchvision==0.14.0+cu117; sys_platform == 'win32' +torchtext==0.14.0; sys_platform == 'win32' +torchaudio==0.13.0+cu117; sys_platform == 'win32' diff --git a/requirements/torch_darwin.txt b/requirements/torch_darwin.txt index 9d8fe1d509..86cbbc1234 100644 --- a/requirements/torch_darwin.txt +++ b/requirements/torch_darwin.txt @@ -1,5 +1,5 @@ #pip install torch torchvision torchaudio -torch==1.12.0; sys_platform == 'darwin' -torchvision==0.13.0; sys_platform == 'darwin' -torchtext==0.13.0; sys_platform == 'darwin' -torchaudio==0.12.0; sys_platform == 'darwin' \ No newline at end of file +torch==1.13.0; sys_platform == 'darwin' +torchvision==0.14.0; sys_platform == 'darwin' +torchtext==0.14.0; sys_platform == 'darwin' +torchaudio==0.13.0; sys_platform == 'darwin' diff --git 
a/requirements/torch_linux.txt b/requirements/torch_linux.txt index 1cdb29b0a8..3d835c6223 100644 --- a/requirements/torch_linux.txt +++ b/requirements/torch_linux.txt @@ -3,7 +3,7 @@ cython wheel pillow==9.0.1 -torch==1.12.0+cpu; sys_platform == 'linux' -torchvision==0.13.0+cpu; sys_platform == 'linux' -torchtext==0.13.0; sys_platform == 'linux' -torchaudio==0.12.0+cpu; sys_platform == 'linux' \ No newline at end of file +torch==1.13.0+cpu; sys_platform == 'linux' +torchvision==0.14.0+cpu; sys_platform == 'linux' +torchtext==0.14.0; sys_platform == 'linux' +torchaudio==0.13.0+cpu; sys_platform == 'linux' diff --git a/requirements/torch_windows.txt b/requirements/torch_windows.txt index ddc228cc4e..6e6e02bb4b 100644 --- a/requirements/torch_windows.txt +++ b/requirements/torch_windows.txt @@ -1,7 +1,7 @@ #pip install torch torchvision torchaudio wheel -torch==1.12.0; sys_platform == 'win32' -torchvision==0.13.0; sys_platform == 'win32' -torchtext==0.13.0; sys_platform == 'win32' -torchaudio==0.12.0; sys_platform == 'win32' -pillow==9.0.1 \ No newline at end of file +torch==1.13.0; sys_platform == 'win32' +torchvision==0.14.0; sys_platform == 'win32' +torchtext==0.14.0; sys_platform == 'win32' +torchaudio==0.13.0; sys_platform == 'win32' +pillow==9.0.1 diff --git a/ts_scripts/install_dependencies.py b/ts_scripts/install_dependencies.py index 1b0484c4e5..fd6485f811 100644 --- a/ts_scripts/install_dependencies.py +++ b/ts_scripts/install_dependencies.py @@ -174,7 +174,7 @@ def get_brew_version(): parser.add_argument( "--cuda", default=None, - choices=["cu92", "cu101", "cu102", "cu111", "cu113", "cu116"], + choices=["cu92", "cu101", "cu102", "cu111", "cu113", "cu116", "cu117"], help="CUDA version for torch", ) parser.add_argument( From 08b4274306e63f5f0a65d48fac162e7a64ad891c Mon Sep 17 00:00:00 2001 From: agunapal Date: Thu, 17 Nov 2022 20:38:00 +0000 Subject: [PATCH 02/20] review comments --- requirements/torch_cu117_linux.txt | 4 ++-- 
requirements/torch_cu117_windows.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/torch_cu117_linux.txt b/requirements/torch_cu117_linux.txt index 7c5baf1065..9da8c86ef1 100644 --- a/requirements/torch_cu117_linux.txt +++ b/requirements/torch_cu117_linux.txt @@ -1,5 +1,5 @@ -#pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 ---extra-index-url https://download.pytorch.org/whl/cu116 +#pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117 +--extra-index-url https://download.pytorch.org/whl/cu117 cython wheel pillow==9.0.1 diff --git a/requirements/torch_cu117_windows.txt b/requirements/torch_cu117_windows.txt index c785fc53b7..470eb2741c 100644 --- a/requirements/torch_cu117_windows.txt +++ b/requirements/torch_cu117_windows.txt @@ -1,5 +1,5 @@ -#pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 ---extra-index-url https://download.pytorch.org/whl/cu116 +#pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117 +--extra-index-url https://download.pytorch.org/whl/cu117 torch==1.13.0+cu117; sys_platform == 'win32' torchvision==0.14.0+cu117; sys_platform == 'win32' torchtext==0.14.0; sys_platform == 'win32' From a50335d0509d303546055e79f76419e424f08997 Mon Sep 17 00:00:00 2001 From: agunapal Date: Thu, 17 Nov 2022 20:50:06 +0000 Subject: [PATCH 03/20] Updated default CUDA version for docker to cu116 --- docker/README.md | 2 +- docker/build_image.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/README.md b/docker/README.md index 8a3fe233aa..20d1e55e2e 100644 --- a/docker/README.md +++ b/docker/README.md @@ -36,7 +36,7 @@ Use `build_image.sh` script to build the docker images. The script builds the `p |-g, --gpu|Build image with GPU based ubuntu base image| |-bt, --buildtype|Which type of docker image to build. 
Can be one of : production, dev, codebuild| |-t, --tag|Tag name for image. If not specified, script uses torchserve default tag names.| -|-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`. Default `cu102`| +|-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`. Default `cu116`| |-ipex, --build-with-ipex| Specify to build with intel_extension_for_pytorch. If not specified, script builds without intel_extension_for_pytorch.| |--codebuild| Set if you need [AWS CodeBuild](https://aws.amazon.com/codebuild/)| diff --git a/docker/build_image.sh b/docker/build_image.sh index fa32f3a24c..e1016b6036 100755 --- a/docker/build_image.sh +++ b/docker/build_image.sh @@ -42,8 +42,8 @@ do -g|--gpu) MACHINE=gpu DOCKER_TAG="pytorch/torchserve:latest-gpu" - BASE_IMAGE="nvidia/cuda:10.2-cudnn8-runtime-ubuntu18.04" - CUDA_VERSION="cu102" + BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu18.04" + CUDA_VERSION="cu116" shift ;; -bt|--buildtype) From 9eac44d3c0c94e159521308bf39e3dfab037c16e Mon Sep 17 00:00:00 2001 From: agunapal Date: Thu, 17 Nov 2022 20:57:07 +0000 Subject: [PATCH 04/20] Updated default CUDA version for docker to cu116 --- docker/docker_nightly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/docker_nightly.py b/docker/docker_nightly.py index a1e42aa80a..ba3917887a 100644 --- a/docker/docker_nightly.py +++ b/docker/docker_nightly.py @@ -34,7 +34,7 @@ # Build Nightly images and append the date in the name try_and_handle(f"./build_image.sh -bt dev -t {organization}/{cpu_version}", dry_run) try_and_handle( - f"./build_image.sh -bt dev -g -cv cu102 -t {organization}/{gpu_version}", + f"./build_image.sh -bt dev -g -cv cu116 -t {organization}/{gpu_version}", dry_run, ) From 6e15536c21aa599521dabee5dc92635b77fed994 Mon Sep 17 00:00:00 2001 From: agunapal Date: Thu, 17 Nov 2022 22:49:38 +0000 
Subject: [PATCH 05/20] Updated default ubuntu version to be 20.04 wherever applicable --- docker/build_image.sh | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/docker/build_image.sh b/docker/build_image.sh index e1016b6036..d657326907 100755 --- a/docker/build_image.sh +++ b/docker/build_image.sh @@ -42,7 +42,7 @@ do -g|--gpu) MACHINE=gpu DOCKER_TAG="pytorch/torchserve:latest-gpu" - BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu18.04" + BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu20.04" CUDA_VERSION="cu116" shift ;; @@ -65,21 +65,21 @@ do BUILD_WITH_IPEX=true shift ;; - # With default ubuntu version 18.04 + # With default ubuntu version 20.04 -cv|--cudaversion) CUDA_VERSION="$2" if [ $CUDA_VERSION == "cu117" ]; then - BASE_IMAGE="nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu18.04" + BASE_IMAGE="nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04" elif [ $CUDA_VERSION == "cu116" ]; then - BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu18.04" + BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu20.04" elif [ $CUDA_VERSION == "cu113" ]; then - BASE_IMAGE="nvidia/cuda:11.3.0-cudnn8-runtime-ubuntu18.04" + BASE_IMAGE="nvidia/cuda:11.3.0-cudnn8-runtime-ubuntu20.04" elif [ $CUDA_VERSION == "cu111" ]; then - BASE_IMAGE="nvidia/cuda:11.1.1-cudnn8-runtime-ubuntu18.04" + BASE_IMAGE="nvidia/cuda:11.1.1-cudnn8-runtime-ubuntu20.04" elif [ $CUDA_VERSION == "cu102" ]; then BASE_IMAGE="nvidia/cuda:10.2-cudnn8-runtime-ubuntu18.04" @@ -99,7 +99,10 @@ do # CUDA 10 is not supported on Ubuntu 20.04 -ub|--ubuntu) UBUNTU_VERSION="$2" - if [[ $CUDA_VERSION == "cu116" && $UBUNTU_VERSION == "ubuntu20.04" ]]; + if [[ $CUDA_VERSION == "cu117" && $UBUNTU_VERSION == "ubuntu20.04" ]]; + then + BASE_IMAGE="nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04" + elif [[ $CUDA_VERSION == "cu116" && $UBUNTU_VERSION == "ubuntu20.04" ]]; then BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu20.04" elif [[ $CUDA_VERSION == "cu113" && $UBUNTU_VERSION == 
"ubuntu20.04" ]]; From 97f29cabe3a0176f9cc754629dadd041f5f6b56d Mon Sep 17 00:00:00 2001 From: agunapal Date: Thu, 17 Nov 2022 23:07:11 +0000 Subject: [PATCH 06/20] Updated default CUDA version to CUDA 11.6 --- benchmarks/auto_benchmark.py | 132 ++++++++++++++++++++++------------- 1 file changed, 83 insertions(+), 49 deletions(-) diff --git a/benchmarks/auto_benchmark.py b/benchmarks/auto_benchmark.py index 586dc470ea..8a7375f70a 100644 --- a/benchmarks/auto_benchmark.py +++ b/benchmarks/auto_benchmark.py @@ -1,20 +1,20 @@ import argparse import datetime import os -import ruamel.yaml import shutil from subprocess import Popen -from utils import gen_model_config_json -from utils import gen_md_report -from utils import gen_metrics_json + +import ruamel.yaml +from utils import gen_md_report, gen_metrics_json, gen_model_config_json CWD = os.getcwd() -MODEL_JSON_CONFIG_PATH = CWD + '/model_json_config' -BENCHMARK_TMP_PATH = '/tmp/benchmark' -BENCHMARK_REPORT_PATH = '/tmp/ts_benchmark' -TS_LOGS_PATH = CWD + '/logs' -MODEL_STORE = '/tmp/model_store' -WF_STORE = '/tmp/wf_store' +MODEL_JSON_CONFIG_PATH = CWD + "/model_json_config" +BENCHMARK_TMP_PATH = "/tmp/benchmark" +BENCHMARK_REPORT_PATH = "/tmp/ts_benchmark" +TS_LOGS_PATH = CWD + "/logs" +MODEL_STORE = "/tmp/model_store" +WF_STORE = "/tmp/wf_store" + class BenchmarkConfig: def __init__(self, yaml_dict, skip_ts_install): @@ -22,9 +22,10 @@ def __init__(self, yaml_dict, skip_ts_install): self.skip_ts_install = skip_ts_install self.bm_config = {} yesterday = datetime.date.today() - datetime.timedelta(days=1) - self.bm_config["version"] = \ - "torchserve-nightly=={}.{}.{}".format(yesterday.year, yesterday.month, yesterday.day) - self.bm_config["hardware"] = 'cpu' + self.bm_config["version"] = "torchserve-nightly=={}.{}.{}".format( + yesterday.year, yesterday.month, yesterday.day + ) + self.bm_config["hardware"] = "cpu" def ts_version(self, version): for k, v in version.items(): @@ -48,15 +49,15 @@ def metrics_cmd(self, 
cmd): for k, v in key_value.items(): if k == "cmd": cmd_options.append(v) - elif k == '--namespace': + elif k == "--namespace": cmd_options.append(k) - cmd_options.append(''.join(v)) + cmd_options.append("".join(v)) else: cmd_options.append(k) cmd_options.append(v) break - self.bm_config["metrics_cmd"] = ' '.join(cmd_options) + self.bm_config["metrics_cmd"] = " ".join(cmd_options) def report_cmd(self, cmd): cmd_options = [] @@ -70,12 +71,14 @@ def report_cmd(self, cmd): today = datetime.date.today() v[i] = "{}-{}-{}".format(today.year, today.month, today.day) break - cmd_options.append('{}/{}'.format('/'.join(v), self.bm_config["version"])) + cmd_options.append( + "{}/{}".format("/".join(v), self.bm_config["version"]) + ) else: cmd_options.append(v) break - self.bm_config["report_cmd"] = ' '.join(cmd_options) + self.bm_config["report_cmd"] = " ".join(cmd_options) def load_config(self): report_cmd = None @@ -91,10 +94,11 @@ def load_config(self): elif k == "report_cmd": report_cmd = v - self.bm_config["model_config_path"] = \ - '{}/cpu'.format(MODEL_JSON_CONFIG_PATH) \ - if self.bm_config["hardware"] == 'cpu' \ - else '{}/gpu'.format(MODEL_JSON_CONFIG_PATH) + self.bm_config["model_config_path"] = ( + "{}/cpu".format(MODEL_JSON_CONFIG_PATH) + if self.bm_config["hardware"] == "cpu" + else "{}/gpu".format(MODEL_JSON_CONFIG_PATH) + ) if self.skip_ts_install: self.bm_config["version"] = get_torchserve_version() @@ -105,9 +109,10 @@ def load_config(self): for k, v in self.bm_config.items(): print("{}={}".format(k, v)) + def load_benchmark_config(bm_config_path, skip_ts_install): yaml = ruamel.yaml.YAML() - with open(bm_config_path, 'r') as f: + with open(bm_config_path, "r") as f: yaml_dict = yaml.load(f) benchmark_config = BenchmarkConfig(yaml_dict, skip_ts_install) @@ -115,57 +120,64 @@ def load_benchmark_config(bm_config_path, skip_ts_install): return benchmark_config.bm_config + def benchmark_env_setup(bm_config, skip_ts_install): install_torchserve(skip_ts_install, 
bm_config["hardware"], bm_config["version"]) setup_benchmark_path(bm_config["model_config_path"]) build_model_json_config(bm_config["models"]) + def install_torchserve(skip_ts_install, hw, ts_version): if skip_ts_install: return # git checkout branch if it is needed - cmd = 'git checkout master && git reset --hard && git clean -dffx . && git pull --rebase' + cmd = "git checkout master && git reset --hard && git clean -dffx . && git pull --rebase" execute(cmd, wait=True) print("successfully reset git") ts_install_cmd = None - if ts_version.startswith("torchserve==") or ts_version.startswith("torchserve-nightly=="): - ts_install_cmd = 'pip install {}'.format(ts_version) + if ts_version.startswith("torchserve==") or ts_version.startswith( + "torchserve-nightly==" + ): + ts_install_cmd = "pip install {}".format(ts_version) else: - cmd = 'git checkout {}'.format(ts_version) + cmd = "git checkout {}".format(ts_version) execute(cmd, wait=True) # install_dependencies.py - if hw == 'gpu': - cmd = 'python ts_scripts/install_dependencies.py --environment dev --cuda cu102' + if hw == "gpu": + cmd = "python ts_scripts/install_dependencies.py --environment dev --cuda cu116" else: - cmd = 'python ts_scripts/install_dependencies.py --environment dev' + cmd = "python ts_scripts/install_dependencies.py --environment dev" execute(cmd, wait=True) print("successfully install install_dependencies.py") # install torchserve if ts_install_cmd is None: - ts_install_cmd = 'python ts_scripts/install_from_src.py' + ts_install_cmd = "python ts_scripts/install_from_src.py" execute(ts_install_cmd, wait=True) print("successfully install torchserve") + def setup_benchmark_path(model_config_path): benchmark_path_list = [BENCHMARK_TMP_PATH, BENCHMARK_REPORT_PATH, model_config_path] for benchmark_path in benchmark_path_list: shutil.rmtree(benchmark_path, ignore_errors=True) os.makedirs(benchmark_path, exist_ok=True) - print('successfully setup benchmark_path={}'.format(benchmark_path)) + 
print("successfully setup benchmark_path={}".format(benchmark_path)) + def build_model_json_config(models): for model in models: - if model.startswith('/'): + if model.startswith("/"): input_file = model else: - input_file = CWD + '/benchmarks/models_config/{}'.format(model) + input_file = CWD + "/benchmarks/models_config/{}".format(model) gen_model_config_json.convert_yaml_to_json(input_file, MODEL_JSON_CONFIG_PATH) + def run_benchmark(bm_config): files = os.listdir(bm_config["model_config_path"]) files.sort() @@ -174,17 +186,20 @@ def run_benchmark(bm_config): # call benchmark-ab.py shutil.rmtree(TS_LOGS_PATH, ignore_errors=True) shutil.rmtree(BENCHMARK_TMP_PATH, ignore_errors=True) - cmd = 'python ./benchmarks/benchmark-ab.py --tmp_dir /tmp --report_location /tmp --config_properties ' \ - './benchmarks/config.properties --config {}/{}'\ - .format(bm_config["model_config_path"], model_json_config) + cmd = ( + "python ./benchmarks/benchmark-ab.py --tmp_dir /tmp --report_location /tmp --config_properties " + "./benchmarks/config.properties --config {}/{}".format( + bm_config["model_config_path"], model_json_config + ) + ) execute(cmd, wait=True) # generate stats metrics from ab_report.csv - bm_model = model_json_config[0: -len('.json')] + bm_model = model_json_config[0 : -len(".json")] gen_metrics_json.gen_metric( - '{}/ab_report.csv'.format(BENCHMARK_TMP_PATH), - '{}/logs/stats_metrics.json'.format(BENCHMARK_TMP_PATH) + "{}/ab_report.csv".format(BENCHMARK_TMP_PATH), + "{}/logs/stats_metrics.json".format(BENCHMARK_TMP_PATH), ) # load stats metrics to remote metrics storage @@ -192,49 +207,63 @@ def run_benchmark(bm_config): execute(bm_config["metrics_cmd"], wait=True) # cp benchmark logs to local - bm_model_log_path = '{}/{}'.format(BENCHMARK_REPORT_PATH, bm_model) + bm_model_log_path = "{}/{}".format(BENCHMARK_REPORT_PATH, bm_model) os.makedirs(bm_model_log_path, exist_ok=True) - csv_file = '{}/ab_report.csv'.format(BENCHMARK_TMP_PATH) + csv_file = 
"{}/ab_report.csv".format(BENCHMARK_TMP_PATH) if os.path.exists(csv_file): shutil.move(csv_file, bm_model_log_path) - cmd = 'tar -cvzf {}/benchmark.tar.gz {}'.format(bm_model_log_path, BENCHMARK_TMP_PATH) + cmd = "tar -cvzf {}/benchmark.tar.gz {}".format( + bm_model_log_path, BENCHMARK_TMP_PATH + ) execute(cmd, wait=True) - cmd = 'tar -cvzf {}/logs.tar.gz {}'.format(bm_model_log_path, TS_LOGS_PATH) + cmd = "tar -cvzf {}/logs.tar.gz {}".format(bm_model_log_path, TS_LOGS_PATH) execute(cmd, wait=True) print("finish benchmark {}".format(bm_model)) # generate final report gen_md_report.iterate_subdir( BENCHMARK_REPORT_PATH, - '{}/report.md'.format(BENCHMARK_REPORT_PATH), + "{}/report.md".format(BENCHMARK_REPORT_PATH), bm_config["hardware"], - bm_config["version"]) + bm_config["version"], + ) print("report.md is generated") # load logs to remote storage if "report_cmd" in bm_config: execute(bm_config["report_cmd"], wait=True) + def clean_up_benchmark_env(bm_config): shutil.rmtree(BENCHMARK_TMP_PATH, ignore_errors=True) shutil.rmtree(MODEL_JSON_CONFIG_PATH, ignore_errors=True) shutil.rmtree(MODEL_STORE, ignore_errors=True) shutil.rmtree(WF_STORE, ignore_errors=True) + def execute(command, wait=False, stdout=None, stderr=None, shell=True): print("execute: {}".format(command)) - cmd = Popen(command, shell=shell, close_fds=True, stdout=stdout, stderr=stderr, universal_newlines=True) + cmd = Popen( + command, + shell=shell, + close_fds=True, + stdout=stdout, + stderr=stderr, + universal_newlines=True, + ) if wait: cmd.wait() return cmd + def get_torchserve_version(): # fetch the torchserve version from version.txt file - with open(os.path.join(CWD, 'ts', 'version.txt'), 'r') as file: + with open(os.path.join(CWD, "ts", "version.txt"), "r") as file: version = file.readline().rstrip() return version + def main(): parser = argparse.ArgumentParser() @@ -250,12 +279,17 @@ def main(): ) arguments = parser.parse_args() - skip_ts_config = False if arguments.skip is not None and 
arguments.skip.lower() == "false" else True + skip_ts_config = ( + False + if arguments.skip is not None and arguments.skip.lower() == "false" + else True + ) bm_config = load_benchmark_config(arguments.input, skip_ts_config) benchmark_env_setup(bm_config, skip_ts_config) run_benchmark(bm_config) clean_up_benchmark_env(bm_config) print("benchmark_serving.sh finished successfully.") + if __name__ == "__main__": main() From 795c7696ef403190f72e2da8551ff678743e0cd0 Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 01:38:45 +0000 Subject: [PATCH 07/20] Updated docker to CUDA 11.7 as default --- benchmarks/auto_benchmark.py | 2 +- docker/README.md | 2 +- docker/build_image.sh | 4 ++-- docker/docker_nightly.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/benchmarks/auto_benchmark.py b/benchmarks/auto_benchmark.py index 8a7375f70a..7918cfdd0f 100644 --- a/benchmarks/auto_benchmark.py +++ b/benchmarks/auto_benchmark.py @@ -147,7 +147,7 @@ def install_torchserve(skip_ts_install, hw, ts_version): # install_dependencies.py if hw == "gpu": - cmd = "python ts_scripts/install_dependencies.py --environment dev --cuda cu116" + cmd = "python ts_scripts/install_dependencies.py --environment dev --cuda cu117" else: cmd = "python ts_scripts/install_dependencies.py --environment dev" execute(cmd, wait=True) diff --git a/docker/README.md b/docker/README.md index 20d1e55e2e..35ffc18b43 100644 --- a/docker/README.md +++ b/docker/README.md @@ -36,7 +36,7 @@ Use `build_image.sh` script to build the docker images. The script builds the `p |-g, --gpu|Build image with GPU based ubuntu base image| |-bt, --buildtype|Which type of docker image to build. Can be one of : production, dev, codebuild| |-t, --tag|Tag name for image. If not specified, script uses torchserve default tag names.| -|-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`. 
Default `cu116`| +|-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`. Default `cu117`| |-ipex, --build-with-ipex| Specify to build with intel_extension_for_pytorch. If not specified, script builds without intel_extension_for_pytorch.| |--codebuild| Set if you need [AWS CodeBuild](https://aws.amazon.com/codebuild/)| diff --git a/docker/build_image.sh b/docker/build_image.sh index d657326907..d086340212 100755 --- a/docker/build_image.sh +++ b/docker/build_image.sh @@ -42,8 +42,8 @@ do -g|--gpu) MACHINE=gpu DOCKER_TAG="pytorch/torchserve:latest-gpu" - BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu20.04" - CUDA_VERSION="cu116" + BASE_IMAGE="nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04" + CUDA_VERSION="cu117" shift ;; -bt|--buildtype) diff --git a/docker/docker_nightly.py b/docker/docker_nightly.py index ba3917887a..edc844f950 100644 --- a/docker/docker_nightly.py +++ b/docker/docker_nightly.py @@ -34,7 +34,7 @@ # Build Nightly images and append the date in the name try_and_handle(f"./build_image.sh -bt dev -t {organization}/{cpu_version}", dry_run) try_and_handle( - f"./build_image.sh -bt dev -g -cv cu116 -t {organization}/{gpu_version}", + f"./build_image.sh -bt dev -g -cv cu117 -t {organization}/{gpu_version}", dry_run, ) From e8b59ad1c1c13e44ca621ead93ca5a5d126c1b8a Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 01:43:05 +0000 Subject: [PATCH 08/20] Removed ubuntu arg from docker build --- docker/build_image.sh | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/docker/build_image.sh b/docker/build_image.sh index d086340212..0e8494e953 100755 --- a/docker/build_image.sh +++ b/docker/build_image.sh @@ -22,7 +22,6 @@ do echo "-g, --gpu specify to use gpu" echo "-bt, --buildtype specify to created image for codebuild. Possible values: production, dev, codebuild." 
echo "-cv, --cudaversion specify to cuda version to use" - echo "-ub, --ubuntu specify ubuntu version. Possible values: ubuntu:20.04" echo "-t, --tag specify tag name for docker image" echo "-lf, --use-local-serve-folder specify this option for the benchmark image if the current 'serve' folder should be used during automated benchmarks" echo "-ipex, --build-with-ipex specify to build with intel_extension_for_pytorch" @@ -96,33 +95,6 @@ do shift shift ;; - # CUDA 10 is not supported on Ubuntu 20.04 - -ub|--ubuntu) - UBUNTU_VERSION="$2" - if [[ $CUDA_VERSION == "cu117" && $UBUNTU_VERSION == "ubuntu20.04" ]]; - then - BASE_IMAGE="nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04" - elif [[ $CUDA_VERSION == "cu116" && $UBUNTU_VERSION == "ubuntu20.04" ]]; - then - BASE_IMAGE="nvidia/cuda:11.6.0-cudnn8-runtime-ubuntu20.04" - elif [[ $CUDA_VERSION == "cu113" && $UBUNTU_VERSION == "ubuntu20.04" ]]; - then - BASE_IMAGE="nvidia/cuda:11.3.0-cudnn8-runtime-ubuntu20.04" - elif [[ $CUDA_VERSION == "cu111" && $UBUNTU_VERSION == "ubuntu20.04" ]]; - then - BASE_IMAGE="nvidia/cuda:11.1.1-cudnn8-runtime-ubuntu20.04" - elif [[ $UBUNTU_VERSION == "ubuntu20.04" ]]; - then - echo "Using CPU image" - BASE_IMAGE="ubuntu:20.04" - else - echo "Ubuntu and CUDA version combination is not supported" - echo $UBUNTU_VERSION - echo $CUDA_VERSION - exit 1 - fi - shift - ;; esac done From 169665b7af1da6b832ae1311f4e0baf3a886542d Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 02:40:37 +0000 Subject: [PATCH 09/20] Added github action for cpu regression tests --- .github/workflows/regression_tests_cpu.yml | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/workflows/regression_tests_cpu.yml diff --git a/.github/workflows/regression_tests_cpu.yml b/.github/workflows/regression_tests_cpu.yml new file mode 100644 index 0000000000..909714d6b3 --- /dev/null +++ b/.github/workflows/regression_tests_cpu.yml @@ -0,0 +1,26 @@ +name: Run Regression Tests on CPU + 
+on: workflow_dispatch + +jobs: + nightly: + # creates workflows for the 3 OS: ubuntu, macOS & windows + runs-on: ${{ matrix.os }} + strategy: + fail-fast: true + matrix: + os: [ubuntu-20.04, macOS-latest, windows-latest] + steps: + - name: Setup Python 3.8 + uses: actions/setup-python@v3 + with: + python-version: 3.8 + architecture: x64 + - name: Checkout TorchServe + uses: actions/checkout@v3 + - name: Install dependencies + run: | + python ts_scripts/install_dependencies.py --environment=dev + - name: Torchserve Regression Tests + run: | + python test/regression_tests.py From 483bb5b6a0eeaddeb3c430003fc8d35725c54dd2 Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 02:43:52 +0000 Subject: [PATCH 10/20] Added github action for cpu regression tests --- .github/workflows/regression_tests_cpu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/regression_tests_cpu.yml b/.github/workflows/regression_tests_cpu.yml index 909714d6b3..cc5c2d1256 100644 --- a/.github/workflows/regression_tests_cpu.yml +++ b/.github/workflows/regression_tests_cpu.yml @@ -1,6 +1,6 @@ name: Run Regression Tests on CPU -on: workflow_dispatch +on: push jobs: nightly: From 932e00c6c1d09074657e469a8a9c8f77c032b03c Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 02:45:01 +0000 Subject: [PATCH 11/20] Added github action for cpu regression tests --- .github/workflows/regression_tests_cpu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/regression_tests_cpu.yml b/.github/workflows/regression_tests_cpu.yml index cc5c2d1256..477fbebfaa 100644 --- a/.github/workflows/regression_tests_cpu.yml +++ b/.github/workflows/regression_tests_cpu.yml @@ -3,7 +3,7 @@ name: Run Regression Tests on CPU on: push jobs: - nightly: + regression-cpu: # creates workflows for the 3 OS: ubuntu, macOS & windows runs-on: ${{ matrix.os }} strategy: From 48211bbf26a33558d0943f63ad7060629135c10b Mon Sep 17 00:00:00 2001 From: 
agunapal Date: Fri, 18 Nov 2022 03:05:54 +0000 Subject: [PATCH 12/20] Added github action for cpu regression tests --- .github/workflows/regression_tests_cpu.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/regression_tests_cpu.yml b/.github/workflows/regression_tests_cpu.yml index 477fbebfaa..b6a52db67c 100644 --- a/.github/workflows/regression_tests_cpu.yml +++ b/.github/workflows/regression_tests_cpu.yml @@ -7,7 +7,6 @@ jobs: # creates workflows for the 3 OS: ubuntu, macOS & windows runs-on: ${{ matrix.os }} strategy: - fail-fast: true matrix: os: [ubuntu-20.04, macOS-latest, windows-latest] steps: From 6d9c1d4f3d62990f0a3bc2c55990c9ed4f4c1a1c Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 03:24:29 +0000 Subject: [PATCH 13/20] Added gpu regression tests action --- .github/workflows/regression_tests_gpu.yml | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/workflows/regression_tests_gpu.yml diff --git a/.github/workflows/regression_tests_gpu.yml b/.github/workflows/regression_tests_gpu.yml new file mode 100644 index 0000000000..6207c7f799 --- /dev/null +++ b/.github/workflows/regression_tests_gpu.yml @@ -0,0 +1,26 @@ +name: Run Regression Tests on GPU + +on: push + +jobs: + regression-gpu: + # creates workflows for the 2 OS: ubuntu & windows + runs-on: [self-hosted, ci-gpu-2] + strategy: + fail-fast: false + matrix: + cuda: ["cu116", "cu117"] + steps: + - name: Setup Python 3.8 + uses: actions/setup-python@v3 + with: + python-version: 3.8 + architecture: x64 + - name: Checkout TorchServe + uses: actions/checkout@v3 + - name: Install dependencies + run: | + python ts_scripts/install_dependencies.py --environment=dev --cuda=${{ matrix.cuda }} + - name: Torchserve Regression Tests + run: | + python test/regression_tests.py From 6c2accfe714f2283b5bdb21e0cca092feefe476f Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 03:24:44 +0000 Subject: [PATCH 14/20] Added gpu regression 
tests action --- .github/workflows/regression_tests_cpu.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/regression_tests_cpu.yml b/.github/workflows/regression_tests_cpu.yml index b6a52db67c..b078cc3d1d 100644 --- a/.github/workflows/regression_tests_cpu.yml +++ b/.github/workflows/regression_tests_cpu.yml @@ -7,6 +7,7 @@ jobs: # creates workflows for the 3 OS: ubuntu, macOS & windows runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: os: [ubuntu-20.04, macOS-latest, windows-latest] steps: From ab09c41266801cb2fbe03c68dac34e092c1e155e Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 19:29:24 +0000 Subject: [PATCH 15/20] change runner --- .github/workflows/regression_tests_cpu.yml | 2 +- .github/workflows/regression_tests_gpu.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/regression_tests_cpu.yml b/.github/workflows/regression_tests_cpu.yml index b078cc3d1d..ac503dfdf4 100644 --- a/.github/workflows/regression_tests_cpu.yml +++ b/.github/workflows/regression_tests_cpu.yml @@ -1,6 +1,6 @@ name: Run Regression Tests on CPU -on: push +on: workflow_dispatch jobs: regression-cpu: diff --git a/.github/workflows/regression_tests_gpu.yml b/.github/workflows/regression_tests_gpu.yml index 6207c7f799..7a8e903d3a 100644 --- a/.github/workflows/regression_tests_gpu.yml +++ b/.github/workflows/regression_tests_gpu.yml @@ -5,7 +5,7 @@ on: push jobs: regression-gpu: # creates workflows for the 2 OS: ubuntu & windows - runs-on: [self-hosted, ci-gpu-2] + runs-on: [self-hosted, ci-gpu] strategy: fail-fast: false matrix: From a6e30a23223298fac8fa80737306bf32d6b55b1a Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 20:34:42 +0000 Subject: [PATCH 16/20] added java 17 to github actions --- .github/workflows/regression_tests_cpu.yml | 7 ++++++- .github/workflows/regression_tests_gpu.yml | 13 +++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/regression_tests_cpu.yml b/.github/workflows/regression_tests_cpu.yml index ac503dfdf4..8181701e87 100644 --- a/.github/workflows/regression_tests_cpu.yml +++ b/.github/workflows/regression_tests_cpu.yml @@ -1,6 +1,6 @@ name: Run Regression Tests on CPU -on: workflow_dispatch +on: push jobs: regression-cpu: @@ -16,6 +16,11 @@ jobs: with: python-version: 3.8 architecture: x64 + - name: Setup Java 17 + uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: '17' - name: Checkout TorchServe uses: actions/checkout@v3 - name: Install dependencies diff --git a/.github/workflows/regression_tests_gpu.yml b/.github/workflows/regression_tests_gpu.yml index 7a8e903d3a..a35d06f996 100644 --- a/.github/workflows/regression_tests_gpu.yml +++ b/.github/workflows/regression_tests_gpu.yml @@ -11,11 +11,24 @@ jobs: matrix: cuda: ["cu116", "cu117"] steps: + - name: Clean up previous run + run: | + echo "Cleaning up previous run" + cd $RUNNER_WORKSPACE + pwd + cd .. + pwd + rm -rf _tool - name: Setup Python 3.8 uses: actions/setup-python@v3 with: python-version: 3.8 architecture: x64 + - name: Setup Java 17 + uses: actions/setup-java@v3 + with: + distribution: 'zulu' + java-version: '17' - name: Checkout TorchServe uses: actions/checkout@v3 - name: Install dependencies From cd1f0984e6c6a46616626cd5766f69302738af17 Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 22:05:23 +0000 Subject: [PATCH 17/20] update git version --- .github/workflows/regression_tests_cpu.yml | 2 +- .github/workflows/regression_tests_gpu.yml | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/regression_tests_cpu.yml b/.github/workflows/regression_tests_cpu.yml index 8181701e87..117ea7a6ee 100644 --- a/.github/workflows/regression_tests_cpu.yml +++ b/.github/workflows/regression_tests_cpu.yml @@ -1,6 +1,6 @@ name: Run Regression Tests on CPU -on: push +on: workflow_dispatch jobs: regression-cpu: diff --git
a/.github/workflows/regression_tests_gpu.yml b/.github/workflows/regression_tests_gpu.yml index a35d06f996..84debdb99c 100644 --- a/.github/workflows/regression_tests_gpu.yml +++ b/.github/workflows/regression_tests_gpu.yml @@ -19,6 +19,12 @@ jobs: cd .. pwd rm -rf _tool + - name: Check git version + run: git --version + - name: Update git + run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt-get update && sudo apt-get install git -y + - name: Check git version + run: git --version - name: Setup Python 3.8 uses: actions/setup-python@v3 with: From c73137ed1ba1919704be00fa89dac58c90fcd380 Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 23:41:33 +0000 Subject: [PATCH 18/20] Verified GPU regression tests to be working --- .github/workflows/regression_tests_gpu.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/regression_tests_gpu.yml b/.github/workflows/regression_tests_gpu.yml index 84debdb99c..acc81d1e6b 100644 --- a/.github/workflows/regression_tests_gpu.yml +++ b/.github/workflows/regression_tests_gpu.yml @@ -1,10 +1,10 @@ name: Run Regression Tests on GPU -on: push +on: workflow_dispatch jobs: regression-gpu: - # creates workflows for the 2 OS: ubuntu & windows + # creates workflows for CUDA 11.6 & CUDA 11.7 on ubuntu runs-on: [self-hosted, ci-gpu] strategy: fail-fast: false @@ -19,8 +19,6 @@ jobs: cd .. 
pwd rm -rf _tool - - name: Check git version - run: git --version - name: Update git run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt-get update && sudo apt-get install git -y - name: Check git version From ba6b4d0bc5df17db01f148e02d988479da02cc44 Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 23:42:48 +0000 Subject: [PATCH 19/20] Skipping regression tests on windows --- .github/workflows/regression_tests_cpu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/regression_tests_cpu.yml b/.github/workflows/regression_tests_cpu.yml index 117ea7a6ee..7f9168032b 100644 --- a/.github/workflows/regression_tests_cpu.yml +++ b/.github/workflows/regression_tests_cpu.yml @@ -9,7 +9,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-20.04, macOS-latest, windows-latest] + os: [ubuntu-20.04, macOS-latest] steps: - name: Setup Python 3.8 uses: actions/setup-python@v3 From 23fb33bbf9bb6f7ed7b0940002eef740a0405cbd Mon Sep 17 00:00:00 2001 From: agunapal Date: Fri, 18 Nov 2022 23:44:47 +0000 Subject: [PATCH 20/20] Skipping regression tests on windows --- .github/workflows/regression_tests_cpu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/regression_tests_cpu.yml b/.github/workflows/regression_tests_cpu.yml index 7f9168032b..82df959a7f 100644 --- a/.github/workflows/regression_tests_cpu.yml +++ b/.github/workflows/regression_tests_cpu.yml @@ -4,7 +4,7 @@ on: workflow_dipatch jobs: regression-cpu: - # creates workflows for the 3 OS: ubuntu, macOS & windows + # creates workflows for OS: ubuntu, macOS runs-on: ${{ matrix.os }} strategy: fail-fast: false