Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create nightly job for torch.compile benchmarks #2835

Merged
merged 19 commits into from
Dec 9, 2023
Merged
43 changes: 43 additions & 0 deletions .github/workflows/benchmark_torch_compile_nightly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Nightly benchmark of torch.compile models on a self-hosted GPU runner.
name: Benchmark torch.compile models nightly

on:
  # run every day at 9:15pm (GitHub Actions cron schedules are evaluated in UTC)
  schedule:
    - cron: '15 21 * * *'

jobs:
  nightly:
    strategy:
      fail-fast: false
    runs-on: [self-hosted, gpu]
    # generous 22-hour ceiling; the full model sweep is long-running
    timeout-minutes: 1320
    steps:
      - name: Clean up previous run
        run: |
          echo "Cleaning up previous run"
          cd $RUNNER_WORKSPACE
          pwd
          cd ..
          pwd
          rm -rf _tool
      - name: Setup Python 3.8
        uses: actions/setup-python@v4
        with:
          python-version: 3.8
          architecture: x64
      - name: Setup Java 17
        uses: actions/setup-java@v3
        with:
          distribution: 'zulu'
          java-version: '17'
      - name: Checkout TorchServe
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Install dependencies
        run: |
          sudo apt-get update -y
          # apache2-utils provides `ab` (Apache Bench), used by benchmark-ab.py
          sudo apt-get install -y apache2-utils
          pip install -r benchmarks/requirements-ab.txt
      - name: Benchmark gpu nightly
        # --nightly True makes auto_benchmark.py install nightly torch packages
        run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_torch_compile_gpu.yaml --skip false --nightly True
24 changes: 18 additions & 6 deletions benchmarks/auto_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,14 +122,16 @@ def load_benchmark_config(bm_config_path, skip_ts_install, skip_upload):
return benchmark_config.bm_config


def benchmark_env_setup(bm_config, skip_ts_install, nightly):
    """Prepare the environment for a benchmark run.

    Installs TorchServe (unless skipped), stages the benchmark working
    directory, materializes the per-model JSON config, and enables the
    launcher's logical-core option where the hardware calls for it.

    :param bm_config: parsed benchmark configuration dict; this function reads
        the "hardware", "version", "model_config_path" and "models" keys.
    :param skip_ts_install: truthy to skip (re)installing TorchServe.
    :param nightly: truthy to install the nightly torch packages
        (forwarded to install_torchserve, which appends --nightly_torch).
    """
    install_torchserve(
        skip_ts_install, bm_config["hardware"], bm_config["version"], nightly
    )
    setup_benchmark_path(bm_config["model_config_path"])
    build_model_json_config(bm_config["models"])
    enable_launcher_with_logical_core(bm_config["hardware"])


def install_torchserve(skip_ts_install, hw, ts_version):
def install_torchserve(skip_ts_install, hw, ts_version, nightly):
if skip_ts_install:
return

Expand All @@ -154,6 +156,8 @@ def install_torchserve(skip_ts_install, hw, ts_version):
cmd = "python ts_scripts/install_dependencies.py --environment dev --neuronx"
else:
cmd = "python ts_scripts/install_dependencies.py --environment dev"
if nightly:
cmd += " --nightly_torch"
execute(cmd, wait=True)
print("successfully install install_dependencies.py")

Expand Down Expand Up @@ -290,9 +294,12 @@ def main():
)
parser.add_argument(
"--skip_upload",
help="true: skip uploading commands . default: false",
help="true: skip uploading commands. default: false",
)
parser.add_argument(
"--nightly",
help="true: install nightly version of torch package. default: false",
)

arguments = parser.parse_args()
skip_ts_config = (
False
Expand All @@ -304,8 +311,13 @@ def main():
if arguments.skip_upload is not None and arguments.skip_upload.lower() == "true"
else False
)
nightly = (
True
if arguments.nightly is not None and arguments.nightly.lower() == "true"
else False
)
bm_config = load_benchmark_config(arguments.input, skip_ts_config, skip_upload)
benchmark_env_setup(bm_config, skip_ts_config)
benchmark_env_setup(bm_config, skip_ts_config, nightly)
run_benchmark(bm_config)
clean_up_benchmark_env(bm_config)
print("benchmark_serving.sh finished successfully.")
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmark-ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def warm_up():
click.secho("\n\nExecuting warm-up ...", fg="green")

ab_cmd = (
f"ab -c {execution_params['concurrency']} -n {execution_params['requests']/10} -k -p "
f"ab -c {execution_params['concurrency']} -s 300 -n {execution_params['requests']/10} -k -p "
f"{execution_params['tmp_dir']}/benchmark/input -T {execution_params['content_type']} "
f"{execution_params['inference_url']}/{execution_params['inference_model_url']} > "
f"{execution_params['result_file']}"
Expand All @@ -247,7 +247,7 @@ def run_benchmark():

click.secho("\n\nExecuting inference performance tests ...", fg="green")
ab_cmd = (
f"ab -c {execution_params['concurrency']} -n {execution_params['requests']} -k -p "
f"ab -c {execution_params['concurrency']} -s 300 -n {execution_params['requests']} -k -p "
f"{execution_params['tmp_dir']}/benchmark/input -T {execution_params['content_type']} "
f"{execution_params['inference_url']}/{execution_params['inference_model_url']} > "
f"{execution_params['result_file']}"
Expand Down
48 changes: 48 additions & 0 deletions benchmarks/benchmark_config_torch_compile_gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Torchserve version to be installed. It can be one of the options
# - branch : "master"
# - nightly: "2022.3.16"
# - release: "0.5.3"
# Nightly build will be installed if "ts_version" is not specified
#ts_version:
#  branch: &ts_version "master"

# a list of model config yaml files defined in benchmarks/models_config
# or a list of model config yaml files with full path
models:
  - "bert_torch_compile_gpu.yaml"
  - "resnet50_torch_compile_gpu.yaml"
  - "vgg16_torch_compile_gpu.yaml"

# benchmark on "cpu" or "gpu".
# "cpu" is set if "hardware" is not specified
hardware: &hardware "gpu"

# load prometheus metrics report to remote storage or local different path if "metrics_cmd" is set.
# the command line to load prometheus metrics report to remote system.
# Here is an example of AWS cloudwatch command:
# Note:
#   - keep the values in the same order as the command definition.
#   - set up the command before enabling `metrics_cmd`.
#     For example, aws client and AWS credentials need to be set up before trying this example.
metrics_cmd:
  - "cmd": "aws cloudwatch put-metric-data"
  - "--namespace": ["torchserve_benchmark_nightly_torch_compile_", *hardware]
  - "--region": "us-east-2"
  - "--metric-data": 'file:///tmp/benchmark/logs/stats_metrics.json'

# load report to remote storage or local different path if "report_cmd" is set.
# the command line to load report to remote storage.
# Here is an example of AWS cloudwatch command:
# Note:
#   - keep the values in the same order as the command.
#   - set up the command before enabling `report_cmd`.
#     For example, aws client, AWS credentials and S3 bucket
#     need to be set up before trying this example.
#   - "today()" is a keyword to apply the current date in the path
#     For example, the dest path in the following example is
#     s3://torchserve-model-serving/benchmark/2022-03-18/gpu
report_cmd:
  - "cmd": "aws s3 cp --recursive"
  - "source": '/tmp/ts_benchmark/'
  - "dest": ['s3://torchserve-benchmark/torch-compile-nightly', "today()", *hardware]
42 changes: 42 additions & 0 deletions benchmarks/models_config/bert_torch_compile_gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# Benchmark matrix for BERT: TorchScript-ed model vs. torch.compile default
# mode, both swept over batch sizes 1-16 with ab at concurrency 100.
bert:
  scripted_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/bert-scripted.mar
    workers:
      - 4
    batch_delay: 100
    batch_size:
      - 1
      - 2
      - 4
      - 8
      - 16
    input: "./examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text_captum_input.txt"
    requests: 50000
    concurrency: 100
    backend_profiling: False
    exec_env: "local"
    processors:
      - "cpu"
      - "gpus": "all"
  torch_compile_default_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/bert-default.mar
    workers:
      - 4
    batch_delay: 100
    batch_size:
      - 1
      - 2
      - 4
      - 8
      - 16
    input: "./examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text_captum_input.txt"
    requests: 50000
    concurrency: 100
    backend_profiling: False
    exec_env: "local"
    processors:
      - "cpu"
      - "gpus": "all"
42 changes: 42 additions & 0 deletions benchmarks/models_config/resnet50_torch_compile_gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# Benchmark matrix for ResNet-50: TorchScript-ed model vs. torch.compile
# default mode, both swept over batch sizes 1-16 with ab at concurrency 100.
resnet50:
  scripted_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/resnet-50-scripted.mar
    workers:
      - 4
    batch_delay: 100
    batch_size:
      - 1
      - 2
      - 4
      - 8
      - 16
    input: "./examples/image_classifier/kitten.jpg"
    requests: 10000
    concurrency: 100
    backend_profiling: False
    exec_env: "local"
    processors:
      - "cpu"
      - "gpus": "all"
  torch_compile_default_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/resnet-50-default.mar
    workers:
      - 4
    batch_delay: 100
    batch_size:
      - 1
      - 2
      - 4
      - 8
      - 16
    input: "./examples/image_classifier/kitten.jpg"
    requests: 10000
    concurrency: 100
    backend_profiling: False
    exec_env: "local"
    processors:
      - "cpu"
      - "gpus": "all"
42 changes: 42 additions & 0 deletions benchmarks/models_config/vgg16_torch_compile_gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
# Benchmark matrix for VGG-16: TorchScript-ed model vs. torch.compile default
# mode, both swept over batch sizes 1-16 with ab at concurrency 100.
vgg16:
  scripted_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/vgg-16-scripted.mar
    workers:
      - 4
    batch_delay: 100
    batch_size:
      - 1
      - 2
      - 4
      - 8
      - 16
    input: "./examples/image_classifier/kitten.jpg"
    requests: 10000
    concurrency: 100
    backend_profiling: False
    exec_env: "local"
    processors:
      - "cpu"
      - "gpus": "all"
  torch_compile_default_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/vgg-16-default.mar
    workers:
      - 4
    batch_delay: 100
    batch_size:
      - 1
      - 2
      - 4
      - 8
      - 16
    input: "./examples/image_classifier/kitten.jpg"
    requests: 10000
    concurrency: 100
    backend_profiling: False
    exec_env: "local"
    processors:
      - "cpu"
      - "gpus": "all"
Loading