ci: add benchmarks/run_tests.sh (pytorch#6043)
Tested locally with:

```
$ test/benchmarks/run_tests.sh -L
[...]
+ make -C /src/pytorch/xla/test/benchmarks all
make: Entering directory '/src/pytorch/xla/test/benchmarks'
   AGGREGATE --accelerator=a6000 --test=inference --report=speedup
   DIFF a6000.inference.speedup.test
   RM a6000.inference.speedup.test.tmp
   AGGREGATE --accelerator=v100 --test=inference --report=histogram
   DIFF v100.inference.histogram.test
   RM v100.inference.histogram.test.tmp
   AGGREGATE --accelerator=v100 --test=inference --report=latest
   DIFF v100.inference.latest.test
   RM v100.inference.latest.test.tmp
   AGGREGATE --accelerator=v100 --test=inference --report=speedup
   DIFF v100.inference.speedup.test
   RM v100.inference.speedup.test.tmp
make: Leaving directory '/src/pytorch/xla/test/benchmarks'

$ test/benchmarks/run_tests.sh -L -V 1
[...]
+ make -C /src/pytorch/xla/test/benchmarks V=1 all
make: Entering directory '/src/pytorch/xla/test/benchmarks'
python3 ../../benchmarks/aggregate.py --accelerator=a6000 --test=inference --report=speedup \
        --input-dirname=. --format=csv > a6000.inference.speedup.test.tmp
git diff --no-index a6000.inference.speedup.test a6000.inference.speedup.test.tmp
rm -f a6000.inference.speedup.test.tmp
python3 ../../benchmarks/aggregate.py --accelerator=v100 --test=inference --report=histogram \
        --input-dirname=. --format=csv > v100.inference.histogram.test.tmp
git diff --no-index v100.inference.histogram.test v100.inference.histogram.test.tmp
rm -f v100.inference.histogram.test.tmp
python3 ../../benchmarks/aggregate.py --accelerator=v100 --test=inference --report=latest \
        --input-dirname=. --format=csv > v100.inference.latest.test.tmp
git diff --no-index v100.inference.latest.test v100.inference.latest.test.tmp
rm -f v100.inference.latest.test.tmp
python3 ../../benchmarks/aggregate.py --accelerator=v100 --test=inference --report=speedup \
        --input-dirname=. --format=csv > v100.inference.speedup.test.tmp
git diff --no-index v100.inference.speedup.test v100.inference.speedup.test.tmp
rm -f v100.inference.speedup.test.tmp
make: Leaving directory '/src/pytorch/xla/test/benchmarks'
```
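For context on what these targets do: each checked-in golden file `<accelerator>.<test>.<report>.test` is regenerated into a `.tmp` file by `benchmarks/aggregate.py` and compared with `git diff --no-index`, whose non-zero exit on a mismatch fails the make target. A minimal sketch of one such check, lifted directly from the `V=1` trace above:

```bash
# One golden-file check, as echoed by `make V=1` above.
golden=a6000.inference.speedup.test
python3 ../../benchmarks/aggregate.py --accelerator=a6000 --test=inference \
  --report=speedup --input-dirname=. --format=csv > "$golden.tmp"
git diff --no-index "$golden" "$golden.tmp"   # exits non-zero if the output drifted
rm -f "$golden.tmp"
```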
cota authored Dec 8, 2023
1 parent 599b79f commit 0313c6a
Showing 4 changed files with 70 additions and 7 deletions.
14 changes: 12 additions & 2 deletions .circleci/common.sh
```diff
@@ -196,6 +196,13 @@ function run_torch_xla_cpp_tests() {
   popd
 }
 
+function run_torch_xla_benchmark_tests() {
+  XLA_DIR=$1
+  pushd $XLA_DIR
+  echo "Running Benchmark Tests"
+  test/benchmarks/run_tests.sh -L""
+}
+
 function run_torch_xla_tests() {
   PYTORCH_DIR=$1
   XLA_DIR=$2
@@ -212,13 +219,16 @@ function run_torch_xla_tests() {
   export CXX_ABI=$(python -c "import torch;print(int(torch._C._GLIBCXX_USE_CXX11_ABI))")
 
   # TODO(yeounoh) test coverage workflow is not parallelized.
-  if [[ -z "$RUN_CPP_TESTS1" && -z "$RUN_CPP_TESTS2" && -z "$RUN_PYTHON_TESTS" || "$USE_COVERAGE" != "0" ]]; then
+  if [[ -z "$RUN_BENCHMARK_TESTS" && -z "$RUN_CPP_TESTS1" && -z "$RUN_CPP_TESTS2" && -z "$RUN_PYTHON_TESTS" || "$USE_COVERAGE" != "0" ]]; then
     run_torch_xla_python_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
     run_torch_xla_cpp_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
+    run_torch_xla_benchmark_tests $XLA_DIR
   else
-    # run python and cpp tests separately.
+    # run tests separately.
     if [[ "$RUN_PYTHON_TESTS" == "python_tests" ]]; then
       run_torch_xla_python_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
+    elif [[ "$RUN_BENCHMARK_TESTS" == "benchmark_tests" ]]; then
+      run_torch_xla_benchmark_tests $XLA_DIR
     else
       run_torch_xla_cpp_tests $PYTORCH_DIR $XLA_DIR $USE_COVERAGE
     fi
```
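The new helper only needs the checkout path as `$1`. For a quick local run outside CI, something like the following should work, assuming `common.sh` is safe to source and the checkout path is adjusted (both are assumptions here):

```bash
# Hypothetical local invocation of the new helper, outside of CI:
source .circleci/common.sh                         # defines run_torch_xla_benchmark_tests
run_torch_xla_benchmark_tests ~/src/pytorch/xla    # $1 = the pytorch/xla checkout
```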
4 changes: 3 additions & 1 deletion .github/workflows/_test.yml
```diff
@@ -50,6 +50,7 @@ jobs:
       matrix:
         include:
           # Use readable strings as they define the workflow titles.
+          - run_benchmark_tests: 'benchmark_tests'
           - run_cpp_tests1: 'cpp_tests1'
           - run_cpp_tests2: 'cpp_tests2'
           - run_python_tests: 'python_tests'
@@ -69,6 +70,7 @@ jobs:
       XLA_SKIP_XRT_TESTS: ${{ inputs.disable-xrt }}
       XLA_SKIP_TORCH_OP_TESTS: ${{ inputs.disable-pjrt }}
       XLA_SKIP_MP_OP_TESTS: ${{ inputs.disable-pjrt }}
+      RUN_BENCHMARK_TESTS: ${{ matrix.run_benchmark_tests }}
       RUN_CPP_TESTS1: ${{ matrix.run_cpp_tests1 }}
       RUN_CPP_TESTS2: ${{ matrix.run_cpp_tests2 }}
       RUN_PYTHON_TESTS: ${{ matrix.run_python_tests }}
@@ -110,7 +112,7 @@ jobs:
        run: |
          echo "DOCKER_IMAGE: ${DOCKER_IMAGE}"
          docker pull "${DOCKER_IMAGE}"
-         pid=$(docker run --shm-size=16g ${GPU_FLAG:-} -e USE_COVERAGE -e XLA_SKIP_XRT_TESTS -e XLA_SKIP_TORCH_OP_TESTS -e XLA_SKIP_MP_OP_TESTS -e RUN_CPP_TESTS1 -e RUN_CPP_TESTS2 -e RUN_PYTHON_TESTS -e RUN_XLA_OP_TESTS1 -e RUN_XLA_OP_TESTS2 -e RUN_XLA_OP_TESTS3 -e RUN_TORCH_MP_OP_TESTS -t -d -w "$WORKDIR" "${DOCKER_IMAGE}")
+         pid=$(docker run --shm-size=16g ${GPU_FLAG:-} -e USE_COVERAGE -e XLA_SKIP_XRT_TESTS -e XLA_SKIP_TORCH_OP_TESTS -e XLA_SKIP_MP_OP_TESTS -e RUN_BENCHMARK_TESTS -e RUN_CPP_TESTS1 -e RUN_CPP_TESTS2 -e RUN_PYTHON_TESTS -e RUN_XLA_OP_TESTS1 -e RUN_XLA_OP_TESTS2 -e RUN_XLA_OP_TESTS3 -e RUN_TORCH_MP_OP_TESTS -t -d -w "$WORKDIR" "${DOCKER_IMAGE}")
          echo "${GCLOUD_SERVICE_KEY}" | docker exec -i "${pid}" sh -c "cat >> /tmp/pytorch/xla/default_credentials.json"
          echo "pid=${pid}" >> "${GITHUB_ENV}"
      - name: Test
```
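One detail worth noting: variables set by the job only reach the container if they are explicitly forwarded, which is why the `docker run` line gains `-e RUN_BENCHMARK_TESTS`. A minimal illustration of the same mechanism (the image name is a placeholder):

```bash
# Without -e, a variable exported by the job is invisible inside the container:
export RUN_BENCHMARK_TESTS=benchmark_tests
docker run --rm some/image:latest \
  sh -c 'echo "${RUN_BENCHMARK_TESTS:-unset}"'   # prints "unset"
# With a bare -e VAR, docker copies the value from the client environment:
docker run --rm -e RUN_BENCHMARK_TESTS some/image:latest \
  sh -c 'echo "${RUN_BENCHMARK_TESTS:-unset}"'   # prints "benchmark_tests"
```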
13 changes: 9 additions & 4 deletions benchmarks/README.md
```diff
@@ -1,7 +1,7 @@
 # Benchmarking
 
-The two main benchmarking scripts are 
-- `experiment_runner.py` to run benchmark experiments, and 
+The two main benchmarking scripts are
+- `experiment_runner.py` to run benchmark experiments, and
 - `result_analyzer.py` to aggregate the benchmark result in CSV form.
 
 
@@ -18,9 +18,9 @@ git apply benchmarks/patches/mismatched_batch_size.patch
 
 And replace the `current_device_name` with your actual accelerator name.
 
-## Reducing benchmark noise 
+## Reducing benchmark noise
 
-It is important to keep the benchmark runs safe from external effects 
+It is important to keep the benchmark runs safe from external effects
 to reduce noise. Do the following:
 
 Sets the CPU statically to the highest tuneable frequency.
@@ -174,3 +174,8 @@ files. (Note: to reiterate, because we are plotting data from single day,
 Inductor gets speedup == 1 for all benchmarks). This plot also shows the
 correctness gap between Pytorch/XLA and Inductor; there are benchmarks that do
 run on Inductor but not on Pytorch/XLA.
+
+## Continuous Integration Tests
+
+Benchmark-related tests run by CI are located at `xla/test/benchmarks`.
+To run the tests locally, do `$ make -C xla/test/benchmarks`.
```
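As the new README section notes, the tests are plain make targets; the script's `-V` flag maps onto the Makefile's `V` variable, so the underlying commands can also be inspected without the wrapper:

```bash
# Run the CI benchmark tests by hand; V=1 makes the Makefile echo each
# aggregate.py / git diff command instead of the terse AGGREGATE/DIFF/RM lines.
make -C xla/test/benchmarks V=1 all
```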
46 changes: 46 additions & 0 deletions test/benchmarks/run_tests.sh
@@ -0,0 +1,46 @@
```bash
#!/bin/bash
set -ex
CDIR="$(cd "$(dirname "$0")" ; pwd -P)"
LOGFILE=/tmp/pytorch_benchmarks_test.log
VERBOSITY=0

# Note [Keep Going]
#
# Set the `CONTINUE_ON_ERROR` flag to `1` to make the CircleCI tests continue on error.
# This will allow you to see all the failures on your PR, not stopping with the first
# test failure like the default behavior.
CONTINUE_ON_ERROR="${CONTINUE_ON_ERROR:-0}"
if [[ "$CONTINUE_ON_ERROR" == "1" ]]; then
  set +e
fi

while getopts 'LV:' OPTION
do
  case $OPTION in
    L)
      LOGFILE=
      ;;
    V)
      VERBOSITY=$OPTARG
      ;;
  esac
done
shift $(($OPTIND - 1))

function run_make_tests {
  MAKE_V=""
  if [ "$VERBOSITY" != "0" ]; then
    MAKE_V="V=$VERBOSITY"
  fi
  make -C $CDIR $MAKE_V all
}

function run_tests {
  run_make_tests
}

if [ "$LOGFILE" != "" ]; then
  run_tests 2>&1 | tee $LOGFILE
else
  run_tests
fi
```
