From 8db318b0c0c984ea114136dedb8b63f6c1d0cb2d Mon Sep 17 00:00:00 2001
From: John lee <johnleenimh@gmail.com>
Date: Thu, 23 May 2024 11:01:25 +0100
Subject: [PATCH] Retry tests (#3229)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* retry tests

* retry with pytest last failed logic

greatly speeds up reruns of tests as only previously failed tests are
rerun.

define pytest cachedir for each pytest invocation to prevent interaction
between different selections of tests.

protect against exit code of 5 when a previous pytest invocation
had no failed tests which results in all tests being deselected.

use eval to avoid issues with the -k and -m expansions.

* tidy test scripts

* set correct root dir

* add option to treat unrun tests as failures

* interpret sigterm as sigint

* adjust timeouts

* respond to comments and add comments

---------

Co-authored-by: leej3 <johnleenimh@gmail.com>
Co-authored-by: vfdev <vfdev.5@gmail.com>
---
 .github/workflows/gpu-hvd-tests.yml         |   8 +-
 .github/workflows/gpu-tests.yml             |  21 ++--
 .github/workflows/hvd-tests.yml             |  10 +-
 .github/workflows/pytorch-version-tests.yml |  16 +--
 .github/workflows/tpu-tests.yml             |  20 ++--
 .github/workflows/unit-tests.yml            |  15 ++-
 tests/common-test-functionality.sh          | 102 ++++++++++++++++++++
 tests/ignite/conftest.py                    |  96 ++++++++++++++++++
 tests/run_cpu_tests.sh                      |  33 ++++---
 tests/run_gpu_tests.sh                      |  50 ++++++----
 tests/run_multinode_tests_in_docker.sh      |   2 +-
 tests/run_tpu_tests.sh                      |  16 ++-
 12 files changed, 316 insertions(+), 73 deletions(-)
 create mode 100644 tests/common-test-functionality.sh

diff --git a/.github/workflows/gpu-hvd-tests.yml b/.github/workflows/gpu-hvd-tests.yml
index 6661f46b501..2017cf8acda 100644
--- a/.github/workflows/gpu-hvd-tests.yml
+++ b/.github/workflows/gpu-hvd-tests.yml
@@ -22,7 +22,7 @@ jobs:
   gpu-hvd-tests:
     strategy:
       matrix:
-        pytorch-channel: [pytorch, ]
+        pytorch-channel: [pytorch]
       fail-fast: false
     env:
       DOCKER_IMAGE: "pytorch/conda-builder:cuda12.1"
@@ -128,8 +128,8 @@ jobs:
           # Can't build Horovod with recent pytorch due to pytorch required C++17 standard
           # and horovod is still using C++14
           # HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch]
-          # Using a similar hack as described here: 
-          # https://github.com/horovod/horovod/issues/3941#issuecomment-1732505345 
+          # Using a similar hack as described here:
+          # https://github.com/horovod/horovod/issues/3941#issuecomment-1732505345
           git clone --recursive https://github.com/horovod/horovod.git /horovod
           cd /horovod
           sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" CMakeLists.txt
@@ -152,7 +152,7 @@ jobs:
           set -xe
 
           bash tests/run_gpu_tests.sh 2 hvd
-          CUDA_VISIBLE_DEVICES="" pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml -vvv tests/ -m distributed -k hvd
+          CUDA_VISIBLE_DEVICES="" pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml -vvv tests/ignite -m distributed -k hvd
 
           EOF
           )
diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml
index 92345b3baed..faa84deffd9 100644
--- a/.github/workflows/gpu-tests.yml
+++ b/.github/workflows/gpu-tests.yml
@@ -29,7 +29,7 @@ jobs:
       REPOSITORY: ${{ github.repository }}
       PR_NUMBER: ${{ github.event.pull_request.number }}
     runs-on: linux.8xlarge.nvidia.gpu
-    timeout-minutes: 45
+    timeout-minutes: 85
 
     steps:
       - name: Clean workspace
@@ -121,18 +121,13 @@ jobs:
 
       - name: Run GPU Unit Tests
         continue-on-error: false
-        run: |
-
-          script=$(cat << EOF
-
-          set -xe
-
-          bash tests/run_gpu_tests.sh 2
-
-          EOF
-          )
-
-          docker exec -t pthd /bin/bash -c "${script}"
+        uses: nick-fields/retry@v3  # the test script is committed with mode 100644, so it is run through "bash"
+        with:
+          max_attempts: 5  # retries set USE_LAST_FAILED=1 so only previously failed tests are rerun
+          timeout_minutes: 25
+          shell: bash
+          command: docker exec -t pthd /bin/bash -xec 'bash tests/run_gpu_tests.sh 2'
+          new_command_on_retry: docker exec -e USE_LAST_FAILED=1 -t pthd /bin/bash -xec 'bash tests/run_gpu_tests.sh 2'
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
diff --git a/.github/workflows/hvd-tests.yml b/.github/workflows/hvd-tests.yml
index f483d21f38e..35e107f888b 100644
--- a/.github/workflows/hvd-tests.yml
+++ b/.github/workflows/hvd-tests.yml
@@ -75,9 +75,13 @@ jobs:
           target_dir: /tmp
 
       - name: Run Tests
-        shell: bash -l {0}
-        run: |
-          bash tests/run_cpu_tests.sh
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 5
+          timeout_minutes: 15
+          shell: bash
+          command: bash tests/run_cpu_tests.sh
+          new_command_on_retry: USE_LAST_FAILED=1 bash tests/run_cpu_tests.sh
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
diff --git a/.github/workflows/pytorch-version-tests.yml b/.github/workflows/pytorch-version-tests.yml
index e47f8faaa46..2e0ad5e0c98 100644
--- a/.github/workflows/pytorch-version-tests.yml
+++ b/.github/workflows/pytorch-version-tests.yml
@@ -10,7 +10,7 @@ on:
 jobs:
   build:
     runs-on: ubuntu-latest
-    timeout-minutes: 45
+    timeout-minutes: 85
     strategy:
       max-parallel: 5
       fail-fast: false
@@ -18,7 +18,7 @@ jobs:
         python-version: [3.8, 3.9, "3.10"]
         pytorch-version:
           [2.1.2, 2.0.1, 1.13.1, 1.12.1, 1.11.0, 1.10.0, 1.9.1, 1.8.1, 1.5.1]
-        exclude:            
+        exclude:
           - pytorch-version: 1.5.1
             python-version: 3.9
           - pytorch-version: 1.5.1
@@ -78,7 +78,7 @@ jobs:
           pip install -r requirements-dev.txt
           python setup.py install
 
-          # pytorch>=1.9.0,<1.11.0 is using "from setuptools import distutils; distutils.version.LooseVersion" anti-pattern 
+          # pytorch>=1.9.0,<1.11.0 is using "from setuptools import distutils; distutils.version.LooseVersion" anti-pattern
           # which raises the error: AttributeError: module 'distutils' has no attribute 'version' for setuptools>59
           bad_pth_version=$(python -c "import torch; print('.'.join(torch.__version__.split('.')[:2]) in ['1.9', '1.10'])")
           if [ "${bad_pth_version}" == "True" ]; then
@@ -92,9 +92,13 @@ jobs:
           target_dir: /tmp
 
       - name: Run Tests
-        shell: bash -l {0}
-        run: |
-          bash tests/run_cpu_tests.sh "not test_time_profilers"
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 5
+          timeout_minutes: 15
+          shell: bash
+          command: bash tests/run_cpu_tests.sh "not test_time_profilers"
+          new_command_on_retry: USE_LAST_FAILED=1 bash tests/run_cpu_tests.sh "not test_time_profilers"
 
   # create-issue:
   #   runs-on: ubuntu-latest
diff --git a/.github/workflows/tpu-tests.yml b/.github/workflows/tpu-tests.yml
index 08eaaf30d8f..ab14ad3c1de 100644
--- a/.github/workflows/tpu-tests.yml
+++ b/.github/workflows/tpu-tests.yml
@@ -89,13 +89,19 @@ jobs:
           target_dir: /tmp
 
       - name: Run Tests
-        run: |
-          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${Python_ROOT_DIR}/lib
-          export XRT_DEVICE_MAP="CPU:0;/job:localservice/replica:0/task:0/device:XLA_CPU:0"
-          export XRT_WORKERS="localservice:0;grpc://localhost:40934"
-
-          python -c "import torch_xla; print('torch xla version:', torch_xla.__version__)"
-          bash tests/run_tpu_tests.sh
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 5
+          timeout_minutes: 25
+          shell: bash
+          command: |
+            python -c "import torch_xla; print('torch xla version:', torch_xla.__version__)"
+            bash tests/run_tpu_tests.sh
+          new_command_on_retry: USE_LAST_FAILED=1 bash tests/run_tpu_tests.sh
+        env:
+          LD_LIBRARY_PATH: ${{ env.LD_LIBRARY_PATH }}:${{ env.Python_ROOT_DIR }}/lib
+          XRT_DEVICE_MAP: "CPU:0;/job:localservice/replica:0/task:0/device:XLA_CPU:0"
+          XRT_WORKERS: "localservice:0;grpc://localhost:40934"
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index a4b69725569..0b94e0d0e9e 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -31,7 +31,7 @@ concurrency:
 jobs:
   cpu-tests:
     runs-on: ${{ matrix.os }}
-    timeout-minutes: 45
+    timeout-minutes: 85
     defaults:
       run:
         shell: bash
@@ -40,7 +40,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11","3.12"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
         pytorch-channel: [pytorch, pytorch-nightly]
         include:
           # includes a single build on windows
@@ -102,7 +102,7 @@ jobs:
 
       - name: Run Mypy
         # https://github.com/pytorch/ignite/pull/2780
-        # 
+        #
         if: ${{ matrix.os == 'ubuntu-latest' && matrix.pytorch-channel == 'pytorch-nightly'}}
         run: |
           bash ./tests/run_code_style.sh mypy
@@ -120,8 +120,13 @@ jobs:
           cp -R /tmp/MNIST .
 
       - name: Run Tests
-        run: |
-          SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 5
+          timeout_minutes: 15
+          shell: bash
+          command: SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh
+          new_command_on_retry: USE_LAST_FAILED=1 SKIP_DISTRIB_TESTS=${{ matrix.skip-distrib-tests }} bash tests/run_cpu_tests.sh
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
diff --git a/tests/common-test-functionality.sh b/tests/common-test-functionality.sh
new file mode 100644
index 00000000000..daf9d284f6b
--- /dev/null
+++ b/tests/common-test-functionality.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+# Will catch exit code 5 when tests are deselected from previous passing run
+# (relevant for --last-failed-no-failures none)
+last_failed_no_failures_code=5
+
+#  functions shared across test files
+run_tests() {
+    # Set defaults
+    local core_args="-vvv tests/ignite"
+    local cache_dir=".unknown-cache"
+    local skip_distrib_tests=1
+    local match_tests_expression=""
+    local trap_deselected_exit_code=1
+    local use_last_failed=0
+    local use_coverage=0
+    local world_size=0
+    # Always clean up pytest.ini
+    trap 'rm -f pytest.ini' RETURN
+    # Parse arguments
+    while [[ $# -gt 0 ]]
+    do
+        key="$1"
+        case $key in
+            --core_args)
+            core_args="$2"
+            shift
+            shift
+            ;;
+            --cache_dir)
+            cache_dir="$2"
+            shift
+            shift
+            ;;
+            --skip_distrib_tests)
+            skip_distrib_tests="$2"
+            shift
+            shift
+            ;;
+            --match_tests_expression)
+            match_tests_expression="$2"
+            shift
+            shift
+            ;;
+            --trap_deselected_exit_code)
+            trap_deselected_exit_code="$2"
+            shift
+            shift
+            ;;
+            --use_last_failed)
+            use_last_failed="$2"
+            shift
+            shift
+            ;;
+            --use_coverage)
+            use_coverage="$2"
+            shift
+            shift
+            ;;
+            --world_size)
+            world_size="$2"
+            shift
+            shift
+            ;;
+            *)
+            echo "Error: Unknown argument $key"
+            exit 1
+            shift
+            ;;
+        esac
+    done
+
+    if [ "${skip_distrib_tests}" -eq "1" ]; then
+        # can be overwritten by core_args
+        skip_distrib_opt="-m 'not distributed and not tpu and not multinode_distributed'"
+    else
+        skip_distrib_opt=""
+    fi
+
+
+    echo [pytest] > pytest.ini ; echo "cache_dir=${cache_dir}" >> pytest.ini
+
+    # Assemble options for the pytest command
+    pytest_args="${skip_distrib_opt} ${core_args} --treat-unrun-as-failed -k '${match_tests_expression}'"
+    if [ "${use_last_failed:-0}" -eq "1" ] && [ -d "${cache_dir}" ]; then
+        pytest_args="--last-failed --last-failed-no-failures none ${pytest_args}"
+    fi
+    if [ "${use_coverage}" -eq "1" ]; then
+        pytest_args="--cov ignite --cov-append --cov-report term-missing --cov-report xml ${pytest_args}"
+    fi
+    if [ ! "${world_size}" -eq "0" ]; then
+        export WORLD_SIZE="${world_size}"
+        pytest_args="--dist=each --tx ${WORLD_SIZE}*popen//python=python ${pytest_args}"
+    fi
+
+    # Run the command
+    if [ "$trap_deselected_exit_code" -eq "1" ]; then
+        CUDA_VISIBLE_DEVICES="" eval "pytest ${pytest_args}" || { exit_code=$?; if [ "$exit_code" -eq ${last_failed_no_failures_code} ]; then echo "All tests deselected"; else exit $exit_code; fi; }
+    else
+        CUDA_VISIBLE_DEVICES="" eval "pytest ${pytest_args}"
+    fi
+}
diff --git a/tests/ignite/conftest.py b/tests/ignite/conftest.py
index 265ae97e3e7..d5546a75bae 100644
--- a/tests/ignite/conftest.py
+++ b/tests/ignite/conftest.py
@@ -1,8 +1,10 @@
 import functools
 import os
 import shutil
+import signal
 import sys
 import tempfile
+import threading
 import time
 from pathlib import Path
 
@@ -13,10 +15,57 @@
 import ignite.distributed as idist
 
 
+def pytest_addoption(parser):
+    """
+    Add custom command line options for the ignite test suite here.
+
+    This function is a pytest hook (due to its name) and is *"automatically"
+    executed at the start of a test run. See:
+    https://docs.pytest.org/en/latest/reference/reference.html#initialization-hooks
+
+    * "automatically" is true provided this conftest.py file is in the
+    root directory. See:
+    https://docs.pytest.org/en/latest/reference/customize.html#initialization-determining-rootdir-and-configfile
+    """
+    parser.addoption(
+        "--treat-unrun-as-failed",
+        action="store_true",
+        help="""
+        If a session is interrupted, treat the unrun tests as failed so that a
+        rerun with --last-failed runs any tests that have not passed or been
+        skipped. Note that if all tests in a module have been skipped, the
+        module will be skipped for all subsequent runs.
+        """,
+    )
+
+
+
 def pytest_configure(config):
+    """
+    This function is a pytest hook (due to its name) and is run after command
+    line parsing is complete in order to configure the test session.
+    """
     config.addinivalue_line("markers", "distributed: run distributed")
     config.addinivalue_line("markers", "multinode_distributed: distributed")
     config.addinivalue_line("markers", "tpu: run on tpu")
+    if config.option.treat_unrun_as_failed:
+        unrun_tracker = UnrunTracker()
+        config.pluginmanager.register(unrun_tracker, "unrun_tracker_plugin")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def term_handler():
+    """
+    This allows the pytest session to be terminated upon retries on CI. It may
+    be worth using this fixture solely in that context. For a discussion on
+    whether sigterm should be ignored and why pytest usually ignores it see:
+    https://github.com/pytest-dev/pytest/issues/5243
+    """
+    if threading.current_thread() is threading.main_thread() and hasattr(signal, "SIGTERM"):
+        orig = signal.signal(signal.SIGTERM, signal.getsignal(signal.SIGINT))
+        yield
+        signal.signal(signal.SIGTERM, orig)
+    else:
+        yield  # Just pass through if SIGTERM isn't supported or we are not in the main thread
 
 
 @pytest.fixture(
@@ -447,6 +496,40 @@ def distributed(request, local_rank, world_size):
         raise RuntimeError(f"Invalid parameter value for `distributed` fixture, given {request.param}")
 
 
+class UnrunTracker:
+    """
+    Keeps track of unrun tests to improve the user experience when using the
+    "--last-failed" pytest option and a test session is interrupted. This is
+    particularly useful on CI when rerunning "failing" tests where the failure
+    was due to a deadlock and many tests weren't actually run so they didn't
+    actually fail. This is a pytest plugin that implements some standard hooks
+    to modify the test session. Its functionality can be added to a test session
+    by registering it with the pytest plugin manager.
+    """
+
+    def __init__(self):
+        self.unrun_tests = []
+
+    def pytest_collection_finish(self, session):
+        # At the end of the collection, add all items to the unrun_tests list
+        self.unrun_tests.extend(session.items)
+
+    def pytest_runtest_teardown(self, item):
+        if item in self.unrun_tests:
+            self.unrun_tests.remove(item)
+
+    def record_unrun_as_failed(self, session, exitstatus):
+        # Get current lastfailed entries (if any)
+        lastfailed = session.config.cache.get("cache/lastfailed", {})
+
+        # Add unrun tests to lastfailed
+        for test in self.unrun_tests:
+            lastfailed[test.nodeid] = True
+
+        # Update the cache with the new lastfailed
+        session.config.cache.set("cache/lastfailed", lastfailed)
+
+
 @pytest.hookimpl
 def pytest_pyfunc_call(pyfuncitem: pytest.Function) -> None:
     if any(fx in pyfuncitem.fixturenames for fx in ["distributed", "multinode_distributed"]):
@@ -508,3 +591,16 @@ def xla_worker(index, fn):
                 assert ex_.code == 0, "Didn't successfully exit in XLA test"
 
         pyfuncitem.obj = functools.partial(testfunc_wrapper, pyfuncitem.obj)
+
+
+def pytest_sessionfinish(session, exitstatus):
+    """
+    Any functionality that should be run at the end of the session should be
+    added here.
+    This is a pytest hook (due to its name) and is called after the whole test
+    run finished, right before returning the exit status to the system.
+    """
+    # If requested by the user, track all unrun tests and add them to the lastfailed cache
+    if session.config.option.treat_unrun_as_failed:
+        unrun_tracker = session.config.pluginmanager.get_plugin("unrun_tracker_plugin")
+        unrun_tracker.record_unrun_as_failed(session, exitstatus)
diff --git a/tests/run_cpu_tests.sh b/tests/run_cpu_tests.sh
index 2297be94219..7d647de1e01 100644
--- a/tests/run_cpu_tests.sh
+++ b/tests/run_cpu_tests.sh
@@ -1,22 +1,31 @@
 #!/bin/bash
-
+source "$(dirname "$0")/common-test-functionality.sh"
 set -xeu
 
-if [ "${SKIP_DISTRIB_TESTS:-0}" -eq "1" ]; then
-    skip_distrib_opt=(-m "not distributed and not tpu and not multinode_distributed")
-else
-    skip_distrib_opt=(-m "")
-fi
+skip_distrib_tests=${SKIP_DISTRIB_TESTS:-0}
+use_last_failed=${USE_LAST_FAILED:-0}
+match_tests_expression=${1:-""}
 
-MATCH_TESTS_EXPRESSION=${1:-""}
 
-CUDA_VISIBLE_DEVICES="" pytest --tx 4*popen//python=python --cov ignite --cov-report term-missing --cov-report xml -vvv tests "${skip_distrib_opt[@]}" -k "$MATCH_TESTS_EXPRESSION"
+run_tests \
+    --core_args "--tx 4*popen//python=python -vvv tests/ignite" \
+    --cache_dir ".cpu-not-distrib" \
+    --skip_distrib_tests "${skip_distrib_tests}" \
+    --use_coverage 1 \
+    --match_tests_expression "${match_tests_expression}" \
+    --use_last_failed ${use_last_failed}
 
 # https://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_02
-if [ "${SKIP_DISTRIB_TESTS:-0}" -eq "1" ]; then
+if [ "${skip_distrib_tests}" -eq "1" ]; then
     exit 0
 fi
 
-export WORLD_SIZE=2
-CUDA_VISIBLE_DEVICES="" pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml --dist=each --tx $WORLD_SIZE*popen//python=python tests -m distributed -vvv -k "$MATCH_TESTS_EXPRESSION"
-unset WORLD_SIZE
+# Run 2 processes with --dist=each
+run_tests \
+    --core_args "-m distributed -vvv tests/ignite" \
+    --world_size 2 \
+    --cache_dir ".cpu-distrib" \
+    --skip_distrib_tests 0 \
+    --use_coverage 1 \
+    --match_tests_expression "${match_tests_expression}" \
+    --use_last_failed ${use_last_failed}
diff --git a/tests/run_gpu_tests.sh b/tests/run_gpu_tests.sh
index 3146443a531..371c70aee37 100644
--- a/tests/run_gpu_tests.sh
+++ b/tests/run_gpu_tests.sh
@@ -1,35 +1,47 @@
 #!/bin/bash
+source "$(dirname "$0")/common-test-functionality.sh"
+set -xeu
 
-if [ -z "$1" ]; then
-    ngpus=1
-else
-    ngpus=$1
-fi
-
-MATCH_TESTS_EXPRESSION=${2:-""}
+skip_distrib_tests=${SKIP_DISTRIB_TESTS:-0}  # default 0 as before: distributed stages run unless explicitly skipped
+use_last_failed=${USE_LAST_FAILED:-0}
+ngpus=${1:-1}
 
-if [ -z "$MATCH_TESTS_EXPRESSION" ]; then
+match_tests_expression=${2:-""}
+if [ -z "$match_tests_expression" ]; then
     cuda_pattern="cuda"
 else
-    cuda_pattern="cuda and $MATCH_TESTS_EXPRESSION"
+    cuda_pattern="cuda and $match_tests_expression"
 fi
 
-set -xeu
-
-pytest --cov ignite --cov-report term-missing --cov-report xml -vvv tests/ -k "$cuda_pattern"
+run_tests \
+    --core_args "-vvv tests/ignite" \
+    --cache_dir ".gpu-cuda" \
+    --skip_distrib_tests "${skip_distrib_tests}" \
+    --use_coverage 1 \
+    --match_tests_expression "${cuda_pattern}" \
+    --use_last_failed ${use_last_failed}
 
 # https://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_02
-if [ "${SKIP_DISTRIB_TESTS:-0}" -eq "1" ]; then
+if [ "${skip_distrib_tests}" -eq "1" ]; then
     exit 0
 fi
 
-pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml -vvv tests/ -m distributed -k "$MATCH_TESTS_EXPRESSION"
+run_tests \
+    --core_args "-vvv -m distributed tests/ignite" \
+    --cache_dir ".gpu-distrib" \
+    --skip_distrib_tests 0 \
+    --use_coverage 1 \
+    --match_tests_expression "${match_tests_expression}" \
+    --use_last_failed ${use_last_failed}
 
 
 if [ ${ngpus} -gt 1 ]; then
-
-    export WORLD_SIZE=${ngpus}
-    pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml --dist=each --tx ${WORLD_SIZE}*popen//python=python tests -m distributed -vvv -k "$MATCH_TESTS_EXPRESSION"
-    unset WORLD_SIZE
-
+    run_tests \
+        --core_args "-vvv -m distributed tests/ignite" \
+        --world_size "${ngpus}" \
+        --cache_dir ".gpu-distrib-multi" \
+        --skip_distrib_tests 0 \
+        --use_coverage 1 \
+        --match_tests_expression "${match_tests_expression}" \
+        --use_last_failed ${use_last_failed}
 fi
diff --git a/tests/run_multinode_tests_in_docker.sh b/tests/run_multinode_tests_in_docker.sh
index 0dca1b60327..041284bb97c 100644
--- a/tests/run_multinode_tests_in_docker.sh
+++ b/tests/run_multinode_tests_in_docker.sh
@@ -36,7 +36,7 @@ RUN pip install --no-cache-dir mock pytest pytest-xdist scikit-learn scikit-imag
 EOF
 
 docker_python_version=`docker run --rm -i $docker_image python -c "import sys; print(str(sys.version_info[0]) + \".\" + str(sys.version_info[1]), end=\"\")"`
-cmd="pytest --dist=each --tx $nproc_per_node*popen//python${docker_python_version} -m multinode_distributed -vvv tests"
+cmd="pytest --dist=each --tx $nproc_per_node*popen//python${docker_python_version} -m multinode_distributed -vvv tests/ignite"
 
 export MASTER_ADDR=node0
 export MASTER_PORT=9999
diff --git a/tests/run_tpu_tests.sh b/tests/run_tpu_tests.sh
index 0877de858ae..5ea0f993173 100644
--- a/tests/run_tpu_tests.sh
+++ b/tests/run_tpu_tests.sh
@@ -1,10 +1,20 @@
 #!/bin/bash
-
+source "$(dirname "$0")/common-test-functionality.sh"
 set -xeu
+use_last_failed=${USE_LAST_FAILED:-0}
+
+run_tests \
+    --core_args "-vvv -m tpu tests/ignite" \
+    --cache_dir ".tpu" \
+    --use_coverage 1 \
+    --use_last_failed ${use_last_failed}
 
-pytest --cov ignite --cov-report term-missing --cov-report xml tests/ -vvv -m tpu
 
 if [ -z ${NUM_TPU_WORKERS+x} ]; then
     export NUM_TPU_WORKERS=1
-    pytest --cov ignite --cov-append --cov-report term-missing --cov-report xml tests/ -vvv -m tpu
+    run_tests \
+        --core_args "-vvv -m tpu tests/ignite" \
+        --cache_dir ".tpu-multi" \
+        --use_coverage 1 \
+        --use_last_failed ${use_last_failed}
 fi