From 26977043bf72acbb4be704af5c6153212d1a282d Mon Sep 17 00:00:00 2001
From: Carlos Mocholí
Date: Wed, 1 Dec 2021 20:58:18 +0100
Subject: [PATCH] Add separate CI job for slow tests (#10830)

---
 .github/workflows/ci_test-slow.yml      | 83 +++++++++++++++++++++++++
 tests/benchmarks/generate_comparison.py |  6 +-
 tests/conftest.py                       | 27 +++++---
 tests/helpers/runif.py                  |  9 +++
 tests/loops/test_loops.py               |  3 +-
 tests/utilities/test_auto_restart.py    | 33 +++++-----
 6 files changed, 127 insertions(+), 34 deletions(-)
 create mode 100644 .github/workflows/ci_test-slow.yml

diff --git a/.github/workflows/ci_test-slow.yml b/.github/workflows/ci_test-slow.yml
new file mode 100644
index 0000000000000..b9a07c4b6a26f
--- /dev/null
+++ b/.github/workflows/ci_test-slow.yml
@@ -0,0 +1,83 @@
+name: Test
+
+# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
+on:  # Trigger the workflow on push or pull request, but only for the master and release branches
+  push:
+    branches: [master, "release/*"]
+  pull_request:
+    branches: [master, "release/*"]
+
+jobs:
+  slow:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macOS-latest]
+        # same config as '.azure-pipelines/gpu-tests.yml'
+        python-version: ["3.7"]
+        pytorch-version: ["1.8"]
+
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Weekly reset caching
+        run: echo "::set-output name=period::$(python -c 'import time ; days = time.time() / 60 / 60 / 24 ; print(int(days / 7))' 2>&1)"
+        id: times
+
+      - name: Get pip cache
+        id: pip-cache
+        run: |
+          python -c "from pip._internal.locations import USER_CACHE_DIR; print('::set-output name=dir::' + USER_CACHE_DIR)"
+
+      - name: Cache pip
+        uses: actions/cache@v2
+        with:
+          path: ${{ steps.pip-cache.outputs.dir }}
+          key: ${{ runner.os }}-pip-td${{ steps.times.outputs.period }}-py${{ matrix.python-version }}-${{ hashFiles('requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-td${{ steps.times.outputs.period }}-py${{ matrix.python-version }}-
+
+      - name: Install dependencies
+        run: |
+          # adjust versions according to the installed Torch version
+          python ./requirements/adjust_versions.py requirements.txt ${{ matrix.pytorch-version }}
+          pip install --requirement requirements.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
+          pip install --requirement requirements/test.txt
+          pip list
+        shell: bash
+
+      - name: Tests
+        run: |
+          coverage run --source pytorch_lightning -m pytest tests -v --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}.xml
+        env:
+          PL_RUN_SLOW_TESTS: 1
+
+      - name: Upload pytest test results
+        uses: actions/upload-artifact@v2
+        with:
+          name: pytest-results-${{ runner.os }}-${{ matrix.python-version }}
+          path: junit/test-results-${{ runner.os }}-${{ matrix.python-version }}.xml
+        if: failure()
+
+      - name: Statistics
+        if: success()
+        run: |
+          coverage report
+          coverage xml
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v1
+        if: success()
+        # see: https://github.com/actions/toolkit/issues/399
+        continue-on-error: true
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          file: coverage.xml
+          flags: cpu,pytest,torch${{ matrix.pytorch-version }}
+          name: CPU-coverage
+          fail_ci_if_error: false
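Note: the `Weekly reset caching` step above folds the current Unix time into a whole-week index, so the pip cache key (its `-td<period>-` segment) changes once every seven days and dependencies are re-resolved weekly. A minimal standalone sketch of that computation, assuming nothing beyond the step's own one-liner:

    import time

    def weekly_period(now=None):
        # Whole weeks elapsed since the Unix epoch; this integer is embedded
        # in the actions/cache key as the `-td<period>-` segment above.
        now = time.time() if now is None else now
        days = now / 60 / 60 / 24
        return int(days / 7)

    # Around the patch date (2021-12-01) this prints 2708; one week later it
    # becomes 2709 and every cache key rolls over.
    print(weekly_period())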
diff --git a/tests/benchmarks/generate_comparison.py b/tests/benchmarks/generate_comparison.py
index bc95b5d9cf591..984ffdd19c163 100644
--- a/tests/benchmarks/generate_comparison.py
+++ b/tests/benchmarks/generate_comparison.py
@@ -13,9 +13,6 @@
 # limitations under the License.
 import os
 
-import matplotlib.pylab as plt
-import pandas as pd
-
 from tests.benchmarks.test_basic_parity import measure_loops
 from tests.helpers.advanced_models import ParityModuleMNIST, ParityModuleRNN
 
@@ -27,6 +24,9 @@
 
 
 def _main():
+    import matplotlib.pylab as plt
+    import pandas as pd
+
     fig, axarr = plt.subplots(nrows=len(MODEL_CLASSES))
 
     for i, cls_model in enumerate(MODEL_CLASSES):
diff --git a/tests/conftest.py b/tests/conftest.py
index 5871921e6c3dd..ae3c6435515f1 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -191,13 +191,20 @@ def single_process_pg():
 
 
 def pytest_collection_modifyitems(items):
-    if os.getenv("PL_RUN_STANDALONE_TESTS", "0") != "1":
-        return
-    # filter out non-standalone tests
-    items[:] = [
-        item
-        for item in items
-        for marker in item.own_markers
-        # has `@RunIf(standalone=True)`
-        if marker.name == "skipif" and marker.kwargs.get("standalone")
-    ]
+    # filter out special tests
+    if os.getenv("PL_RUN_STANDALONE_TESTS", "0") == "1":
+        items[:] = [
+            item
+            for item in items
+            for marker in item.own_markers
+            # has `@RunIf(standalone=True)`
+            if marker.name == "skipif" and marker.kwargs.get("standalone")
+        ]
+    elif os.getenv("PL_RUN_SLOW_TESTS", "0") == "1":
+        items[:] = [
+            item
+            for item in items
+            for marker in item.own_markers
+            # has `@RunIf(slow=True)`
+            if marker.name == "skipif" and marker.kwargs.get("slow")
+        ]
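Note: this hook pairs with `RunIf` below, which attaches a `skipif` marker whose kwargs carry the `standalone`/`slow` flag that the list comprehensions above look for. A hypothetical test module illustrating the resulting behavior (the test names are made up for this sketch):

    from tests.helpers.runif import RunIf


    @RunIf(slow=True)
    def test_expensive_path():
        ...  # kept in the collection only when PL_RUN_SLOW_TESTS=1


    def test_cheap_path():
        ...  # dropped from the collection when PL_RUN_SLOW_TESTS=1

With this in place, `PL_RUN_SLOW_TESTS=1 pytest tests` runs exactly the slow subset, while a plain `pytest tests` collects everything but skips the slow tests through their `skipif` condition.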
""" conditions = [] @@ -195,6 +197,13 @@ def __new__( conditions.append(not _OMEGACONF_AVAILABLE) reasons.append("omegaconf") + if slow: + env_flag = os.getenv("PL_RUN_SLOW_TESTS", "0") + conditions.append(env_flag != "1") + reasons.append("Slow test") + # used in tests/conftest.py::pytest_collection_modifyitems + kwargs["slow"] = True + reasons = [rs for cond, rs in zip(conditions, reasons) if cond] return pytest.mark.skipif( *args, condition=any(conditions), reason=f"Requires: [{' + '.join(reasons)}]", **kwargs diff --git a/tests/loops/test_loops.py b/tests/loops/test_loops.py index 6338ed00e481d..b1f93d82ab616 100644 --- a/tests/loops/test_loops.py +++ b/tests/loops/test_loops.py @@ -907,8 +907,7 @@ def val_dataloader(self): @RunIf(min_torch="1.8.0") @pytest.mark.parametrize("should_fail", [False, True]) -# False is de-activated due to slowness -@pytest.mark.parametrize("persistent_workers", [True]) +@pytest.mark.parametrize("persistent_workers", [pytest.param(False, marks=RunIf(slow=True)), True]) def test_workers_are_shutdown(tmpdir, should_fail, persistent_workers): # `num_workers == 1` uses `_MultiProcessingDataLoaderIter` # `persistent_workers` makes sure `self._iterator` gets set on the `DataLoader` instance diff --git a/tests/utilities/test_auto_restart.py b/tests/utilities/test_auto_restart.py index 4c2c440797dd2..e33bc91621a2b 100644 --- a/tests/utilities/test_auto_restart.py +++ b/tests/utilities/test_auto_restart.py @@ -177,9 +177,8 @@ def test_fast_forward_on_sequential_sampler(): assert next(batch_sampler_iter) == [6, 7, 8] -@pytest.mark.skipif(torch.cuda.is_available(), reason="todo (tchaton) Need more investigation") def test_fast_forward_on_random_sampler(): - """This test ensures ``FastForwardSampler`` applied to ``RandomSampler`` correctly retrived the right next + """This test ensures ``FastForwardSampler`` applied to ``RandomSampler`` correctly retrieved the right next batch on restart.""" seed = 42 seed_everything(42) @@ -255,8 +254,9 @@ def __next__(self): @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@pytest.mark.skipif(torch.cuda.is_available(), reason="This test takes around 30 sec and should be skipped in Azure CI") -@pytest.mark.parametrize("num_workers", [0, 1, 2]) +@pytest.mark.parametrize( + "num_workers", [0, pytest.param(1, marks=RunIf(slow=True)), pytest.param(2, marks=RunIf(slow=True))] +) @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) def test_fast_forward_sampler_over_iterable_dataset(num_workers): """This test ensures ``FastForwardSampler`` and ``CaptureIterableDataset`` are properly being used to capture @@ -368,8 +368,7 @@ def _test_fast_forward_sampler_with_distributed_sampler(rank, worldsize): assert sampler.state_dict(num_yielded)[0]["current_iteration"] == 16 -@pytest.mark.skipif(torch.cuda.is_available(), reason="This test takes around 25 sec and should be skipped in Azure CI") -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, slow=True) def test_fast_forward_sampler_with_distributed_sampler(): """Make sure result logging works with DDP.""" tutils.set_random_main_port() @@ -638,14 +637,13 @@ def all_gather(tensor, world_size): @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@pytest.mark.skipif(torch.cuda.is_available(), reason="This test takes around 45 sec and should be skipped in Azure CI") +@RunIf(slow=True) def test_fast_forward_sampler_iterative_dataset(): _test_fast_forward_sampler_with_distributed_sampler_and_iterative_dataset(0, 1) @mock.patch.dict(os.environ, 
{"PL_FAULT_TOLERANT_TRAINING": "1"}) -@pytest.mark.skipif(torch.cuda.is_available(), reason="This test takes around 55 sec and should be skipped in Azure CI") -@RunIf(skip_windows=True) +@RunIf(skip_windows=True, slow=True) def test_fast_forward_sampler_with_distributed_sampler_and_iterative_dataset(): """Make sure result logging works with DDP.""" tutils.set_random_main_port() @@ -668,9 +666,9 @@ def create_iterable_dataset(batch_size, num_workers, attr_name="iter_sampler", w return dataset -@mock.patch("pytorch_lightning.trainer.data_loading._validate_fault_tolerant_automatic", lambda x, y: None) +@mock.patch("pytorch_lightning.trainer.data_loading._validate_fault_tolerant_automatic") @pytest.mark.parametrize("use_fault_tolerant", ["0", "1"]) -def test_data_loading_wraps_dataset_and_samplers(use_fault_tolerant, tmpdir): +def test_data_loading_wraps_dataset_and_samplers(_, tmpdir, use_fault_tolerant): """This test ensures the dataset and sampler are properly wrapped when fault tolerant is enabled.""" class CustomBatchSampler(BatchSampler): @@ -771,7 +769,7 @@ def __len__(self): # RandomGeneratorGetItemDataset, ], ) -@pytest.mark.parametrize("num_workers", [0]) +@pytest.mark.parametrize("num_workers", [0, pytest.param(2, marks=RunIf(slow=True))]) @pytest.mark.parametrize("batch_size", [1, 2, 3]) def test_dataset_rng_states_restart(dataset_class, num_workers, batch_size): """Test that the sequence of batches coming from a random number generator continues with the correct sequence @@ -897,10 +895,7 @@ def _run_training(trainer_kwargs, dataset_classes, fail_on_step: int = -1, ckpt_ return model.seen_batches, model.parameters() -# this test will fail `fault_tolerant` don't support multiple datasets. -# this tests works as the dataset is fully deterministic and therefore -# there is not overall between the seeds. -@mock.patch("pytorch_lightning.trainer.data_loading._validate_fault_tolerant_automatic", lambda x, y: None) +@mock.patch("pytorch_lightning.trainer.data_loading._validate_fault_tolerant_automatic") @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) @pytest.mark.parametrize( "dataset_classes", @@ -916,7 +911,7 @@ def _run_training(trainer_kwargs, dataset_classes, fail_on_step: int = -1, ckpt_ ], ) @pytest.mark.parametrize("multiple_trainloader_mode", ["min_size", "max_size_cycle"]) -def test_dataset_rng_states_restart_with_lightning(tmpdir, dataset_classes, multiple_trainloader_mode): +def test_dataset_rng_states_restart_with_lightning(_, tmpdir, dataset_classes, multiple_trainloader_mode): """Test that the Trainer can resume from a failed run in the case of several types of datasets.""" trainer_kwargs = dict( default_root_dir=tmpdir, @@ -1384,10 +1379,10 @@ def test_collect_states_with_collection(): assert generated == [{"a": {0: state}, "b": [{"a": {0: state}}]}] -@pytest.mark.parametrize("num_workers", [0]) +# FIXME(@tchaton): >0 num_workers failing +@pytest.mark.parametrize("num_workers", [0, pytest.param(2, marks=[RunIf(slow=True), pytest.mark.xfail()])]) @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "2"}) def test_stateful_workers(num_workers): - seed_everything(42) _get_iterator_fn = DataLoader._get_iterator