Refactor: Runif for TPU and Horovod 5/n #6301

Merged 8 commits on Mar 2, 2021

Changes from all commits
2 changes: 1 addition & 1 deletion tests/accelerators/test_accelerator_connector.py
@@ -84,7 +84,7 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock):
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)


@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@RunIf(min_gpus=2)
@mock.patch.dict(
os.environ, {
"CUDA_VISIBLE_DEVICES": "0,1",
10 changes: 5 additions & 5 deletions tests/accelerators/test_tpu_backend.py
@@ -18,8 +18,8 @@

from pytorch_lightning import Trainer
from pytorch_lightning.trainer.states import TrainerState
from pytorch_lightning.utilities import _TPU_AVAILABLE
from tests.helpers.boring_model import BoringModel
from tests.helpers.runif import RunIf
from tests.helpers.utils import pl_multi_process_test


@@ -39,7 +39,7 @@ def forward(self, x):
return x


@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
@RunIf(tpu=True)
@pl_multi_process_test
def test_resume_training_on_cpu(tmpdir):
""" Checks if training can be resumed from a saved checkpoint on CPU"""
@@ -70,7 +70,7 @@ def test_resume_training_on_cpu(tmpdir):
assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"


@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
@RunIf(tpu=True)
@pl_multi_process_test
def test_if_test_works_after_train(tmpdir):
""" Ensure that .test() works after .fit() """
@@ -82,7 +82,7 @@ def test_if_test_works_after_train(tmpdir):
assert len(trainer.test(model)) == 1


@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
@RunIf(tpu=True)
@pl_multi_process_test
def test_weight_tying_warning(tmpdir, capsys=None):
"""
@@ -98,7 +98,7 @@ def test_weight_tying_warning(tmpdir, capsys=None):
assert result


@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
@RunIf(tpu=True)
@pl_multi_process_test
def test_if_weights_tied(tmpdir, capsys=None):
"""
7 changes: 2 additions & 5 deletions tests/checkpointing/test_model_checkpoint.py
@@ -15,7 +15,6 @@
import math
import os
import pickle
import platform
import re
from argparse import Namespace
from pathlib import Path
@@ -38,6 +37,7 @@
from pytorch_lightning.utilities.cloud_io import load as pl_load
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers import BoringModel
from tests.helpers.runif import RunIf


class LogInTwoMethods(BoringModel):
@@ -364,10 +364,7 @@ def on_train_end(self, trainer, pl_module):
assert torch.save.call_count == 0


@pytest.mark.skipif(
platform.system() == "Windows",
reason="Distributed training is not supported on Windows",
)
@RunIf(skip_windows=True)
def test_model_checkpoint_no_extraneous_invocations(tmpdir):
"""Test to ensure that the model callback saves the checkpoints only once in distributed mode."""
model = LogInTwoMethods()
47 changes: 43 additions & 4 deletions tests/helpers/runif.py
@@ -19,7 +19,21 @@
import torch
from pkg_resources import get_distribution

from pytorch_lightning.utilities import _APEX_AVAILABLE, _NATIVE_AMP_AVAILABLE, _TORCH_QUANTIZE_AVAILABLE
from pytorch_lightning.utilities import (
_APEX_AVAILABLE,
_HOROVOD_AVAILABLE,
_NATIVE_AMP_AVAILABLE,
_TORCH_QUANTIZE_AVAILABLE,
_TPU_AVAILABLE,
)

try:
from horovod.common.util import nccl_built
nccl_built()
except (ImportError, ModuleNotFoundError, AttributeError):
_HOROVOD_NCCL_AVAILABLE = False
finally:
_HOROVOD_NCCL_AVAILABLE = True


class RunIf:
@@ -37,9 +51,13 @@ def __new__(
*args,
min_gpus: int = 0,
min_torch: Optional[str] = None,
min_python: Optional[str] = None,
quantization: bool = False,
amp_apex: bool = False,
amp_native: bool = False,
tpu: bool = False,
horovod: bool = False,
horovod_nccl: bool = False,
skip_windows: bool = False,
**kwargs
):
@@ -48,9 +66,13 @@ def __new__(
args: native pytest.mark.skipif arguments
min_gpus: min number of gpus required to run test
min_torch: minimum pytorch version to run test
min_python: minimum python version required to run test
quantization: if `torch.quantization` package is required to run test
amp_apex: if NVIDIA Apex is installed
amp_native: if native PyTorch AMP is supported
tpu: if TPU is available
horovod: if Horovod is installed
horovod_nccl: if Horovod is installed with NCCL support
skip_windows: skip test for Windows platform (typically for some limited torch functionality)
kwargs: native pytest.mark.skipif keyword arguments
"""
@@ -66,23 +88,40 @@ def __new__(
conditions.append(torch_version < LooseVersion(min_torch))
reasons.append(f"torch>={min_torch}")

if min_python:
py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
conditions.append(py_version < LooseVersion(min_python))
reasons.append(f"python>={min_python}")

if quantization:
_miss_default = 'fbgemm' not in torch.backends.quantized.supported_engines
conditions.append(not _TORCH_QUANTIZE_AVAILABLE or _miss_default)
reasons.append("missing PyTorch quantization")
reasons.append("PyTorch quantization")

if amp_native:
conditions.append(not _NATIVE_AMP_AVAILABLE)
reasons.append("missing native AMP")
reasons.append("native AMP")

if amp_apex:
conditions.append(not _APEX_AVAILABLE)
reasons.append("missing NVIDIA Apex")
reasons.append("NVIDIA Apex")

if skip_windows:
conditions.append(sys.platform == "win32")
reasons.append("unimplemented on Windows")

if tpu:
conditions.append(not _TPU_AVAILABLE)
reasons.append("TPU")

if horovod:
conditions.append(not _HOROVOD_AVAILABLE)
reasons.append("Horovod")

if horovod_nccl:
conditions.append(not _HOROVOD_NCCL_AVAILABLE)
reasons.append("Horovod with NCCL")

reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
return pytest.mark.skipif(
*args,
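A note on the _HOROVOD_NCCL_AVAILABLE probe added to runif.py above: because a finally clause always executes, the flag ends up True even when the import or the nccl_built() call raises. The sketch below is an editorial illustration of the presumed intent, not code from this PR; it keeps the same probe but uses an else branch so a failed probe leaves the flag False:

# Sketch only (not part of this PR): same NCCL probe as above, but with an
# else branch instead of finally, so the flag stays False when the probe fails.
try:
    from horovod.common.util import nccl_built
    nccl_built()
except (ImportError, ModuleNotFoundError, AttributeError):
    _HOROVOD_NCCL_AVAILABLE = False
else:
    _HOROVOD_NCCL_AVAILABLE = True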
6 changes: 2 additions & 4 deletions tests/loggers/test_tensorboard.py
@@ -25,12 +25,10 @@
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from tests.helpers import BoringModel
from tests.helpers.runif import RunIf


@pytest.mark.skipif(
LooseVersion(torch.__version__) < LooseVersion("1.5.0"),
reason="Minimal PT version is set to 1.5",
)
@RunIf(min_torch="1.5.0")
def test_tensorboard_hparams_reload(tmpdir):

class CustomModel(BoringModel):
30 changes: 8 additions & 22 deletions tests/models/test_horovod.py
@@ -40,14 +40,6 @@
# This script will run the actual test model training in parallel
TEST_SCRIPT = os.path.join(os.path.dirname(__file__), 'data', 'horovod', 'train_default_model.py')

try:
from horovod.common.util import nccl_built
nccl_built()
except (ImportError, ModuleNotFoundError, AttributeError):
_HOROVOD_NCCL_AVAILABLE = False
finally:
_HOROVOD_NCCL_AVAILABLE = True


def _run_horovod(trainer_options, on_gpu=False):
"""Execute the training script across multiple workers in parallel."""
@@ -99,8 +91,7 @@ def test_horovod_cpu_implicit(tmpdir):
_run_horovod(trainer_options)


@pytest.mark.skipif(not _HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support")
@RunIf(min_gpus=2, skip_windows=True)
@RunIf(min_gpus=2, skip_windows=True, horovod_nccl=True)
def test_horovod_multi_gpu(tmpdir):
"""Test Horovod with multi-GPU support."""
trainer_options = dict(
@@ -118,9 +109,8 @@ def test_horovod_multi_gpu(tmpdir):
_run_horovod(trainer_options, on_gpu=True)


@pytest.mark.skip(reason="Horovod has a problem with broadcast when using apex?")
@pytest.mark.skipif(not _HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support")
@RunIf(min_gpus=2, skip_windows=True, amp_apex=True)
@pytest.mark.skip(reason="Horovod has a problem with broadcast when using apex?") # todo
@RunIf(min_gpus=2, skip_windows=True, amp_apex=True, horovod_nccl=True)
def test_horovod_apex(tmpdir):
"""Test Horovod with multi-GPU support using apex amp."""
trainer_options = dict(
@@ -140,9 +130,8 @@ def test_horovod_apex(tmpdir):
_run_horovod(trainer_options, on_gpu=True)


@pytest.mark.skip(reason="Skip till Horovod fixes integration with Native torch.cuda.amp")
@pytest.mark.skipif(not _HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support")
@RunIf(min_gpus=2, skip_windows=True, amp_native=True)
@pytest.mark.skip(reason="Skip till Horovod fixes integration with Native torch.cuda.amp") # todo
@RunIf(min_gpus=2, skip_windows=True, amp_native=True, horovod_nccl=True)
def test_horovod_amp(tmpdir):
"""Test Horovod with multi-GPU support using native amp."""
trainer_options = dict(
@@ -162,8 +151,7 @@ def test_horovod_amp(tmpdir):
_run_horovod(trainer_options, on_gpu=True)


@pytest.mark.skipif(not _HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support")
@RunIf(min_gpus=1, skip_windows=True)
@RunIf(min_gpus=1, skip_windows=True, horovod_nccl=True)
def test_horovod_transfer_batch_to_gpu(tmpdir):

class TestTrainingStepModel(BoringModel):
@@ -225,8 +213,7 @@ def get_optimizer_params(optimizer):

# TODO: unclear Horovod failure...
@pytest.mark.skip(reason="unclear Horovod failure...")
@pytest.mark.skipif(not _HOROVOD_AVAILABLE, reason="Horovod is unavailable")
@RunIf(skip_windows=True)
@RunIf(skip_windows=True, horovod=True)
def test_result_reduce_horovod(tmpdir):
"""Make sure result logging works with Horovod.

@@ -276,8 +263,7 @@ def training_epoch_end(self, outputs) -> None:

# TODO: unclear Horovod failure...
@pytest.mark.skip(reason="unclear Horovod failure...")
@pytest.mark.skipif(not _HOROVOD_AVAILABLE, reason="Horovod is unavailable")
@RunIf(skip_windows=True)
@RunIf(skip_windows=True, horovod=True)
def test_accuracy_metric_horovod():
num_batches = 10
batch_size = 16
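For reference, the new flags let the Horovod tests above declare every requirement in a single decorator instead of stacking separate pytest.mark.skipif lines. A minimal, hypothetical usage sketch (the test name and body are illustrative and not part of this PR):

# Hypothetical example: RunIf turns each flag into one skip condition and
# emits a single pytest.mark.skipif, so the test runs only when all
# requirements are met.
from tests.helpers.runif import RunIf


@RunIf(min_gpus=2, skip_windows=True, horovod_nccl=True)
def test_example_horovod_gpu(tmpdir):
    # Skipped unless at least 2 GPUs are visible, the platform is not Windows,
    # and Horovod reports an NCCL build.
    ...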
6 changes: 1 addition & 5 deletions tests/models/test_torchscript.py
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from distutils.version import LooseVersion

import pytest
import torch
@@ -130,10 +129,7 @@ def test_torchscript_properties(tmpdir, modelclass):
ParityModuleRNN,
BasicGAN,
])
@pytest.mark.skipif(
LooseVersion(torch.__version__) < LooseVersion("1.5.0"),
reason="torch.save/load has bug loading script modules on torch <= 1.4",
)
@RunIf(min_torch="1.5.0")
def test_torchscript_save_load(tmpdir, modelclass):
""" Test that scripted LightningModule is correctly saved and can be loaded. """
model = modelclass()