From 2cc9ee556941ff6f819ee44280d35f25efe8a210 Mon Sep 17 00:00:00 2001
From: Jirka Borovec
Date: Tue, 2 Mar 2021 14:24:31 +0100
Subject: [PATCH 1/8] TPU

---
 tests/accelerators/test_tpu_backend.py   | 10 ++++----
 tests/helpers/runif.py                   |  9 ++++++-
 tests/models/test_tpu.py                 | 31 ++++++++++++------------
 tests/utilities/test_xla_device_utils.py |  5 ++--
 4 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/tests/accelerators/test_tpu_backend.py b/tests/accelerators/test_tpu_backend.py
index 03da7c81b2b17..2104387643b33 100644
--- a/tests/accelerators/test_tpu_backend.py
+++ b/tests/accelerators/test_tpu_backend.py
@@ -18,8 +18,8 @@
 
 from pytorch_lightning import Trainer
 from pytorch_lightning.trainer.states import TrainerState
-from pytorch_lightning.utilities import _TPU_AVAILABLE
 from tests.helpers.boring_model import BoringModel
+from tests.helpers.runif import RunIf
 from tests.helpers.utils import pl_multi_process_test
 
 
@@ -39,7 +39,7 @@ def forward(self, x):
         return x
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_resume_training_on_cpu(tmpdir):
     """ Checks if training can be resumed from a saved checkpoint on CPU"""
@@ -70,7 +70,7 @@ def test_resume_training_on_cpu(tmpdir):
     assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_if_test_works_after_train(tmpdir):
     """ Ensure that .test() works after .fit() """
@@ -82,7 +82,7 @@ def test_if_test_works_after_train(tmpdir):
     assert len(trainer.test(model)) == 1
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_weight_tying_warning(tmpdir, capsys=None):
     """
@@ -98,7 +98,7 @@ def test_weight_tying_warning(tmpdir, capsys=None):
     assert result
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_if_weights_tied(tmpdir, capsys=None):
     """
diff --git a/tests/helpers/runif.py b/tests/helpers/runif.py
index 128ad3c11de86..3c9c397e10617 100644
--- a/tests/helpers/runif.py
+++ b/tests/helpers/runif.py
@@ -19,7 +19,8 @@
 import torch
 from pkg_resources import get_distribution
 
-from pytorch_lightning.utilities import _APEX_AVAILABLE, _NATIVE_AMP_AVAILABLE, _TORCH_QUANTIZE_AVAILABLE
+from pytorch_lightning.utilities import _APEX_AVAILABLE, _NATIVE_AMP_AVAILABLE, _TORCH_QUANTIZE_AVAILABLE, \
+    _TPU_AVAILABLE
 
 
 class RunIf:
@@ -40,6 +41,7 @@ def __new__(
         quantization: bool = False,
         amp_apex: bool = False,
         amp_native: bool = False,
+        tpu: bool = False,
         skip_windows: bool = False,
         **kwargs
     ):
@@ -51,6 +53,7 @@
             quantization: if `torch.quantization` package is required to run test
             amp_apex: NVIDIA Apex is installed
             amp_native: if native PyTorch native AMP is supported
+            tpu: if TPU is available
             skip_windows: skip test for Windows platform (typically fo some limited torch functionality)
             kwargs: native pytest.mark.skipif keyword arguments
         """
@@ -83,6 +86,10 @@ def __new__(
             conditions.append(sys.platform == "win32")
             reasons.append("unimplemented on Windows")
 
+        if tpu:
+            conditions.append(not _TPU_AVAILABLE)
+            reasons.append("TPU")
+
         reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
         return pytest.mark.skipif(
             *args,
diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py
index 6a4605b3e2b36..2eb00b8770438 100644
--- a/tests/models/test_tpu.py
+++ b/tests/models/test_tpu.py
@@ -29,6 +29,7 @@
 from pytorch_lightning.utilities.distributed import ReduceOp
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers import BoringModel, RandomDataset
+from tests.helpers.runif import RunIf
 from tests.helpers.utils import pl_multi_process_test
 
 if _TPU_AVAILABLE:
@@ -53,7 +54,7 @@ def val_dataloader(self):
         return DataLoader(RandomDataset(32, 2000), batch_size=32)
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_model_tpu_cores_1(tmpdir):
     """Make sure model trains on TPU."""
@@ -72,7 +73,7 @@ def test_model_tpu_cores_1(tmpdir):
 
 
 @pytest.mark.parametrize('tpu_core', [1, 5])
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_model_tpu_index(tmpdir, tpu_core):
     """Make sure model trains on TPU."""
@@ -91,7 +92,7 @@ def test_model_tpu_index(tmpdir, tpu_core):
     assert torch_xla._XLAC._xla_get_default_device() == f'xla:{tpu_core}'
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_model_tpu_cores_8(tmpdir):
     """Make sure model trains on TPU."""
@@ -110,7 +111,7 @@ def test_model_tpu_cores_8(tmpdir):
     tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False, min_acc=0.05)
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_model_16bit_tpu_cores_1(tmpdir):
     """Make sure model trains on TPU."""
@@ -131,7 +132,7 @@ def test_model_16bit_tpu_cores_1(tmpdir):
 
 
 @pytest.mark.parametrize('tpu_core', [1, 5])
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_model_16bit_tpu_index(tmpdir, tpu_core):
     """Make sure model trains on TPU."""
@@ -152,7 +153,7 @@ def test_model_16bit_tpu_index(tmpdir, tpu_core):
     assert os.environ.get('XLA_USE_BF16') == str(1), "XLA_USE_BF16 was not set in environment variables"
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_model_16bit_tpu_cores_8(tmpdir):
     """Make sure model trains on TPU."""
@@ -172,7 +173,7 @@ def test_model_16bit_tpu_cores_8(tmpdir):
     tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False, min_acc=0.05)
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_model_tpu_early_stop(tmpdir):
     """Test if single TPU core training works"""
@@ -200,7 +201,7 @@ def validation_step(self, *args, **kwargs):
     trainer.fit(model)
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_tpu_grad_norm(tmpdir):
     """Test if grad_norm works on TPU."""
@@ -219,7 +220,7 @@ def test_tpu_grad_norm(tmpdir):
     tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_dataloaders_passed_to_fit(tmpdir):
     """Test if dataloaders passed to trainer works on TPU"""
@@ -244,7 +245,7 @@ def test_dataloaders_passed_to_fit(tmpdir):
     [pytest.param(1, None), pytest.param(8, None), pytest.param([1], 1), pytest.param([8], 8)],
 )
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires missing TPU")
+@RunIf(tpu=True)
 def test_tpu_id_to_be_as_expected(tpu_cores, expected_tpu_id):
     """Test if trainer.tpu_id is set as expected"""
     assert Trainer(tpu_cores=tpu_cores).accelerator_connector.tpu_id == expected_tpu_id
@@ -265,13 +266,13 @@ def test_exception_when_no_tpu_found(tmpdir):
 
 
 @pytest.mark.parametrize('tpu_cores', [1, 8, [1]])
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 def test_distributed_backend_set_when_using_tpu(tmpdir, tpu_cores):
     """Test if distributed_backend is set to `tpu` when tpu_cores is not None"""
     assert Trainer(tpu_cores=tpu_cores).distributed_backend == "tpu"
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_broadcast_on_tpu():
     """ Checks if an object from the master process is broadcasted to other processes correctly"""
@@ -303,7 +304,7 @@ def test_broadcast(rank):
         pytest.param(10, None, True),
     ],
 )
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_tpu_choice(tmpdir, tpu_cores, expected_tpu_id, error_expected):
     if error_expected:
@@ -319,7 +320,7 @@
     [pytest.param('--tpu_cores=8', {'tpu_cores': 8}),
      pytest.param("--tpu_cores=1,", {'tpu_cores': '1,'})]
 )
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_tpu_cores_with_argparse(cli_args, expected):
     """Test passing tpu_cores in command line"""
@@ -334,7 +335,7 @@ def test_tpu_cores_with_argparse(cli_args, expected):
     assert Trainer.from_argparse_args(args)
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_tpu_reduce():
     """Test tpu spawn reduce operation """
diff --git a/tests/utilities/test_xla_device_utils.py b/tests/utilities/test_xla_device_utils.py
index 73b11b48267ce..02be752e7e2fb 100644
--- a/tests/utilities/test_xla_device_utils.py
+++ b/tests/utilities/test_xla_device_utils.py
@@ -17,7 +17,8 @@
 import pytest
 
 import pytorch_lightning.utilities.xla_device as xla_utils
-from pytorch_lightning.utilities import _TPU_AVAILABLE, _XLA_AVAILABLE
+from pytorch_lightning.utilities import _XLA_AVAILABLE
+from tests.helpers.runif import RunIf
 from tests.helpers.utils import pl_multi_process_test
 
 
@@ -27,7 +28,7 @@ def test_tpu_device_absence():
     assert xla_utils.XLADeviceUtils.tpu_device_exists() is None
 
 
-@pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires torch_xla to be installed")
+@RunIf(tpu=True)
 @pl_multi_process_test
 def test_tpu_device_presence():
     """Check tpu_device_exists returns True when TPU is available"""

From 8fada99ec6c7e58af64bb9f2754cb9ce5c1f8a81 Mon Sep 17 00:00:00 2001
From: Jirka Borovec
Date: Tue, 2 Mar 2021 14:32:38 +0100
Subject: [PATCH 2/8] horovod

---
 tests/helpers/runif.py       | 29 +++++++++++++++++++++++++--
 tests/models/test_horovod.py | 30 ++++++++----------------------
 2 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/tests/helpers/runif.py b/tests/helpers/runif.py
index 3c9c397e10617..e41b449e25a77 100644
--- a/tests/helpers/runif.py
+++ b/tests/helpers/runif.py
@@ -19,8 +19,21 @@
 import torch
 from pkg_resources import get_distribution
 
-from pytorch_lightning.utilities import _APEX_AVAILABLE, _NATIVE_AMP_AVAILABLE, _TORCH_QUANTIZE_AVAILABLE, \
-    _TPU_AVAILABLE
+from pytorch_lightning.utilities import (
+    _APEX_AVAILABLE,
+    _HOROVOD_AVAILABLE,
+    _NATIVE_AMP_AVAILABLE,
+    _TORCH_QUANTIZE_AVAILABLE,
+    _TPU_AVAILABLE,
+)
+
+try:
+    from horovod.common.util import nccl_built
+    nccl_built()
+except (ImportError, ModuleNotFoundError, AttributeError):
+    _HOROVOD_NCCL_AVAILABLE = False
+finally:
+    _HOROVOD_NCCL_AVAILABLE = True
 
 
 class RunIf:
@@ -42,6 +55,8 @@ def __new__(
         amp_apex: bool = False,
         amp_native: bool = False,
         tpu: bool = False,
+        horovod: bool = False,
+        horovod_nccl: bool = False,
         skip_windows: bool = False,
         **kwargs
     ):
@@ -54,6 +69,8 @@ def __new__(
             amp_apex: NVIDIA Apex is installed
             amp_native: if native PyTorch native AMP is supported
             tpu: if TPU is available
+            horovod: if Horovod is installed
+            horovod_nccl: if Horovod is installed with NCCL support
             skip_windows: skip test for Windows platform (typically fo some limited torch functionality)
             kwargs: native pytest.mark.skipif keyword arguments
         """
@@ -90,6 +107,14 @@ def __new__(
             conditions.append(not _TPU_AVAILABLE)
             reasons.append("TPU")
 
+        if horovod:
+            conditions.append(not _HOROVOD_AVAILABLE)
+            reasons.append("Horovod")
+
+        if horovod_nccl:
+            conditions.append(not _HOROVOD_NCCL_AVAILABLE)
+            reasons.append("Horovod with NCCL")
+
         reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
         return pytest.mark.skipif(
             *args,
diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py
index 7bfef7ba3f3f9..636979821b313 100644
--- a/tests/models/test_horovod.py
+++ b/tests/models/test_horovod.py
@@ -40,14 +40,6 @@
 # This script will run the actual test model training in parallel
 TEST_SCRIPT = os.path.join(os.path.dirname(__file__), 'data', 'horovod', 'train_default_model.py')
 
-try:
-    from horovod.common.util import nccl_built
-    nccl_built()
-except (ImportError, ModuleNotFoundError, AttributeError):
-    _HOROVOD_NCCL_AVAILABLE = False
-finally:
-    _HOROVOD_NCCL_AVAILABLE = True
-
 
 def _run_horovod(trainer_options, on_gpu=False):
     """Execute the training script across multiple workers in parallel."""
@@ -99,8 +91,7 @@ def test_horovod_cpu_implicit(tmpdir):
     _run_horovod(trainer_options)
 
 
-@pytest.mark.skipif(not _HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support")
-@RunIf(min_gpus=2, skip_windows=True)
+@RunIf(min_gpus=2, skip_windows=True, horovod_nccl=True)
 def test_horovod_multi_gpu(tmpdir):
     """Test Horovod with multi-GPU support."""
     trainer_options = dict(
@@ -118,9 +109,8 @@ def test_horovod_multi_gpu(tmpdir):
     _run_horovod(trainer_options, on_gpu=True)
 
 
-@pytest.mark.skip(reason="Horovod has a problem with broadcast when using apex?")
-@pytest.mark.skipif(not _HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support")
-@RunIf(min_gpus=2, skip_windows=True, amp_apex=True)
+@pytest.mark.skip(reason="Horovod has a problem with broadcast when using apex?")  # todo
+@RunIf(min_gpus=2, skip_windows=True, amp_apex=True, horovod_nccl=True)
 def test_horovod_apex(tmpdir):
     """Test Horovod with multi-GPU support using apex amp."""
     trainer_options = dict(
@@ -140,9 +130,8 @@ def test_horovod_apex(tmpdir):
     _run_horovod(trainer_options, on_gpu=True)
 
 
-@pytest.mark.skip(reason="Skip till Horovod fixes integration with Native torch.cuda.amp")
-@pytest.mark.skipif(not _HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support")
-@RunIf(min_gpus=2, skip_windows=True, amp_native=True)
+@pytest.mark.skip(reason="Skip till Horovod fixes integration with Native torch.cuda.amp")  # todo
+@RunIf(min_gpus=2, skip_windows=True, amp_native=True, horovod_nccl=True)
 def test_horovod_amp(tmpdir):
     """Test Horovod with multi-GPU support using native amp."""
     trainer_options = dict(
@@ -162,8 +151,7 @@ def test_horovod_amp(tmpdir):
     _run_horovod(trainer_options, on_gpu=True)
 
 
-@pytest.mark.skipif(not _HOROVOD_NCCL_AVAILABLE, reason="test requires Horovod with NCCL support")
-@RunIf(min_gpus=1, skip_windows=True)
+@RunIf(min_gpus=1, skip_windows=True, horovod_nccl=True)
 def test_horovod_transfer_batch_to_gpu(tmpdir):
 
     class TestTrainingStepModel(BoringModel):
@@ -225,8 +213,7 @@ def get_optimizer_params(optimizer):
 
 # TODO: unclear Horovod failure...
 @pytest.mark.skip(reason="unclear Horovod failure...")
-@pytest.mark.skipif(not _HOROVOD_AVAILABLE, reason="Horovod is unavailable")
-@RunIf(skip_windows=True)
+@RunIf(skip_windows=True, horovod=True)
 def test_result_reduce_horovod(tmpdir):
     """Make sure result logging works with Horovod.
 
@@ -276,8 +263,7 @@ def training_epoch_end(self, outputs) -> None:
 
 # TODO: unclear Horovod failure...
 @pytest.mark.skip(reason="unclear Horovod failure...")
-@pytest.mark.skipif(not _HOROVOD_AVAILABLE, reason="Horovod is unavailable")
-@RunIf(skip_windows=True)
+@RunIf(skip_windows=True, horovod=True)
 def test_accuracy_metric_horovod():
     num_batches = 10
     batch_size = 16

From 4ea52a76c1bec9ae523a632f618ab862f997b453 Mon Sep 17 00:00:00 2001
From: Jirka Borovec
Date: Tue, 2 Mar 2021 14:41:29 +0100
Subject: [PATCH 3/8] extra

---
 tests/accelerators/test_accelerator_connector.py | 2 +-
 tests/accelerators/test_ddp.py                   | 3 +--
 tests/callbacks/test_gpu_stats_monitor.py        | 3 +--
 tests/checkpointing/test_model_checkpoint.py     | 7 ++-----
 tests/helpers/runif.py                           | 6 ++++++
 tests/loggers/test_tensorboard.py                | 6 ++----
 tests/models/test_amp.py                         | 2 +-
 tests/models/test_torchscript.py                 | 6 +-----
 tests/trainer/test_trainer_cli.py                | 6 +-----
 9 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index 50c9ccd47dfed..c74abb888948f 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -84,7 +84,7 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock):
     assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@RunIf(min_gpus=2)
 @mock.patch.dict(
     os.environ, {
         "CUDA_VISIBLE_DEVICES": "0,1",
diff --git a/tests/accelerators/test_ddp.py b/tests/accelerators/test_ddp.py
index 14e73d920af4b..af1e40345127a 100644
--- a/tests/accelerators/test_ddp.py
+++ b/tests/accelerators/test_ddp.py
@@ -82,8 +82,7 @@ def test_cli_to_pass(tmpdir, args=None):
     return '1'
 
 
-@RunIf(skip_windows=True)
-@pytest.mark.skipif(torch.cuda.is_available(), reason="test doesn't requires GPU machine")
+@RunIf(min_gpus=1, skip_windows=True)
 def test_torch_distributed_backend_env_variables(tmpdir):
     """
     This test set `undefined` as torch backend and should raise an `Backend.UNDEFINED` ValueError.
diff --git a/tests/callbacks/test_gpu_stats_monitor.py b/tests/callbacks/test_gpu_stats_monitor.py
index c2c4c87c284b0..ce670c4a08702 100644
--- a/tests/callbacks/test_gpu_stats_monitor.py
+++ b/tests/callbacks/test_gpu_stats_monitor.py
@@ -15,7 +15,6 @@
 
 import numpy as np
 import pytest
-import torch
 
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import GPUStatsMonitor
@@ -68,7 +67,7 @@ def test_gpu_stats_monitor(tmpdir):
         assert any([f in h for h in met_data.dtype.names])
 
 
-@pytest.mark.skipif(torch.cuda.is_available(), reason="test requires CPU machine")
+@RunIf(min_gpus=1)
 def test_gpu_stats_monitor_cpu_machine(tmpdir):
     """
     Test GPUStatsMonitor on CPU machine.
diff --git a/tests/checkpointing/test_model_checkpoint.py b/tests/checkpointing/test_model_checkpoint.py
index c993c9c73872c..48e4a22e1ec05 100644
--- a/tests/checkpointing/test_model_checkpoint.py
+++ b/tests/checkpointing/test_model_checkpoint.py
@@ -15,7 +15,6 @@
 import math
 import os
 import pickle
-import platform
 import re
 from argparse import Namespace
 from pathlib import Path
@@ -38,6 +37,7 @@
 from pytorch_lightning.utilities.cloud_io import load as pl_load
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers import BoringModel
+from tests.helpers.runif import RunIf
 
 
 class LogInTwoMethods(BoringModel):
@@ -364,10 +364,7 @@ def on_train_end(self, trainer, pl_module):
     assert torch.save.call_count == 0
 
 
-@pytest.mark.skipif(
-    platform.system() == "Windows",
-    reason="Distributed training is not supported on Windows",
-)
+@RunIf(skip_windows=True)
 def test_model_checkpoint_no_extraneous_invocations(tmpdir):
     """Test to ensure that the model callback saves the checkpoints only once in distributed mode."""
     model = LogInTwoMethods()
diff --git a/tests/helpers/runif.py b/tests/helpers/runif.py
index e41b449e25a77..b4e177953d5aa 100644
--- a/tests/helpers/runif.py
+++ b/tests/helpers/runif.py
@@ -51,6 +51,7 @@ def __new__(
         *args,
         min_gpus: int = 0,
         min_torch: Optional[str] = None,
+        min_python: Optional[str] = None,
         quantization: bool = False,
         amp_apex: bool = False,
         amp_native: bool = False,
@@ -86,6 +87,11 @@ def __new__(
             conditions.append(torch_version < LooseVersion(min_torch))
             reasons.append(f"torch>={min_torch}")
 
+        if min_python:
+            py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
+            conditions.append(py_version < LooseVersion(min_python))
+            reasons.append(f"python>={min_python}")
+
         if quantization:
             _miss_default = 'fbgemm' not in torch.backends.quantized.supported_engines
             conditions.append(not _TORCH_QUANTIZE_AVAILABLE or _miss_default)
diff --git a/tests/loggers/test_tensorboard.py b/tests/loggers/test_tensorboard.py
index e5e3f231d3ac7..8a0bedb1adc93 100644
--- a/tests/loggers/test_tensorboard.py
+++ b/tests/loggers/test_tensorboard.py
@@ -25,12 +25,10 @@
 from pytorch_lightning import Trainer
 from pytorch_lightning.loggers import TensorBoardLogger
 from tests.helpers import BoringModel
+from tests.helpers.runif import RunIf
 
 
-@pytest.mark.skipif(
-    LooseVersion(torch.__version__) < LooseVersion("1.5.0"),
-    reason="Minimal PT version is set to 1.5",
-)
+@RunIf(min_torch="1.5")
 def test_tensorboard_hparams_reload(tmpdir):
 
     class CustomModel(BoringModel):
diff --git a/tests/models/test_amp.py b/tests/models/test_amp.py
index 9853db342436b..fb49fe36499e9 100644
--- a/tests/models/test_amp.py
+++ b/tests/models/test_amp.py
@@ -162,7 +162,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir):
     assert generated == 'abc23'
 
 
-@pytest.mark.skipif(torch.cuda.is_available(), reason="test is restricted only on CPU") +@RunIf(min_gpus=1) def test_cpu_model_with_amp(tmpdir): """Make sure model trains on CPU.""" with pytest.raises(MisconfigurationException, match="AMP is only available on GPU"): diff --git a/tests/models/test_torchscript.py b/tests/models/test_torchscript.py index f208a802c2b4c..cde99fda7fd60 100644 --- a/tests/models/test_torchscript.py +++ b/tests/models/test_torchscript.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from distutils.version import LooseVersion import pytest import torch @@ -130,10 +129,7 @@ def test_torchscript_properties(tmpdir, modelclass): ParityModuleRNN, BasicGAN, ]) -@pytest.mark.skipif( - LooseVersion(torch.__version__) < LooseVersion("1.5.0"), - reason="torch.save/load has bug loading script modules on torch <= 1.4", -) +@RunIf(min_torch="1.5") def test_torchscript_save_load(tmpdir, modelclass): """ Test that scripted LightningModule is correctly saved and can be loaded. """ model = modelclass() diff --git a/tests/trainer/test_trainer_cli.py b/tests/trainer/test_trainer_cli.py index 77704b3284eb8..6d36aa75841e4 100644 --- a/tests/trainer/test_trainer_cli.py +++ b/tests/trainer/test_trainer_cli.py @@ -13,7 +13,6 @@ # limitations under the License. import inspect import pickle -import sys from argparse import ArgumentParser, Namespace from unittest import mock @@ -188,10 +187,7 @@ def test_argparse_args_parsing_gpus(cli_args, expected_gpu): assert trainer.data_parallel_device_ids == expected_gpu -@pytest.mark.skipif( - sys.version_info < (3, 7), - reason="signature inspection while mocking is not working in Python < 3.7 despite autospec" -) +@RunIf(min_python="3.7") @pytest.mark.parametrize(['cli_args', 'extra_args'], [ pytest.param({}, {}), pytest.param({'logger': False}, {}), From 8f5b42f5bc7f62060691a064141bb1505fec63c2 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Tue, 2 Mar 2021 15:12:42 +0100 Subject: [PATCH 4/8] fix --- tests/helpers/runif.py | 6 +++--- tests/loggers/test_tensorboard.py | 2 +- tests/models/test_torchscript.py | 2 +- tests/trainer/test_trainer_cli.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/helpers/runif.py b/tests/helpers/runif.py index b4e177953d5aa..c13b64a1e91eb 100644 --- a/tests/helpers/runif.py +++ b/tests/helpers/runif.py @@ -111,15 +111,15 @@ def __new__( if tpu: conditions.append(not _TPU_AVAILABLE) - reasons.append("TPU") + reasons.append("missing TPU") if horovod: conditions.append(not _HOROVOD_AVAILABLE) - reasons.append("Horovod") + reasons.append("missing Horovod") if horovod_nccl: conditions.append(not _HOROVOD_NCCL_AVAILABLE) - reasons.append("Horovod with NCCL") + reasons.append("missing Horovod with NCCL") reasons = [rs for cond, rs in zip(conditions, reasons) if cond] return pytest.mark.skipif( diff --git a/tests/loggers/test_tensorboard.py b/tests/loggers/test_tensorboard.py index 8a0bedb1adc93..1a85270c6dcbb 100644 --- a/tests/loggers/test_tensorboard.py +++ b/tests/loggers/test_tensorboard.py @@ -28,7 +28,7 @@ from tests.helpers.runif import RunIf -@RunIf(min_torch="1.5") +@RunIf(min_torch="1.5.0") def test_tensorboard_hparams_reload(tmpdir): class CustomModel(BoringModel): diff --git a/tests/models/test_torchscript.py b/tests/models/test_torchscript.py index cde99fda7fd60..b03ed0806d800 100644 --- a/tests/models/test_torchscript.py +++ 
@@ -129,7 +129,7 @@ def test_torchscript_properties(tmpdir, modelclass):
     ParityModuleRNN,
     BasicGAN,
 ])
-@RunIf(min_torch="1.5")
+@RunIf(min_torch="1.5.0")
 def test_torchscript_save_load(tmpdir, modelclass):
     """ Test that scripted LightningModule is correctly saved and can be loaded. """
     model = modelclass()
diff --git a/tests/trainer/test_trainer_cli.py b/tests/trainer/test_trainer_cli.py
index 6d36aa75841e4..ab0414d895389 100644
--- a/tests/trainer/test_trainer_cli.py
+++ b/tests/trainer/test_trainer_cli.py
@@ -187,7 +187,7 @@ def test_argparse_args_parsing_gpus(cli_args, expected_gpu):
     assert trainer.data_parallel_device_ids == expected_gpu
 
 
-@RunIf(min_python="3.7")
+@RunIf(min_python="3.7.0")
 @pytest.mark.parametrize(['cli_args', 'extra_args'], [
     pytest.param({}, {}),
     pytest.param({'logger': False}, {}),

From d4651df25e2de6fedb0e9377cda5a517b2bc16c0 Mon Sep 17 00:00:00 2001
From: Jirka Borovec
Date: Tue, 2 Mar 2021 15:25:20 +0100
Subject: [PATCH 5/8] Apply suggestions from code review

Co-authored-by: Nicki Skafte
---
 tests/helpers/runif.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/helpers/runif.py b/tests/helpers/runif.py
index c13b64a1e91eb..ac1293f460da6 100644
--- a/tests/helpers/runif.py
+++ b/tests/helpers/runif.py
@@ -66,6 +66,7 @@ def __new__(
             args: native pytest.mark.skipif arguments
             min_gpus: min number of gpus required to run test
            min_torch: minimum pytorch version to run test
+            min_python: minimum python version required to run test
             quantization: if `torch.quantization` package is required to run test
             amp_apex: NVIDIA Apex is installed
             amp_native: if native PyTorch native AMP is supported
@@ -111,15 +112,15 @@ def __new__(
 
         if tpu:
             conditions.append(not _TPU_AVAILABLE)
-            reasons.append("missing TPU")
+            reasons.append("TPU")
 
         if horovod:
             conditions.append(not _HOROVOD_AVAILABLE)
-            reasons.append("missing Horovod")
+            reasons.append("Horovod")
 
         if horovod_nccl:
             conditions.append(not _HOROVOD_NCCL_AVAILABLE)
-            reasons.append("missing Horovod with NCCL")
+            reasons.append("Horovod with NCCL")
 
         reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
         return pytest.mark.skipif(

From bf397eb744c3c3f5246f2467face738774b0e377 Mon Sep 17 00:00:00 2001
From: Jirka Borovec
Date: Tue, 2 Mar 2021 15:25:52 +0100
Subject: [PATCH 6/8] doc

---
 tests/helpers/runif.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/helpers/runif.py b/tests/helpers/runif.py
index ac1293f460da6..2219b57372333 100644
--- a/tests/helpers/runif.py
+++ b/tests/helpers/runif.py
@@ -96,15 +96,15 @@ def __new__(
         if quantization:
             _miss_default = 'fbgemm' not in torch.backends.quantized.supported_engines
             conditions.append(not _TORCH_QUANTIZE_AVAILABLE or _miss_default)
-            reasons.append("missing PyTorch quantization")
+            reasons.append("PyTorch quantization")
 
         if amp_native:
             conditions.append(not _NATIVE_AMP_AVAILABLE)
-            reasons.append("missing native AMP")
+            reasons.append("native AMP")
 
         if amp_apex:
             conditions.append(not _APEX_AVAILABLE)
-            reasons.append("missing NVIDIA Apex")
+            reasons.append("NVIDIA Apex")
 
         if skip_windows:
             conditions.append(sys.platform == "win32")

From 472e9f4d4f4c9f01545cb1e2183a3611496b2814 Mon Sep 17 00:00:00 2001
From: Jirka Borovec
Date: Tue, 2 Mar 2021 16:56:00 +0100
Subject: [PATCH 7/8] Apply suggestions from code review

---
 tests/accelerators/test_ddp.py            | 3 ++-
 tests/callbacks/test_gpu_stats_monitor.py | 2 +-
 tests/models/test_amp.py                  | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/accelerators/test_ddp.py b/tests/accelerators/test_ddp.py
index af1e40345127a..14e73d920af4b 100644
--- a/tests/accelerators/test_ddp.py
+++ b/tests/accelerators/test_ddp.py
@@ -82,7 +82,8 @@ def test_cli_to_pass(tmpdir, args=None):
     return '1'
 
 
-@RunIf(min_gpus=1, skip_windows=True)
+@RunIf(skip_windows=True)
+@pytest.mark.skipif(torch.cuda.is_available(), reason="test doesn't requires GPU machine")
 def test_torch_distributed_backend_env_variables(tmpdir):
     """
     This test set `undefined` as torch backend and should raise an `Backend.UNDEFINED` ValueError.
diff --git a/tests/callbacks/test_gpu_stats_monitor.py b/tests/callbacks/test_gpu_stats_monitor.py
index ce670c4a08702..2aaed5012fb9f 100644
--- a/tests/callbacks/test_gpu_stats_monitor.py
+++ b/tests/callbacks/test_gpu_stats_monitor.py
@@ -67,7 +67,7 @@ def test_gpu_stats_monitor(tmpdir):
         assert any([f in h for h in met_data.dtype.names])
 
 
-@RunIf(min_gpus=1)
+@pytest.mark.skipif(torch.cuda.is_available(), reason="test requires CPU machine")
 def test_gpu_stats_monitor_cpu_machine(tmpdir):
     """
     Test GPUStatsMonitor on CPU machine.
diff --git a/tests/models/test_amp.py b/tests/models/test_amp.py
index fb49fe36499e9..9853db342436b 100644
--- a/tests/models/test_amp.py
+++ b/tests/models/test_amp.py
@@ -162,7 +162,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir):
     assert generated == 'abc23'
 
 
-@RunIf(min_gpus=1)
+@pytest.mark.skipif(torch.cuda.is_available(), reason="test is restricted only on CPU")
 def test_cpu_model_with_amp(tmpdir):
     """Make sure model trains on CPU."""
     with pytest.raises(MisconfigurationException, match="AMP is only available on GPU"):

From 24a858c4556e14ef992ae0f47244daa3d32b7873 Mon Sep 17 00:00:00 2001
From: Jirka Borovec
Date: Tue, 2 Mar 2021 16:58:34 +0100
Subject: [PATCH 8/8] pep8

---
 tests/callbacks/test_gpu_stats_monitor.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/callbacks/test_gpu_stats_monitor.py b/tests/callbacks/test_gpu_stats_monitor.py
index 2aaed5012fb9f..c2c4c87c284b0 100644
--- a/tests/callbacks/test_gpu_stats_monitor.py
+++ b/tests/callbacks/test_gpu_stats_monitor.py
@@ -15,6 +15,7 @@
 
 import numpy as np
 import pytest
+import torch
 
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import GPUStatsMonitor