fix gpus -1 for CPU #8766

Closed · wants to merge 10 commits (showing changes from 6 commits)
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -483,6 +483,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Fixed

+- Fixed parsing of argument `gpus=-1` on CPU machines ([#8766](https://github.com/PyTorchLightning/pytorch-lightning/pull/8766))


- Fixed an issue with logger outputs not being finalized correctly after prediction runs ([#8685](https://github.com/PyTorchLightning/pytorch-lightning/pull/8685))

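As a quick illustration of the entry above, a minimal sketch of the fixed behavior, assuming a CPU-only machine (the asserted values follow from the diffs below):

```python
from pytorch_lightning import Trainer

# Assumption: torch.cuda.is_available() is False on this machine.
# Before this fix, gpus=-1 ("use all GPUs") raised a MisconfigurationException
# when no GPUs were present; now it parses to None and training runs on CPU.
trainer = Trainer(gpus=-1)
assert trainer.num_gpus == 0
assert trainer.root_gpu is None
```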
15 changes: 6 additions & 9 deletions pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -553,16 +553,14 @@ def is_distributed(self) -> bool:
        if hasattr(self.training_type_plugin, "is_distributed") and not self.use_tpu:
            return self.training_type_plugin.is_distributed
        is_distributed = self.use_ddp or self.use_ddp2 or self.use_horovod
-       if self.use_tpu:
+       if self.use_tpu and hasattr(self.training_type_plugin, "is_distributed"):
            is_distributed |= self.training_type_plugin.is_distributed
        return is_distributed
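The added `hasattr` guard protects against training-type plugins that do not expose `is_distributed`. A minimal sketch of the pattern with hypothetical stand-in classes (not the real plugin hierarchy):

```python
class SingleDevicePluginStub:
    pass  # exposes no `is_distributed` attribute


class DDPPluginStub:
    is_distributed = True


def resolve_is_distributed(plugin, base: bool = False) -> bool:
    # Consult the plugin only when it actually defines the attribute,
    # mirroring the guard added in the diff above.
    if hasattr(plugin, "is_distributed"):
        base |= plugin.is_distributed
    return base


assert resolve_is_distributed(SingleDevicePluginStub()) is False
assert resolve_is_distributed(DDPPluginStub()) is True
```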

    @property
    def num_gpus(self) -> int:
        gpus = self.parallel_device_ids
-       if gpus is None:
-           return 0
-       return len(gpus)
+       return 0 if gpus is None else len(gpus)

    @property
    def num_ipus(self) -> int:
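The `num_gpus` change is behavior-preserving; a quick equivalence check with the property body extracted into a free function:

```python
def num_gpus(parallel_device_ids):
    # The conditional expression matches the removed if/return pair.
    return 0 if parallel_device_ids is None else len(parallel_device_ids)


assert num_gpus(None) == 0  # no device ids parsed, e.g. CPU-only
assert num_gpus([0]) == 1
assert num_gpus([0, 1, 2]) == 3
```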
@@ -581,6 +579,8 @@ def parallel_devices(self) -> List[Union[torch.device, int]]:
            # https://github.com/PyTorchLightning/pytorch-lightning/issues/3169
            if isinstance(self.tpu_cores, int):
                devices = list(range(self.tpu_cores))
+           else:
+               raise MisconfigurationException(f"`tpu_cores` has to be int, but {self.tpu_cores} given.")
        elif self.use_ipu:
            devices = list(range(self.num_ipus))
        else:

Review thread on the added `else:` branch:

> Contributor: Are you there? `tpu_cores=[5]` is also a valid value.
>
> Author (Member): So far, from reading the code, no... cc: @kaushikb11?
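The thread above turns on which values `tpu_cores` accepts; the authoritative check lives in `pytorch_lightning.utilities.device_parser.parse_tpu_cores`. A hedged summary as a comment-only sketch:

```python
# Values the `tpu_cores` Trainer argument accepts in this era of the API,
# as hinted by the annotation Optional[Union[int, List[int]]] further down:
valid_tpu_cores = [
    1,    # train on a single, auto-assigned TPU core
    8,    # train on all eight cores of one TPU device
    [5],  # train on exactly core index 5 -- the reviewer's example
]
# If parse_tpu_cores really accepts [5], then self.tpu_cores is not always an
# int, and the added `else: raise` would reject a configuration the parser
# allows -- which is the reviewer's concern.
```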
@@ -589,11 +589,8 @@ def parallel_devices(self) -> List[Union[torch.device, int]]:

    @property
    def root_gpu(self) -> Optional[int]:
-       return (
-           self.accelerator.root_device.index
-           if not isinstance(self.accelerator, (IPUAccelerator, TPUAccelerator))
-           else None
-       )
+       if not isinstance(self.accelerator, (IPUAccelerator, TPUAccelerator)):
+           return self.accelerator.root_device.index

    @staticmethod
    def _is_plugin_training_type(plugin: Union[str, TrainingTypePlugin]) -> bool:
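The rewritten `root_gpu` drops the explicit `else None`: for TPU/IPU accelerators, control falls off the end of the function and Python returns `None` implicitly. A minimal sketch of that control flow:

```python
from typing import Optional


def root_gpu_sketch(is_gpu_or_cpu_accelerator: bool, root_index: int = 0) -> Optional[int]:
    if is_gpu_or_cpu_accelerator:
        return root_index
    # No explicit return here: Python yields None for the TPU/IPU branch.


assert root_gpu_sketch(True) == 0
assert root_gpu_sketch(False) is None
```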
11 changes: 6 additions & 5 deletions pytorch_lightning/trainer/trainer.py
@@ -24,6 +24,7 @@
from weakref import proxy

import torch
+from torch.cuda.amp import GradScaler
from torch.optim import Optimizer

import pytorch_lightning as pl
@@ -1490,9 +1491,9 @@ def _on_exception(self):
        file_path = os.path.join(self.default_root_dir, ".pl_auto_save.ckpt")
        self.save_checkpoint(file_path)
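For context, the lines above are the crash auto-save this hunk touches. A hedged sketch of resuming from that checkpoint (`resume_from_checkpoint` is the 1.x-era Trainer argument; the root dir name is hypothetical):

```python
import os

from pytorch_lightning import Trainer

# Hypothetical default_root_dir; ".pl_auto_save.ckpt" is the filename used above.
ckpt_path = os.path.join("my_root_dir", ".pl_auto_save.ckpt")
trainer = Trainer(resume_from_checkpoint=ckpt_path)  # then trainer.fit(model)
```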

"""
Accelerator properties
"""
########################
carmocca marked this conversation as resolved.
Show resolved Hide resolved
# Accelerator properties
########################
Borda marked this conversation as resolved.
Show resolved Hide resolved

    @property
    def accelerator(self) -> Accelerator:
@@ -1555,7 +1556,7 @@ def root_gpu(self) -> Optional[int]:
        return self.accelerator_connector.root_gpu

    @property
-   def tpu_cores(self) -> int:
+   def tpu_cores(self) -> Optional[Union[int, List[int]]]:
        return self.accelerator_connector.tpu_cores

    @property

@@ -1616,7 +1617,7 @@ def precision(self) -> Union[str, int]:
        return self.accelerator.precision

    @property
-   def scaler(self):
+   def scaler(self) -> Optional[GradScaler]:
        return self.accelerator.scaler

    @property
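The new `Optional[GradScaler]` annotation encodes that a scaler exists only for native 16-bit runs. A hedged sketch of that invariant (the real decision involves the precision plugin; this is simplified):

```python
from typing import Optional

from torch.cuda.amp import GradScaler


def make_scaler(precision: int, native_amp_on_gpu: bool) -> Optional[GradScaler]:
    # Simplified stand-in for the accelerator's precision wiring.
    if precision == 16 and native_amp_on_gpu:
        return GradScaler()
    return None  # 32-bit, CPU, or non-native AMP: no scaler


assert make_scaler(32, native_amp_on_gpu=False) is None
```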
4 changes: 3 additions & 1 deletion pytorch_lightning/utilities/device_parser.py
@@ -70,7 +70,7 @@ def parse_gpu_ids(gpus: Optional[Union[int, str, List[int]]]) -> Optional[List[int]]:
    _check_data_type(gpus)

    # Handle the case when no gpus are requested
-   if gpus is None or isinstance(gpus, int) and gpus == 0 or str(gpus).strip() == "0":
+   if gpus is None or str(gpus).strip() == "0" or (str(gpus).strip() == "-1" and not torch.cuda.is_available()):
        return None

    # We know user requested GPUs therefore if some of the
@@ -173,6 +173,8 @@ def _get_all_available_gpus() -> List[int]:
    Returns:
        a list of all available gpus
    """
+   if not torch.cuda.is_available():
+       return []
    return list(range(torch.cuda.device_count()))


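Taken together, the two hunks make the CPU-only path explicit. A sketch of the resulting behavior, mocking CUDA the same way the repository's tests do:

```python
from unittest import mock

from pytorch_lightning.utilities import device_parser

with mock.patch("torch.cuda.is_available", return_value=False), mock.patch(
    "torch.cuda.device_count", return_value=0
):
    # -1 means "all GPUs"; with zero GPUs visible it now parses to None (CPU)
    # instead of raising a MisconfigurationException.
    assert device_parser.parse_gpu_ids(-1) is None
    assert device_parser.parse_gpu_ids("-1") is None
    assert device_parser.parse_gpu_ids(None) is None  # unchanged
    assert device_parser.parse_gpu_ids(0) is None  # unchanged
```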
6 changes: 6 additions & 0 deletions tests/__init__.py
@@ -12,16 +12,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
+import operator
import os

import numpy as np

+from pytorch_lightning.utilities.imports import _compare_version
+
_TEST_ROOT = os.path.dirname(__file__)
_PROJECT_ROOT = os.path.dirname(_TEST_ROOT)
_TEMP_PATH = os.path.join(_PROJECT_ROOT, "test_temp")
_PATH_DATASETS = os.path.join(_PROJECT_ROOT, "Datasets")
_PATH_LEGACY = os.path.join(_PROJECT_ROOT, "legacy")

+PL_VERSION_LT_1_5 = _compare_version("pytorch_lightning", operator.lt, "1.5")
+TENSORBOARD_VERSION_GE_2_6 = _compare_version("tensorboard", operator.ge, "2.6.0")

# todo: this setting `PYTHONPATH` may not be used by other envs like Conda for import packages
if _PROJECT_ROOT not in os.getenv("PYTHONPATH", ""):
    splitter = ":" if os.environ.get("PYTHONPATH", "") else ""
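A sketch of how these shared constants are meant to be consumed by test modules; the TensorBoard marker appears verbatim in the next file, while the `PL_VERSION_LT_1_5` usage here is hypothetical:

```python
import pytest

from tests import PL_VERSION_LT_1_5, TENSORBOARD_VERSION_GE_2_6


@pytest.mark.skipif(TENSORBOARD_VERSION_GE_2_6, reason="cannot import EventAccumulator in >= 2.6.0")
def test_needs_event_accumulator(tmpdir):
    ...


@pytest.mark.skipif(PL_VERSION_LT_1_5, reason="hypothetical: behavior expected from 1.5 on")
def test_version_gated(tmpdir):
    ...
```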
7 changes: 2 additions & 5 deletions tests/loggers/test_tensorboard.py
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
-import operator
import os
from argparse import Namespace
from unittest import mock

@@ -25,13 +24,11 @@

from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
-from pytorch_lightning.utilities.imports import _compare_version
+from tests import TENSORBOARD_VERSION_GE_2_6
from tests.helpers import BoringModel


-@pytest.mark.skipif(
-    _compare_version("tensorboard", operator.ge, "2.6.0"), reason="cannot import EventAccumulator in >= 2.6.0"
-)
+@pytest.mark.skipif(TENSORBOARD_VERSION_GE_2_6, reason="cannot import EventAccumulator in >= 2.6.0")
def test_tensorboard_hparams_reload(tmpdir):
    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

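Worth noting alongside the skipif change: the `EventAccumulator` import stays inside the test body, as sketched below, because a module-scope import would break collection on tensorboard >= 2.6.0 before the skip marker could take effect.

```python
def test_tensorboard_hparams_reload_sketch(tmpdir):
    # Deferred import: only executed when the test is not skipped.
    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator  # noqa: F401
    ...
```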
201 changes: 0 additions & 201 deletions tests/models/test_gpu.py
@@ -11,31 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import operator
import os
from collections import namedtuple
from unittest import mock
from unittest.mock import patch

import pytest
import torch

import tests.helpers.pipelines as tpipes
import tests.helpers.utils as tutils
from pytorch_lightning import Trainer
from pytorch_lightning.plugins.environments import TorchElasticEnvironment
from pytorch_lightning.utilities import device_parser
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _compare_version
from tests.helpers import BoringModel
from tests.helpers.datamodules import ClassifDataModule
from tests.helpers.imports import Batch, Dataset, Example, Field, LabelField
from tests.helpers.runif import RunIf
from tests.helpers.simple_models import ClassificationModel

PL_VERSION_LT_1_5 = _compare_version("pytorch_lightning", operator.lt, "1.5")
PRETEND_N_OF_GPUS = 16


@RunIf(min_gpus=2)
def test_multi_gpu_none_backend(tmpdir):
@@ -55,195 +43,6 @@ def test_multi_gpu_none_backend(tmpdir):
    tpipes.run_model_test(trainer_options, model, dm)


@RunIf(min_gpus=2)
@pytest.mark.parametrize("gpus", [1, [0], [1]])
def test_single_gpu_model(tmpdir, gpus):
    """Make sure single GPU works (DP mode)."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        enable_progress_bar=False,
        max_epochs=1,
        limit_train_batches=0.1,
        limit_val_batches=0.1,
        gpus=gpus,
    )

    model = BoringModel()
    tpipes.run_model_test(trainer_options, model)


@pytest.fixture
def mocked_device_count(monkeypatch):
    def device_count():
        return PRETEND_N_OF_GPUS

    def is_available():
        return True

    monkeypatch.setattr(torch.cuda, "is_available", is_available)
    monkeypatch.setattr(torch.cuda, "device_count", device_count)


@pytest.fixture
def mocked_device_count_0(monkeypatch):
    def device_count():
        return 0

    monkeypatch.setattr(torch.cuda, "device_count", device_count)
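Note that `mocked_device_count_0` patches only `device_count`, never `is_available`; with the new `torch.cuda.is_available()` check in `parse_gpu_ids`, that is presumably why the `-1`-with-zero-GPUs tests in this file stop holding and are deleted by this PR. A hypothetical fixture (not part of this diff) that pins both entry points:

```python
@pytest.fixture
def mocked_no_cuda(monkeypatch):
    # Pin both CUDA entry points the new code path consults.
    monkeypatch.setattr(torch.cuda, "is_available", lambda: False)
    monkeypatch.setattr(torch.cuda, "device_count", lambda: 0)
```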


@pytest.mark.parametrize(
    ["gpus", "expected_num_gpus", "distributed_backend"],
    [
        pytest.param(None, 0, None, id="None - expect 0 gpu to use."),
        pytest.param(0, 0, None, id="0th gpu, expect 0 gpu to use."),
        pytest.param(1, 1, None, id="1st gpu, expect 1 gpu to use."),
        pytest.param(-1, PRETEND_N_OF_GPUS, "ddp", id="-1 - use all gpus"),
        pytest.param("-1", PRETEND_N_OF_GPUS, "ddp", id="'-1' - use all gpus"),
        pytest.param(3, 3, "ddp", id="3 gpus, expect 3 gpus to use (backend:ddp)"),
    ],
)
def test_trainer_gpu_parse(mocked_device_count, gpus, expected_num_gpus, distributed_backend):
    assert Trainer(gpus=gpus, accelerator=distributed_backend).num_gpus == expected_num_gpus


@pytest.mark.parametrize(
    ["gpus", "expected_num_gpus", "distributed_backend"],
    [
        pytest.param(None, 0, None, id="None - expect 0 gpu to use."),
        pytest.param(None, 0, "ddp", id="None - expect 0 gpu to use."),
    ],
)
def test_trainer_num_gpu_0(mocked_device_count_0, gpus, expected_num_gpus, distributed_backend):
    assert Trainer(gpus=gpus, accelerator=distributed_backend).num_gpus == expected_num_gpus


@pytest.mark.parametrize(
    ["gpus", "expected_root_gpu", "distributed_backend"],
    [
        pytest.param(None, None, "ddp", id="None is None"),
        pytest.param(0, None, "ddp", id="0 gpus, expect gpu root device to be None."),
        pytest.param(1, 0, "ddp", id="1 gpu, expect gpu root device to be 0."),
        pytest.param(-1, 0, "ddp", id="-1 - use all gpus, expect gpu root device to be 0."),
        pytest.param("-1", 0, "ddp", id="'-1' - use all gpus, expect gpu root device to be 0."),
        pytest.param(3, 0, "ddp", id="3 gpus, expect gpu root device to be 0. (backend:ddp)"),
    ],
)
def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, distributed_backend):
    assert Trainer(gpus=gpus, accelerator=distributed_backend).root_gpu == expected_root_gpu


@pytest.mark.parametrize(
    ["gpus", "expected_root_gpu", "distributed_backend"],
    [
        pytest.param(None, None, None, id="None is None"),
        pytest.param(None, None, "ddp", id="None is None"),
        pytest.param(0, None, "ddp", id="0 gpus, expect root gpu to be None"),
    ],
)
def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
    assert Trainer(gpus=gpus, accelerator=distributed_backend).root_gpu == expected_root_gpu


# Asking for a gpu when none are available will result in a MisconfigurationException
@pytest.mark.parametrize(
    ["gpus", "expected_root_gpu", "distributed_backend"],
    [
        (1, None, "ddp"),
        (3, None, "ddp"),
        ([1, 2], None, "ddp"),
        ([0, 1], None, "ddp"),
        (-1, None, "ddp"),
        ("-1", None, "ddp"),
    ],
)
def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
    with pytest.raises(MisconfigurationException):
        Trainer(gpus=gpus, accelerator=distributed_backend)


@pytest.mark.parametrize(
    ["gpus", "expected_root_gpu"],
    [
        pytest.param(None, None, id="No gpus, expect gpu root device to be None"),
        pytest.param([0], 0, id="0th gpu, expect gpu root device to be 0."),
        pytest.param([1], 1, id="1st gpu, expect gpu root device to be 1."),
        pytest.param([3], 3, id="3rd gpu, expect gpu root device to be 3."),
        pytest.param([1, 2], 1, id="[1, 2] gpus, expect gpu root device to be 1."),
    ],
)
def test_determine_root_gpu_device(gpus, expected_root_gpu):
    assert device_parser.determine_root_gpu_device(gpus) == expected_root_gpu


@pytest.mark.parametrize(
    ["gpus", "expected_gpu_ids"],
    [
        (None, None),
        (0, None),
        (1, [0]),
        (3, [0, 1, 2]),
        pytest.param(-1, list(range(PRETEND_N_OF_GPUS)), id="-1 - use all gpus"),
        ([0], [0]),
        ([1, 3], [1, 3]),
        ((1, 3), [1, 3]),
        ("0", None),
        ("3", [0, 1, 2]),
        ("1, 3", [1, 3]),
        ("2,", [2]),
        pytest.param("-1", list(range(PRETEND_N_OF_GPUS)), id="'-1' - use all gpus"),
    ],
)
def test_parse_gpu_ids(mocked_device_count, gpus, expected_gpu_ids):
    assert device_parser.parse_gpu_ids(gpus) == expected_gpu_ids


@pytest.mark.parametrize("gpus", [0.1, -2, False, [], [-1], [None], ["0"], [0, 0]])
def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, gpus):
    with pytest.raises(MisconfigurationException):
        device_parser.parse_gpu_ids(gpus)


@pytest.mark.parametrize("gpus", [[1, 2, 19], -1, "-1"])
def test_parse_gpu_fail_on_non_existent_id(mocked_device_count_0, gpus):
    with pytest.raises(MisconfigurationException):
        device_parser.parse_gpu_ids(gpus)


def test_parse_gpu_fail_on_non_existent_id_2(mocked_device_count):
    with pytest.raises(MisconfigurationException):
        device_parser.parse_gpu_ids([1, 2, 19])


@pytest.mark.parametrize("gpus", [-1, "-1"])
def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_count_0, gpus):
    with pytest.raises(MisconfigurationException):
        device_parser.parse_gpu_ids(gpus)


@mock.patch.dict(
    os.environ,
    {
        "CUDA_VISIBLE_DEVICES": "0",
        "LOCAL_RANK": "1",
        "GROUP_RANK": "1",
        "RANK": "3",
        "WORLD_SIZE": "4",
        "LOCAL_WORLD_SIZE": "2",
    },
)
@mock.patch("torch.cuda.device_count", return_value=1)
@pytest.mark.parametrize("gpus", [[0, 1, 2], 2, "0"])
def test_torchelastic_gpu_parsing(mocked_device_count, gpus):
    """Ensure that when torchelastic is used with the default nproc_per_node of 1 per GPU device,
    we omit sanitizing the gpus, as only one of the GPUs is visible."""
    trainer = Trainer(gpus=gpus)
    assert isinstance(trainer.accelerator_connector.cluster_environment, TorchElasticEnvironment)
    assert trainer.accelerator_connector.parallel_device_ids == device_parser.parse_gpu_ids(gpus)
    assert trainer.gpus == gpus


@RunIf(min_gpus=1)
def test_single_gpu_batch_parse():
    trainer = Trainer(gpus=1)