From eb895aa1c9d12425cb7bcbb254c162b1a52bd114 Mon Sep 17 00:00:00 2001
From: Jirka <jirka.borovec@seznam.cz>
Date: Fri, 6 Aug 2021 12:25:39 +0200
Subject: [PATCH 1/8] fix gpus -1 for CPU

---
 pytorch_lightning/utilities/device_parser.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py
index 6656b9765ba00..72cb16a3f56b5 100644
--- a/pytorch_lightning/utilities/device_parser.py
+++ b/pytorch_lightning/utilities/device_parser.py
@@ -168,11 +168,13 @@ def _normalize_parse_gpu_input_to_list(gpus: Union[int, List[int], Tuple[int, ..
     return list(range(gpus))
 
 
-def _get_all_available_gpus() -> List[int]:
+def _get_all_available_gpus() -> Optional[List[int]]:
     """
     Returns:
          a list of all available gpus
     """
+    if not torch.cuda.is_available():
+        return None
     return list(range(torch.cuda.device_count()))
 
 

From 47ba4252923f8d1da7500afce4859a75eba1d7d4 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <Borda@users.noreply.github.com>
Date: Thu, 14 Oct 2021 23:42:55 +0200
Subject: [PATCH 2/8] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
---
 pytorch_lightning/utilities/device_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py
index 72cb16a3f56b5..0895b963028d2 100644
--- a/pytorch_lightning/utilities/device_parser.py
+++ b/pytorch_lightning/utilities/device_parser.py
@@ -174,7 +174,7 @@ def _get_all_available_gpus() -> Optional[List[int]]:
          a list of all available gpus
     """
     if not torch.cuda.is_available():
-        return None
+        return []
     return list(range(torch.cuda.device_count()))
 
 

From 9bc5d602cc48ba433a7bdf0890c28a53cbcd5057 Mon Sep 17 00:00:00 2001
From: Jirka <jirka.borovec@seznam.cz>
Date: Fri, 15 Oct 2021 00:22:30 +0200
Subject: [PATCH 3/8] fixing

---
 .../connectors/accelerator_connector.py       | 15 ++++-----
 pytorch_lightning/trainer/trainer.py          | 11 ++++---
 pytorch_lightning/utilities/device_parser.py  |  6 ++--
 tests/trainer/flags/test_gpus.py              | 32 +++++++++++++++++++
 4 files changed, 47 insertions(+), 17 deletions(-)
 create mode 100644 tests/trainer/flags/test_gpus.py

diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py
index 7b86aab64130a..4db6b85c873b9 100644
--- a/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -553,16 +553,14 @@ def is_distributed(self) -> bool:
         if hasattr(self.training_type_plugin, "is_distributed") and not self.use_tpu:
             return self.training_type_plugin.is_distributed
         is_distributed = self.use_ddp or self.use_ddp2 or self.use_horovod
-        if self.use_tpu:
+        if self.use_tpu and hasattr(self.training_type_plugin, "is_distributed"):
             is_distributed |= self.training_type_plugin.is_distributed
         return is_distributed
 
     @property
     def num_gpus(self) -> int:
         gpus = self.parallel_device_ids
-        if gpus is None:
-            return 0
-        return len(gpus)
+        return 0 if gpus is None else len(gpus)
 
     @property
     def num_ipus(self) -> int:
@@ -581,6 +579,8 @@ def parallel_devices(self) -> List[Union[torch.device, int]]:
             # https://github.com/PyTorchLightning/pytorch-lightning/issues/3169
             if isinstance(self.tpu_cores, int):
                 devices = list(range(self.tpu_cores))
+            else:
+                raise MisconfigurationException(f"`tpu_cores` has to be int, but {self.tpu_cores} given.")
         elif self.use_ipu:
             devices = list(range(self.num_ipus))
         else:
@@ -589,11 +589,8 @@ def parallel_devices(self) -> List[Union[torch.device, int]]:
 
     @property
     def root_gpu(self) -> Optional[int]:
-        return (
-            self.accelerator.root_device.index
-            if not isinstance(self.accelerator, (IPUAccelerator, TPUAccelerator))
-            else None
-        )
+        if not isinstance(self.accelerator, (IPUAccelerator, TPUAccelerator)):
+            return self.accelerator.root_device.index
 
     @staticmethod
     def _is_plugin_training_type(plugin: Union[str, TrainingTypePlugin]) -> bool:
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index be0a7728edddc..d585cfa55b693 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -24,6 +24,7 @@
 from weakref import proxy
 
 import torch
+from torch.cuda.amp import GradScaler
 from torch.optim import Optimizer
 
 import pytorch_lightning as pl
@@ -1490,9 +1491,9 @@ def _on_exception(self):
         file_path = os.path.join(self.default_root_dir, ".pl_auto_save.ckpt")
         self.save_checkpoint(file_path)
 
-    """
-    Accelerator properties
-    """
+    ########################
+    # Accelerator properties
+    ########################
 
     @property
     def accelerator(self) -> Accelerator:
@@ -1555,7 +1556,7 @@ def root_gpu(self) -> Optional[int]:
         return self.accelerator_connector.root_gpu
 
     @property
-    def tpu_cores(self) -> int:
+    def tpu_cores(self) -> Optional[Union[int, List[int]]]:
         return self.accelerator_connector.tpu_cores
 
     @property
@@ -1616,7 +1617,7 @@ def precision(self) -> Union[str, int]:
         return self.accelerator.precision
 
     @property
-    def scaler(self):
+    def scaler(self) -> Optional[GradScaler]:
         return self.accelerator.scaler
 
     @property
diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py
index 72cb16a3f56b5..8d393e42aa350 100644
--- a/pytorch_lightning/utilities/device_parser.py
+++ b/pytorch_lightning/utilities/device_parser.py
@@ -70,7 +70,7 @@ def parse_gpu_ids(gpus: Optional[Union[int, str, List[int]]]) -> Optional[List[i
     _check_data_type(gpus)
 
     # Handle the case when no gpus are requested
-    if gpus is None or isinstance(gpus, int) and gpus == 0 or str(gpus).strip() == "0":
+    if gpus is None or str(gpus).strip() == "0" or (str(gpus).strip() == "-1" and not torch.cuda.is_available()):
         return None
 
     # We know user requested GPUs therefore if some of the
@@ -168,13 +168,13 @@ def _normalize_parse_gpu_input_to_list(gpus: Union[int, List[int], Tuple[int, ..
     return list(range(gpus))
 
 
-def _get_all_available_gpus() -> Optional[List[int]]:
+def _get_all_available_gpus() -> List[int]:
     """
     Returns:
          a list of all available gpus
     """
     if not torch.cuda.is_available():
-        return None
+        return []
     return list(range(torch.cuda.device_count()))
 
 
diff --git a/tests/trainer/flags/test_gpus.py b/tests/trainer/flags/test_gpus.py
new file mode 100644
index 0000000000000..bb8cd1c350e9a
--- /dev/null
+++ b/tests/trainer/flags/test_gpus.py
@@ -0,0 +1,32 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pytest
+import torch
+
+from pytorch_lightning.trainer import Trainer
+from tests.helpers import BoringModel
+
+
+@pytest.mark.parametrize("gpus", [-1, "-1"])
+def test_all_gpus(tmpdir, gpus):
+    """Test that ``gpus=-1`` uses all available GPUs and falls back to CPU when none are present."""
+    model = BoringModel()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        max_epochs=1,
+        gpus=gpus,
+    )
+    trainer.fit(model)
+    assert trainer.accelerator_connector.use_gpu == torch.cuda.is_available()
+    assert trainer.accelerator_connector.num_gpus == torch.cuda.device_count()

From 51cb32ffdf05b8948f3dccad5673c882acea3d70 Mon Sep 17 00:00:00 2001
From: Jirka <jirka.borovec@seznam.cz>
Date: Fri, 15 Oct 2021 00:24:53 +0200
Subject: [PATCH 4/8] chlog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e9131b0921015..429809e306455 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -483,6 +483,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Fixed
 
+- Fixed parsing argument `gpus=-1`in CPU machines ([#8766](https://github.com/PyTorchLightning/pytorch-lightning/pull/8766))
+
 
 - Fixed an issue with logger outputs not being finalized correctly after prediction runs ([#8685](https://github.com/PyTorchLightning/pytorch-lightning/pull/8685))
 

From dd405443f66f2307254bfef90c7d71ac6a4a2303 Mon Sep 17 00:00:00 2001
From: Jirka <jirka.borovec@seznam.cz>
Date: Fri, 15 Oct 2021 10:08:02 +0200
Subject: [PATCH 5/8] cleaning

---
 tests/__init__.py                 |   6 +
 tests/loggers/test_tensorboard.py |   7 +-
 tests/models/test_gpu.py          | 201 ------------------------------
 tests/trainer/flags/test_gpus.py  | 193 ++++++++++++++++++++++++++++
 4 files changed, 201 insertions(+), 206 deletions(-)

diff --git a/tests/__init__.py b/tests/__init__.py
index 9039a6e4b16e9..d70a86f2e4fd5 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -12,16 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+import operator
 import os
 
 import numpy as np
 
+from pytorch_lightning.utilities.imports import _compare_version
+
 _TEST_ROOT = os.path.dirname(__file__)
 _PROJECT_ROOT = os.path.dirname(_TEST_ROOT)
 _TEMP_PATH = os.path.join(_PROJECT_ROOT, "test_temp")
 _PATH_DATASETS = os.path.join(_PROJECT_ROOT, "Datasets")
 _PATH_LEGACY = os.path.join(_PROJECT_ROOT, "legacy")
 
+PL_VERSION_LT_1_5 = _compare_version("pytorch_lightning", operator.lt, "1.5")
+TENSORBOARD_VERSION_GE_2_6 = _compare_version("tensorboard", operator.ge, "2.6.0")
+
 # todo: this setting `PYTHONPATH` may not be used by other evns like Conda for import packages
 if _PROJECT_ROOT not in os.getenv("PYTHONPATH", ""):
     splitter = ":" if os.environ.get("PYTHONPATH", "") else ""
diff --git a/tests/loggers/test_tensorboard.py b/tests/loggers/test_tensorboard.py
index 02a809aa2ab30..19a5832e4afba 100644
--- a/tests/loggers/test_tensorboard.py
+++ b/tests/loggers/test_tensorboard.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
-import operator
 import os
 from argparse import Namespace
 from unittest import mock
@@ -25,13 +24,11 @@
 
 from pytorch_lightning import Trainer
 from pytorch_lightning.loggers import TensorBoardLogger
-from pytorch_lightning.utilities.imports import _compare_version
+from tests import TENSORBOARD_VERSION_GE_2_6
 from tests.helpers import BoringModel
 
 
-@pytest.mark.skipif(
-    _compare_version("tensorboard", operator.ge, "2.6.0"), reason="cannot import EventAccumulator in >= 2.6.0"
-)
+@pytest.mark.skipif(TENSORBOARD_VERSION_GE_2_6, reason="cannot import EventAccumulator in >= 2.6.0")
 def test_tensorboard_hparams_reload(tmpdir):
     from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
 
diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index d0b25b2af9960..a80b8656c04c6 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -11,31 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import operator
-import os
 from collections import namedtuple
-from unittest import mock
 from unittest.mock import patch
 
-import pytest
 import torch
 
 import tests.helpers.pipelines as tpipes
 import tests.helpers.utils as tutils
 from pytorch_lightning import Trainer
-from pytorch_lightning.plugins.environments import TorchElasticEnvironment
-from pytorch_lightning.utilities import device_parser
-from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from pytorch_lightning.utilities.imports import _compare_version
-from tests.helpers import BoringModel
 from tests.helpers.datamodules import ClassifDataModule
 from tests.helpers.imports import Batch, Dataset, Example, Field, LabelField
 from tests.helpers.runif import RunIf
 from tests.helpers.simple_models import ClassificationModel
 
-PL_VERSION_LT_1_5 = _compare_version("pytorch_lightning", operator.lt, "1.5")
-PRETEND_N_OF_GPUS = 16
-
 
 @RunIf(min_gpus=2)
 def test_multi_gpu_none_backend(tmpdir):
@@ -55,195 +43,6 @@ def test_multi_gpu_none_backend(tmpdir):
     tpipes.run_model_test(trainer_options, model, dm)
 
 
-@RunIf(min_gpus=2)
-@pytest.mark.parametrize("gpus", [1, [0], [1]])
-def test_single_gpu_model(tmpdir, gpus):
-    """Make sure single GPU works (DP mode)."""
-    trainer_options = dict(
-        default_root_dir=tmpdir,
-        enable_progress_bar=False,
-        max_epochs=1,
-        limit_train_batches=0.1,
-        limit_val_batches=0.1,
-        gpus=gpus,
-    )
-
-    model = BoringModel()
-    tpipes.run_model_test(trainer_options, model)
-
-
-@pytest.fixture
-def mocked_device_count(monkeypatch):
-    def device_count():
-        return PRETEND_N_OF_GPUS
-
-    def is_available():
-        return True
-
-    monkeypatch.setattr(torch.cuda, "is_available", is_available)
-    monkeypatch.setattr(torch.cuda, "device_count", device_count)
-
-
-@pytest.fixture
-def mocked_device_count_0(monkeypatch):
-    def device_count():
-        return 0
-
-    monkeypatch.setattr(torch.cuda, "device_count", device_count)
-
-
-@pytest.mark.parametrize(
-    ["gpus", "expected_num_gpus", "distributed_backend"],
-    [
-        pytest.param(None, 0, None, id="None - expect 0 gpu to use."),
-        pytest.param(0, 0, None, id="Oth gpu, expect 1 gpu to use."),
-        pytest.param(1, 1, None, id="1st gpu, expect 1 gpu to use."),
-        pytest.param(-1, PRETEND_N_OF_GPUS, "ddp", id="-1 - use all gpus"),
-        pytest.param("-1", PRETEND_N_OF_GPUS, "ddp", id="'-1' - use all gpus"),
-        pytest.param(3, 3, "ddp", id="3rd gpu - 1 gpu to use (backend:ddp)"),
-    ],
-)
-def test_trainer_gpu_parse(mocked_device_count, gpus, expected_num_gpus, distributed_backend):
-    assert Trainer(gpus=gpus, accelerator=distributed_backend).num_gpus == expected_num_gpus
-
-
-@pytest.mark.parametrize(
-    ["gpus", "expected_num_gpus", "distributed_backend"],
-    [
-        pytest.param(None, 0, None, id="None - expect 0 gpu to use."),
-        pytest.param(None, 0, "ddp", id="None - expect 0 gpu to use."),
-    ],
-)
-def test_trainer_num_gpu_0(mocked_device_count_0, gpus, expected_num_gpus, distributed_backend):
-    assert Trainer(gpus=gpus, accelerator=distributed_backend).num_gpus == expected_num_gpus
-
-
-@pytest.mark.parametrize(
-    ["gpus", "expected_root_gpu", "distributed_backend"],
-    [
-        pytest.param(None, None, "ddp", id="None is None"),
-        pytest.param(0, None, "ddp", id="O gpus, expect gpu root device to be None."),
-        pytest.param(1, 0, "ddp", id="1 gpu, expect gpu root device to be 0."),
-        pytest.param(-1, 0, "ddp", id="-1 - use all gpus, expect gpu root device to be 0."),
-        pytest.param("-1", 0, "ddp", id="'-1' - use all gpus, expect gpu root device to be 0."),
-        pytest.param(3, 0, "ddp", id="3 gpus, expect gpu root device to be 0.(backend:ddp)"),
-    ],
-)
-def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, distributed_backend):
-    assert Trainer(gpus=gpus, accelerator=distributed_backend).root_gpu == expected_root_gpu
-
-
-@pytest.mark.parametrize(
-    ["gpus", "expected_root_gpu", "distributed_backend"],
-    [
-        pytest.param(None, None, None, id="None is None"),
-        pytest.param(None, None, "ddp", id="None is None"),
-        pytest.param(0, None, "ddp", id="None is None"),
-    ],
-)
-def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
-    assert Trainer(gpus=gpus, accelerator=distributed_backend).root_gpu == expected_root_gpu
-
-
-# Asking for a gpu when non are available will result in a MisconfigurationException
-@pytest.mark.parametrize(
-    ["gpus", "expected_root_gpu", "distributed_backend"],
-    [
-        (1, None, "ddp"),
-        (3, None, "ddp"),
-        (3, None, "ddp"),
-        ([1, 2], None, "ddp"),
-        ([0, 1], None, "ddp"),
-        (-1, None, "ddp"),
-        ("-1", None, "ddp"),
-    ],
-)
-def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
-    with pytest.raises(MisconfigurationException):
-        Trainer(gpus=gpus, accelerator=distributed_backend)
-
-
-@pytest.mark.parametrize(
-    ["gpus", "expected_root_gpu"],
-    [
-        pytest.param(None, None, id="No gpus, expect gpu root device to be None"),
-        pytest.param([0], 0, id="Oth gpu, expect gpu root device to be 0."),
-        pytest.param([1], 1, id="1st gpu, expect gpu root device to be 1."),
-        pytest.param([3], 3, id="3rd gpu, expect gpu root device to be 3."),
-        pytest.param([1, 2], 1, id="[1, 2] gpus, expect gpu root device to be 1."),
-    ],
-)
-def test_determine_root_gpu_device(gpus, expected_root_gpu):
-    assert device_parser.determine_root_gpu_device(gpus) == expected_root_gpu
-
-
-@pytest.mark.parametrize(
-    ["gpus", "expected_gpu_ids"],
-    [
-        (None, None),
-        (0, None),
-        (1, [0]),
-        (3, [0, 1, 2]),
-        pytest.param(-1, list(range(PRETEND_N_OF_GPUS)), id="-1 - use all gpus"),
-        ([0], [0]),
-        ([1, 3], [1, 3]),
-        ((1, 3), [1, 3]),
-        ("0", None),
-        ("3", [0, 1, 2]),
-        ("1, 3", [1, 3]),
-        ("2,", [2]),
-        pytest.param("-1", list(range(PRETEND_N_OF_GPUS)), id="'-1' - use all gpus"),
-    ],
-)
-def test_parse_gpu_ids(mocked_device_count, gpus, expected_gpu_ids):
-    assert device_parser.parse_gpu_ids(gpus) == expected_gpu_ids
-
-
-@pytest.mark.parametrize("gpus", [0.1, -2, False, [], [-1], [None], ["0"], [0, 0]])
-def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, gpus):
-    with pytest.raises(MisconfigurationException):
-        device_parser.parse_gpu_ids(gpus)
-
-
-@pytest.mark.parametrize("gpus", [[1, 2, 19], -1, "-1"])
-def test_parse_gpu_fail_on_non_existent_id(mocked_device_count_0, gpus):
-    with pytest.raises(MisconfigurationException):
-        device_parser.parse_gpu_ids(gpus)
-
-
-def test_parse_gpu_fail_on_non_existent_id_2(mocked_device_count):
-    with pytest.raises(MisconfigurationException):
-        device_parser.parse_gpu_ids([1, 2, 19])
-
-
-@pytest.mark.parametrize("gpus", [-1, "-1"])
-def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_count_0, gpus):
-    with pytest.raises(MisconfigurationException):
-        device_parser.parse_gpu_ids(gpus)
-
-
-@mock.patch.dict(
-    os.environ,
-    {
-        "CUDA_VISIBLE_DEVICES": "0",
-        "LOCAL_RANK": "1",
-        "GROUP_RANK": "1",
-        "RANK": "3",
-        "WORLD_SIZE": "4",
-        "LOCAL_WORLD_SIZE": "2",
-    },
-)
-@mock.patch("torch.cuda.device_count", return_value=1)
-@pytest.mark.parametrize("gpus", [[0, 1, 2], 2, "0"])
-def test_torchelastic_gpu_parsing(mocked_device_count, gpus):
-    """Ensure when using torchelastic and nproc_per_node is set to the default of 1 per GPU device That we omit
-    sanitizing the gpus as only one of the GPUs is visible."""
-    trainer = Trainer(gpus=gpus)
-    assert isinstance(trainer.accelerator_connector.cluster_environment, TorchElasticEnvironment)
-    assert trainer.accelerator_connector.parallel_device_ids == device_parser.parse_gpu_ids(gpus)
-    assert trainer.gpus == gpus
-
-
 @RunIf(min_gpus=1)
 def test_single_gpu_batch_parse():
     trainer = Trainer(gpus=1)
diff --git a/tests/trainer/flags/test_gpus.py b/tests/trainer/flags/test_gpus.py
index bb8cd1c350e9a..69676ab19036f 100644
--- a/tests/trainer/flags/test_gpus.py
+++ b/tests/trainer/flags/test_gpus.py
@@ -11,11 +11,204 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+from unittest import mock
+
 import pytest
 import torch
 
+import tests.helpers.pipelines as tpipes
+from pytorch_lightning.plugins.environments import TorchElasticEnvironment
 from pytorch_lightning.trainer import Trainer
+from pytorch_lightning.utilities import device_parser
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers import BoringModel
+from tests.helpers.runif import RunIf
+
+PRETEND_N_OF_GPUS = 16
+
+
+@RunIf(min_gpus=2)
+@pytest.mark.parametrize("gpus", [1, [0], [1]])
+def test_single_gpu_model(tmpdir, gpus):
+    """Make sure single GPU works (DP mode)."""
+    trainer_options = dict(
+        default_root_dir=tmpdir,
+        enable_progress_bar=False,
+        max_epochs=1,
+        limit_train_batches=0.1,
+        limit_val_batches=0.1,
+        gpus=gpus,
+    )
+
+    model = BoringModel()
+    tpipes.run_model_test(trainer_options, model)
+
+
+@pytest.fixture
+def mocked_device_count(monkeypatch):
+    def device_count():
+        return PRETEND_N_OF_GPUS
+
+    def is_available():
+        return True
+
+    monkeypatch.setattr(torch.cuda, "is_available", is_available)
+    monkeypatch.setattr(torch.cuda, "device_count", device_count)
+
+
+@pytest.fixture
+def mocked_device_count_0(monkeypatch):
+    def device_count():
+        return 0
+
+    monkeypatch.setattr(torch.cuda, "device_count", device_count)
+
+
+@pytest.mark.parametrize(
+    ["gpus", "expected_num_gpus", "distributed_backend"],
+    [
+        pytest.param(None, 0, None, id="None - expect 0 gpu to use."),
+        pytest.param(0, 0, None, id="Oth gpu, expect 1 gpu to use."),
+        pytest.param(1, 1, None, id="1st gpu, expect 1 gpu to use."),
+        pytest.param(-1, PRETEND_N_OF_GPUS, "ddp", id="-1 - use all gpus"),
+        pytest.param("-1", PRETEND_N_OF_GPUS, "ddp", id="'-1' - use all gpus"),
+        pytest.param(3, 3, "ddp", id="3rd gpu - 1 gpu to use (backend:ddp)"),
+    ],
+)
+def test_trainer_gpu_parse(mocked_device_count, gpus, expected_num_gpus, distributed_backend):
+    assert Trainer(gpus=gpus, accelerator=distributed_backend).num_gpus == expected_num_gpus
+
+
+@pytest.mark.parametrize(
+    ["gpus", "expected_num_gpus", "distributed_backend"],
+    [
+        pytest.param(None, 0, None, id="None - expect 0 gpu to use."),
+        pytest.param(None, 0, "ddp", id="None - expect 0 gpu to use."),
+    ],
+)
+def test_trainer_num_gpu_0(mocked_device_count_0, gpus, expected_num_gpus, distributed_backend):
+    assert Trainer(gpus=gpus, accelerator=distributed_backend).num_gpus == expected_num_gpus
+
+
+@pytest.mark.parametrize(
+    ["gpus", "expected_root_gpu", "distributed_backend"],
+    [
+        pytest.param(None, None, "ddp", id="None is None"),
+        pytest.param(0, None, "ddp", id="O gpus, expect gpu root device to be None."),
+        pytest.param(1, 0, "ddp", id="1 gpu, expect gpu root device to be 0."),
+        pytest.param(-1, 0, "ddp", id="-1 - use all gpus, expect gpu root device to be 0."),
+        pytest.param("-1", 0, "ddp", id="'-1' - use all gpus, expect gpu root device to be 0."),
+        pytest.param(3, 0, "ddp", id="3 gpus, expect gpu root device to be 0.(backend:ddp)"),
+    ],
+)
+def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, distributed_backend):
+    assert Trainer(gpus=gpus, accelerator=distributed_backend).root_gpu == expected_root_gpu
+
+
+@pytest.mark.parametrize(
+    ["gpus", "expected_root_gpu", "distributed_backend"],
+    [
+        pytest.param(None, None, None, id="None is None"),
+        pytest.param(None, None, "ddp", id="None is None"),
+        pytest.param(0, None, "ddp", id="None is None"),
+    ],
+)
+def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
+    assert Trainer(gpus=gpus, accelerator=distributed_backend).root_gpu == expected_root_gpu
+
+
+# Asking for a gpu when non are available will result in a MisconfigurationException
+@pytest.mark.parametrize(
+    ["gpus", "expected_root_gpu", "distributed_backend"],
+    [
+        (1, None, "ddp"),
+        (3, None, "ddp"),
+        (3, None, "ddp"),
+        ([1, 2], None, "ddp"),
+        ([0, 1], None, "ddp"),
+        (-1, None, "ddp"),
+        ("-1", None, "ddp"),
+    ],
+)
+def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
+    with pytest.raises(MisconfigurationException):
+        Trainer(gpus=gpus, accelerator=distributed_backend)
+
+
+@pytest.mark.parametrize(
+    ["gpus", "expected_root_gpu"],
+    [
+        pytest.param(None, None, id="No gpus, expect gpu root device to be None"),
+        pytest.param([0], 0, id="Oth gpu, expect gpu root device to be 0."),
+        pytest.param([1], 1, id="1st gpu, expect gpu root device to be 1."),
+        pytest.param([3], 3, id="3rd gpu, expect gpu root device to be 3."),
+        pytest.param([1, 2], 1, id="[1, 2] gpus, expect gpu root device to be 1."),
+    ],
+)
+def test_determine_root_gpu_device(gpus, expected_root_gpu):
+    assert device_parser.determine_root_gpu_device(gpus) == expected_root_gpu
+
+
+@pytest.mark.parametrize(
+    ["gpus", "expected_gpu_ids"],
+    [
+        (None, None),
+        (0, None),
+        (1, [0]),
+        (3, [0, 1, 2]),
+        pytest.param(-1, list(range(PRETEND_N_OF_GPUS)), id="-1 - use all gpus"),
+        ([0], [0]),
+        ([1, 3], [1, 3]),
+        ((1, 3), [1, 3]),
+        ("0", None),
+        ("3", [0, 1, 2]),
+        ("1, 3", [1, 3]),
+        ("2,", [2]),
+        pytest.param("-1", list(range(PRETEND_N_OF_GPUS)), id="'-1' - use all gpus"),
+    ],
+)
+def test_parse_gpu_ids(mocked_device_count, gpus, expected_gpu_ids):
+    assert device_parser.parse_gpu_ids(gpus) == expected_gpu_ids
+
+
+@pytest.mark.parametrize("gpus", [0.1, -2, False, [], [-1], [None], ["0"], [0, 0]])
+def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, gpus):
+    with pytest.raises(MisconfigurationException):
+        device_parser.parse_gpu_ids(gpus)
+
+
+@pytest.mark.parametrize("gpus", [[1, 2, 19]])
+def test_parse_gpu_fail_on_non_existent_id(mocked_device_count_0, gpus):
+    with pytest.raises(MisconfigurationException):
+        device_parser.parse_gpu_ids(gpus)
+
+
+def test_parse_gpu_fail_on_non_existent_id_2(mocked_device_count):
+    with pytest.raises(MisconfigurationException):
+        device_parser.parse_gpu_ids([1, 2, 19])
+
+
+@mock.patch.dict(
+    os.environ,
+    {
+        "CUDA_VISIBLE_DEVICES": "0",
+        "LOCAL_RANK": "1",
+        "GROUP_RANK": "1",
+        "RANK": "3",
+        "WORLD_SIZE": "4",
+        "LOCAL_WORLD_SIZE": "2",
+    },
+)
+@mock.patch("torch.cuda.device_count", return_value=1)
+@pytest.mark.parametrize("gpus", [[0, 1, 2], 2, "0"])
+def test_torchelastic_gpu_parsing(mocked_device_count, gpus):
+    """Ensure when using torchelastic and nproc_per_node is set to the default of 1 per GPU device That we omit
+    sanitizing the gpus as only one of the GPUs is visible."""
+    trainer = Trainer(gpus=gpus)
+    assert isinstance(trainer.accelerator_connector.cluster_environment, TorchElasticEnvironment)
+    assert trainer.accelerator_connector.parallel_device_ids == device_parser.parse_gpu_ids(gpus)
+    assert trainer.gpus == gpus
 
 
 @pytest.mark.parametrize("gpus", [-1, "-1"])

From 955af45381bae3db2120d41ebda39a96b150904b Mon Sep 17 00:00:00 2001
From: Jirka Borovec <Borda@users.noreply.github.com>
Date: Fri, 15 Oct 2021 10:11:43 +0200
Subject: [PATCH 6/8] typo

Co-authored-by: Aki Nitta <nitta@akihironitta.com>
---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 429809e306455..ecb6ffd5b7325 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -483,7 +483,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Fixed
 
-- Fixed parsing argument `gpus=-1`in CPU machines ([#8766](https://github.com/PyTorchLightning/pytorch-lightning/pull/8766))
+- Fixed parsing of the `gpus=-1` argument on CPU-only machines ([#8766](https://github.com/PyTorchLightning/pytorch-lightning/pull/8766))
 
 
 - Fixed an issue with logger outputs not being finalized correctly after prediction runs ([#8685](https://github.com/PyTorchLightning/pytorch-lightning/pull/8685))

From 9abc72ce9a4208ddee5aeaf411e46818e4320a5a Mon Sep 17 00:00:00 2001
From: Jirka <jirka.borovec@seznam.cz>
Date: Tue, 26 Oct 2021 23:31:37 +0200
Subject: [PATCH 7/8] flags

---
 tests/trainer/flags/test_gpus.py | 36 ++++++++++++++++----------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tests/trainer/flags/test_gpus.py b/tests/trainer/flags/test_gpus.py
index 69676ab19036f..84110fff10629 100644
--- a/tests/trainer/flags/test_gpus.py
+++ b/tests/trainer/flags/test_gpus.py
@@ -66,7 +66,7 @@ def device_count():
 
 
 @pytest.mark.parametrize(
-    ["gpus", "expected_num_gpus", "distributed_backend"],
+    ["gpus", "expected_num_gpus", "strategy"],
     [
         pytest.param(None, 0, None, id="None - expect 0 gpu to use."),
         pytest.param(0, 0, None, id="Oth gpu, expect 1 gpu to use."),
@@ -76,23 +76,23 @@ def device_count():
         pytest.param(3, 3, "ddp", id="3rd gpu - 1 gpu to use (backend:ddp)"),
     ],
 )
-def test_trainer_gpu_parse(mocked_device_count, gpus, expected_num_gpus, distributed_backend):
-    assert Trainer(gpus=gpus, accelerator=distributed_backend).num_gpus == expected_num_gpus
+def test_trainer_gpu_parse(mocked_device_count, gpus, expected_num_gpus, strategy):
+    assert Trainer(gpus=gpus, strategy=strategy).num_gpus == expected_num_gpus
 
 
 @pytest.mark.parametrize(
-    ["gpus", "expected_num_gpus", "distributed_backend"],
+    ["gpus", "expected_num_gpus", "strategy"],
     [
         pytest.param(None, 0, None, id="None - expect 0 gpu to use."),
         pytest.param(None, 0, "ddp", id="None - expect 0 gpu to use."),
     ],
 )
-def test_trainer_num_gpu_0(mocked_device_count_0, gpus, expected_num_gpus, distributed_backend):
-    assert Trainer(gpus=gpus, accelerator=distributed_backend).num_gpus == expected_num_gpus
+def test_trainer_num_gpu_0(mocked_device_count_0, gpus, expected_num_gpus, strategy):
+    assert Trainer(gpus=gpus, strategy=strategy).num_gpus == expected_num_gpus
 
 
 @pytest.mark.parametrize(
-    ["gpus", "expected_root_gpu", "distributed_backend"],
+    ["gpus", "expected_root_gpu", "strategy"],
     [
         pytest.param(None, None, "ddp", id="None is None"),
         pytest.param(0, None, "ddp", id="O gpus, expect gpu root device to be None."),
@@ -102,25 +102,25 @@ def test_trainer_num_gpu_0(mocked_device_count_0, gpus, expected_num_gpus, distr
         pytest.param(3, 0, "ddp", id="3 gpus, expect gpu root device to be 0.(backend:ddp)"),
     ],
 )
-def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, distributed_backend):
-    assert Trainer(gpus=gpus, accelerator=distributed_backend).root_gpu == expected_root_gpu
+def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, strategy):
+    assert Trainer(gpus=gpus, strategy=strategy).root_gpu == expected_root_gpu
 
 
 @pytest.mark.parametrize(
-    ["gpus", "expected_root_gpu", "distributed_backend"],
+    ["gpus", "expected_root_gpu", "strategy"],
     [
         pytest.param(None, None, None, id="None is None"),
         pytest.param(None, None, "ddp", id="None is None"),
         pytest.param(0, None, "ddp", id="None is None"),
     ],
 )
-def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
-    assert Trainer(gpus=gpus, accelerator=distributed_backend).root_gpu == expected_root_gpu
+def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_gpu, strategy):
+    assert Trainer(gpus=gpus, strategy=strategy).root_gpu == expected_root_gpu
 
 
 # Asking for a gpu when non are available will result in a MisconfigurationException
 @pytest.mark.parametrize(
-    ["gpus", "expected_root_gpu", "distributed_backend"],
+    ["gpus", "expected_root_gpu", "strategy"],
     [
         (1, None, "ddp"),
         (3, None, "ddp"),
@@ -131,9 +131,9 @@ def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_
         ("-1", None, "ddp"),
     ],
 )
-def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
+def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_gpu, strategy):
     with pytest.raises(MisconfigurationException):
-        Trainer(gpus=gpus, accelerator=distributed_backend)
+        Trainer(gpus=gpus, strategy=strategy)
 
 
 @pytest.mark.parametrize(
@@ -178,7 +178,7 @@ def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, gpus):
         device_parser.parse_gpu_ids(gpus)
 
 
-@pytest.mark.parametrize("gpus", [[1, 2, 19]])
+@pytest.mark.parametrize("gpus", [[1, 2, 19], -1, "-1"])
 def test_parse_gpu_fail_on_non_existent_id(mocked_device_count_0, gpus):
     with pytest.raises(MisconfigurationException):
         device_parser.parse_gpu_ids(gpus)
@@ -206,8 +206,8 @@ def test_torchelastic_gpu_parsing(mocked_device_count, gpus):
     """Ensure when using torchelastic and nproc_per_node is set to the default of 1 per GPU device That we omit
     sanitizing the gpus as only one of the GPUs is visible."""
     trainer = Trainer(gpus=gpus)
-    assert isinstance(trainer.accelerator_connector.cluster_environment, TorchElasticEnvironment)
-    assert trainer.accelerator_connector.parallel_device_ids == device_parser.parse_gpu_ids(gpus)
+    assert isinstance(trainer._accelerator_connector.cluster_environment, TorchElasticEnvironment)
+    assert trainer._accelerator_connector.parallel_device_ids == device_parser.parse_gpu_ids(gpus)
     assert trainer.gpus == gpus
 
 

From 3f90fd47f991ea98cbefcb49453a544cdb0a63b4 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <Borda@users.noreply.github.com>
Date: Tue, 26 Oct 2021 23:34:33 +0200
Subject: [PATCH 8/8] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
---
 tests/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/__init__.py b/tests/__init__.py
index d70a86f2e4fd5..f8d79890ff84b 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -25,7 +25,6 @@
 _PATH_DATASETS = os.path.join(_PROJECT_ROOT, "Datasets")
 _PATH_LEGACY = os.path.join(_PROJECT_ROOT, "legacy")
 
-PL_VERSION_LT_1_5 = _compare_version("pytorch_lightning", operator.lt, "1.5")
 TENSORBOARD_VERSION_GE_2_6 = _compare_version("tensorboard", operator.ge, "2.6.0")
 
 # todo: this setting `PYTHONPATH` may not be used by other evns like Conda for import packages