From ae1f2285df8fede35328e21d1722a9254a2f54e1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 8 Sep 2020 13:20:51 -0400 Subject: [PATCH 1/9] ref: train loop refactors part 2: 1/n --- pytorch_lightning/trainer/trainer.py | 2 +- .../trainer/{training_loop_temp.py => training_loop.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename pytorch_lightning/trainer/{training_loop_temp.py => training_loop.py} (100%) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index f5a8f276594e8..d81c89327f79a 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -56,7 +56,7 @@ from pytorch_lightning.accelerators.accelerator_connector import AcceleratorConnector from pytorch_lightning.trainer.logger_connector import LoggerConnector from pytorch_lightning.trainer.lr_scheduler_connector import LRSchedulerConnector -from pytorch_lightning.trainer.training_loop_temp import TrainLoop +from pytorch_lightning.trainer.training_loop import TrainLoop from pytorch_lightning import _logger as log from pytorch_lightning.trainer.tuning import Tuner from pytorch_lightning.utilities.model_utils import is_overridden diff --git a/pytorch_lightning/trainer/training_loop_temp.py b/pytorch_lightning/trainer/training_loop.py similarity index 100% rename from pytorch_lightning/trainer/training_loop_temp.py rename to pytorch_lightning/trainer/training_loop.py From 8b27b8f7c9df6d8c7fc06eb39efadabbb1bf8a20 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 8 Sep 2020 14:01:16 -0400 Subject: [PATCH 2/9] ref: device parser --- pytorch_lightning/trainer/distrib_parts.py | 277 ------------------ pytorch_lightning/trainer/trainer.py | 14 +- pytorch_lightning/tuner/__init__.py | 0 pytorch_lightning/tuner/auto_gpu_select.py | 36 +++ .../{trainer => tuner}/batch_size_scaling.py | 0 .../{trainer => tuner}/tuning.py | 6 +- pytorch_lightning/utilities/device_parser.py | 199 +++++++++++++ tests/models/test_gpu.py | 14 +- 8 files changed, 254 insertions(+), 292 deletions(-) create mode 100644 pytorch_lightning/tuner/__init__.py create mode 100644 pytorch_lightning/tuner/auto_gpu_select.py rename pytorch_lightning/{trainer => tuner}/batch_size_scaling.py (100%) rename pytorch_lightning/{trainer => tuner}/tuning.py (83%) create mode 100644 pytorch_lightning/utilities/device_parser.py diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py index c085c8acef375..1a98734a802d9 100644 --- a/pytorch_lightning/trainer/distrib_parts.py +++ b/pytorch_lightning/trainer/distrib_parts.py @@ -18,43 +18,16 @@ """ -from contextlib import ExitStack -import os from abc import ABC, abstractmethod -import time -import random import torch -from torch.optim.lr_scheduler import _LRScheduler from typing import Union, Callable, Any, List, Optional, Tuple, MutableSequence from pytorch_lightning.core.lightning import LightningModule -from pytorch_lightning import _logger as log from pytorch_lightning.overrides.data_parallel import ( LightningDistributedDataParallel, LightningDataParallel, ) from pytorch_lightning.utilities import move_data_to_device, AMPType -from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.distributed import rank_zero_only - -try: - from apex import amp -except ImportError: - amp = None - -try: - import torch_xla.core.xla_model as xm -except ImportError: - XLA_AVAILABLE = False -else: - XLA_AVAILABLE = True - -try: - import horovod.torch as hvd 
-except (ModuleNotFoundError, ImportError): - HOROVOD_AVAILABLE = False -else: - HOROVOD_AVAILABLE = True class TrainerDPMixin(ABC): @@ -82,30 +55,10 @@ class TrainerDPMixin(ABC): logger: ... amp_backend: AMPType - @abstractmethod - def call_setup_hook(self, *args): - """Warning: this is just empty shell for code implemented in other class.""" - - @abstractmethod - def init_optimizers(self, *args) -> Tuple[List, List, List]: - """Warning: this is just empty shell for code implemented in other class.""" - @abstractmethod def get_model(self) -> LightningModule: """Warning: this is just empty shell for code implemented in other class.""" - @abstractmethod - def reinit_scheduler_properties(self, *args): - """Warning: this is just empty shell for code implemented in other class.""" - - @abstractmethod - def setup(self, *args) -> None: - """Warning: this is just empty shell for code implemented in other class.""" - - @abstractmethod - def is_function_implemented(self, *args) -> bool: - """Warning: this is just empty shell for code implemented in other class.""" - def copy_trainer_model_properties(self, model): if isinstance(model, LightningDataParallel): ref_model = model.module @@ -152,233 +105,3 @@ def __transfer_batch_to_device(self, batch: Any, device: torch.device): if model is not None: return model.transfer_batch_to_device(batch, device) return move_data_to_device(batch, device) - - -def _normalize_parse_gpu_string_input(s: Union[int, str, List[int]]) -> Union[int, List[int]]: - if isinstance(s, str): - if s == '-1': - return -1 - else: - return [int(x.strip()) for x in s.split(',') if len(x) > 0] - else: - return s - - -def get_all_available_gpus() -> List[int]: - """ - Returns: - a list of all available gpus - """ - return list(range(torch.cuda.device_count())) - - -def _check_data_type(device_ids: Any) -> None: - """ - Checks that the device_ids argument is one of: None, Int, String or List. - Raises a MisconfigurationException otherwise. - - Args: - device_ids: gpus/tpu_cores parameter as passed to the Trainer - """ - if device_ids is not None and (not isinstance(device_ids, (int, str, MutableSequence)) or isinstance(device_ids, bool)): - raise MisconfigurationException("Device ID's (GPU/TPU) must be int, string or sequence of ints or None.") - - -def _normalize_parse_gpu_input_to_list(gpus: Union[int, List[int]]) -> Optional[List[int]]: - assert gpus is not None - if isinstance(gpus, MutableSequence): - return list(gpus) - - # must be an int - if not gpus: # gpus==0 - return None - if gpus == -1: - return get_all_available_gpus() - - return list(range(gpus)) - - -def sanitize_gpu_ids(gpus: List[int]) -> List[int]: - """ - Checks that each of the GPUs in the list is actually available. - Raises a MisconfigurationException if any of the GPUs is not available. 
- - Args: - gpus: list of ints corresponding to GPU indices - - Returns: - unmodified gpus variable - """ - all_available_gpus = get_all_available_gpus() - misconfig = False - for gpu in gpus: - if gpu not in all_available_gpus: - misconfig = True - - if misconfig: - # sometimes auto ddp might have different flags - # but this is not what the user intended - # correct for the user - if len(gpus) == len(all_available_gpus): - gpus = all_available_gpus - else: - raise MisconfigurationException(f""" - You requested GPUs: {gpus} - But your machine only has: {all_available_gpus} - """) - return gpus - - -def _parse_gpu_ids(gpus: Optional[Union[int, str, List[int]]]) -> Optional[List[int]]: - """ - Parses the GPU ids given in the format as accepted by the - :class:`~pytorch_lightning.trainer.Trainer`. - - Args: - gpus: An int -1 or string '-1' indicate that all available GPUs should be used. - A list of ints or a string containing list of comma separated integers - indicates specific GPUs to use. - An int 0 means that no GPUs should be used. - Any int N > 0 indicates that GPUs [0..N) should be used. - - Returns: - a list of gpus to be used or ``None`` if no GPUs were requested - - If no GPUs are available but the value of gpus variable indicates request for GPUs - then a MisconfigurationException is raised. - """ - - # nothing was passed into the GPUs argument - if callable(gpus): - return None - - # Check that gpus param is None, Int, String or List - _check_data_type(gpus) - - # Handle the case when no gpus are requested - if gpus is None or isinstance(gpus, int) and gpus == 0: - return None - - # We know user requested GPUs therefore if some of the - # requested GPUs are not available an exception is thrown. - - gpus = _normalize_parse_gpu_string_input(gpus) - gpus = _normalize_parse_gpu_input_to_list(gpus) - if not gpus: - raise MisconfigurationException("GPUs requested but none are available.") - gpus = sanitize_gpu_ids(gpus) - - return gpus - - -def determine_root_gpu_device(gpus: List[int]) -> Optional[int]: - """ - Args: - gpus: non-empty list of ints representing which gpus to use - - Returns: - designated root GPU device id - """ - if gpus is None: - return None - - assert isinstance(gpus, list), "gpus should be a list" - assert len(gpus) > 0, "gpus should be a non empty list" - - # set root gpu - root_gpu = gpus[0] - - return root_gpu - - -def retry_jittered_backoff(func: Callable, num_retries: int = 5, cap_delay: float = 1.0, base_delay: float = 0.01): - """Retry jittered backoff. - - Based on: - https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ - - Args: - func: tested function - num_retries: number of tries - cap_delay: max sleep time - base_delay: initial sleep time is 10ms - """ - sleep_delay = base_delay # initial sleep time is 10ms - - for i in range(num_retries): - try: - return func() - except RuntimeError as err: - if i == num_retries - 1: - raise err - else: - continue - time.sleep(sleep_delay) - sleep_delay = min(cap_delay, random.uniform(base_delay, sleep_delay * 3)) - - -def _parse_tpu_cores(tpu_cores: Union[int, str, List]) -> Optional[Union[List[int], int]]: - """ - Parses the tpu_cores given in the format as accepted by the - :class:`~pytorch_lightning.trainer.Trainer`. 
- - Args: - tpu_cores: An int 1 or string '1' indicate that 1 core with multi-processing should be used - An int 8 or string '8' indicate that all 8 cores with multi-processing should be used - A list of int or a string containing list of comma separated integer - indicates specific TPU core to use. - - Returns: - a list of tpu_cores to be used or ``None`` if no TPU cores were requested - """ - - if callable(tpu_cores): - return None - - _check_data_type(tpu_cores) - - if isinstance(tpu_cores, str): - tpu_cores = _parse_tpu_cores_str(tpu_cores.strip()) - - if not _tpu_cores_valid(tpu_cores): - raise MisconfigurationException("`tpu_cores` can only be 1, 8 or [<1-8>]") - - return tpu_cores - - -def _tpu_cores_valid(tpu_cores): - return tpu_cores in (1, 8, None) or ( - isinstance(tpu_cores, (list, tuple, set)) and - len(tpu_cores) == 1 and - tpu_cores[0] in range(1, 9) - ) - - -def _parse_tpu_cores_str(tpu_cores): - if tpu_cores in ('1', '8'): - tpu_cores = int(tpu_cores) - else: - tpu_cores = [int(x.strip()) for x in tpu_cores.split(',') if len(x) > 0] - return tpu_cores - - -def pick_single_gpu(exclude_gpus: list): - for i in range(torch.cuda.device_count()): - if i in exclude_gpus: - continue - # Try to allocate on device: - device = torch.device(f"cuda:{i}") - try: - torch.ones(1).to(device) - except RuntimeError: - continue - return i - raise RuntimeError("No GPUs available.") - - -def pick_multiple_gpus(nb): - picked = [] - for _ in range(nb): - picked.append(pick_single_gpu(exclude_gpus=picked)) - - return picked diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index d81c89327f79a..63eb3a122d845 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -36,8 +36,8 @@ from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin from pytorch_lightning.trainer.deprecated_api import TrainerDeprecatedAPITillVer0_10 from pytorch_lightning.trainer.distrib_data_parallel import TrainerDDPMixin -from pytorch_lightning.trainer.distrib_parts import (TrainerDPMixin, _parse_gpu_ids, _parse_tpu_cores, - determine_root_gpu_device, pick_multiple_gpus) +from pytorch_lightning.utilities import device_parser +from pytorch_lightning.trainer.distrib_parts import (TrainerDPMixin) from pytorch_lightning.trainer.evaluation_loop import TrainerEvaluationLoopMixin from pytorch_lightning.trainer.logging import TrainerLoggingMixin from pytorch_lightning.trainer.lr_finder import TrainerLRFinderMixin @@ -58,7 +58,7 @@ from pytorch_lightning.trainer.lr_scheduler_connector import LRSchedulerConnector from pytorch_lightning.trainer.training_loop import TrainLoop from pytorch_lightning import _logger as log -from pytorch_lightning.trainer.tuning import Tuner +from pytorch_lightning.tuner.tuning import Tuner from pytorch_lightning.utilities.model_utils import is_overridden # warnings to ignore in trainer @@ -449,7 +449,7 @@ def __init__( raise MisconfigurationException("track_grad_norm can be an int, a float or 'inf' (infinity norm).") self.track_grad_norm = float(track_grad_norm) - self.tpu_cores = _parse_tpu_cores(tpu_cores) + self.tpu_cores = device_parser.parse_tpu_cores(tpu_cores) self.on_tpu = self.tpu_cores is not None self.tpu_id = self.tpu_cores[0] if isinstance(self.tpu_cores, list) else None @@ -507,12 +507,12 @@ def __init__( # for gpus allow int, string and gpu list if auto_select_gpus and isinstance(gpus, int): - self.gpus = pick_multiple_gpus(gpus) + self.gpus = self.tuner.pick_multiple_gpus(gpus) else: self.gpus = 
gpus - self.data_parallel_device_ids = _parse_gpu_ids(self.gpus) - self.root_gpu = determine_root_gpu_device(self.data_parallel_device_ids) + self.data_parallel_device_ids = device_parser.parse_gpu_ids(self.gpus) + self.root_gpu = device_parser.determine_root_gpu_device(self.data_parallel_device_ids) self.root_device = torch.device("cpu") self.on_gpu = True if (self.data_parallel_device_ids and torch.cuda.is_available()) else False diff --git a/pytorch_lightning/tuner/__init__.py b/pytorch_lightning/tuner/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pytorch_lightning/tuner/auto_gpu_select.py b/pytorch_lightning/tuner/auto_gpu_select.py new file mode 100644 index 0000000000000..f1b13a69745bc --- /dev/null +++ b/pytorch_lightning/tuner/auto_gpu_select.py @@ -0,0 +1,36 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + + +def pick_multiple_gpus(nb): + picked = [] + for _ in range(nb): + picked.append(pick_single_gpu(exclude_gpus=picked)) + + return picked + + +def pick_single_gpu(exclude_gpus: list): + for i in range(torch.cuda.device_count()): + if i in exclude_gpus: + continue + # Try to allocate on device: + device = torch.device(f"cuda:{i}") + try: + torch.ones(1).to(device) + except RuntimeError: + continue + return i + raise RuntimeError("No GPUs available.") diff --git a/pytorch_lightning/trainer/batch_size_scaling.py b/pytorch_lightning/tuner/batch_size_scaling.py similarity index 100% rename from pytorch_lightning/trainer/batch_size_scaling.py rename to pytorch_lightning/tuner/batch_size_scaling.py diff --git a/pytorch_lightning/trainer/tuning.py b/pytorch_lightning/tuner/tuning.py similarity index 83% rename from pytorch_lightning/trainer/tuning.py rename to pytorch_lightning/tuner/tuning.py index 331805c9668ac..6601cb93fc62d 100644 --- a/pytorch_lightning/trainer/tuning.py +++ b/pytorch_lightning/tuner/tuning.py @@ -11,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from pytorch_lightning.trainer.batch_size_scaling import scale_batch_size +from pytorch_lightning.tuner.batch_size_scaling import scale_batch_size +from pytorch_lightning.tuner.auto_gpu_select import pick_multiple_gpus class Tuner: @@ -30,3 +31,6 @@ def scale_batch_size(self, return scale_batch_size( self.trainer, model, mode, steps_per_trial, init_val, max_trials, batch_arg_name, **fit_kwargs ) + + def pick_multiple_gpus(self, num_gpus: int): + return pick_multiple_gpus(num_gpus) diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py new file mode 100644 index 0000000000000..35ae7aa9040dc --- /dev/null +++ b/pytorch_lightning/utilities/device_parser.py @@ -0,0 +1,199 @@ +# Copyright The PyTorch Lightning team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from typing import Union, Any, List, Optional, MutableSequence +from pytorch_lightning.utilities.exceptions import MisconfigurationException + + +def determine_root_gpu_device(gpus: List[int]) -> Optional[int]: + """ + Args: + gpus: non-empty list of ints representing which gpus to use + + Returns: + designated root GPU device id + """ + if gpus is None: + return None + + assert isinstance(gpus, list), "gpus should be a list" + assert len(gpus) > 0, "gpus should be a non empty list" + + # set root gpu + root_gpu = gpus[0] + + return root_gpu + + +def parse_gpu_ids(gpus: Optional[Union[int, str, List[int]]]) -> Optional[List[int]]: + """ + Parses the GPU ids given in the format as accepted by the + :class:`~pytorch_lightning.trainer.Trainer`. + + Args: + gpus: An int -1 or string '-1' indicate that all available GPUs should be used. + A list of ints or a string containing list of comma separated integers + indicates specific GPUs to use. + An int 0 means that no GPUs should be used. + Any int N > 0 indicates that GPUs [0..N) should be used. + + Returns: + a list of gpus to be used or ``None`` if no GPUs were requested + + If no GPUs are available but the value of gpus variable indicates request for GPUs + then a MisconfigurationException is raised. + """ + + # nothing was passed into the GPUs argument + if callable(gpus): + return None + + # Check that gpus param is None, Int, String or List + _check_data_type(gpus) + + # Handle the case when no gpus are requested + if gpus is None or isinstance(gpus, int) and gpus == 0: + return None + + # We know user requested GPUs therefore if some of the + # requested GPUs are not available an exception is thrown. + + gpus = _normalize_parse_gpu_string_input(gpus) + gpus = _normalize_parse_gpu_input_to_list(gpus) + if not gpus: + raise MisconfigurationException("GPUs requested but none are available.") + gpus = _sanitize_gpu_ids(gpus) + + return gpus + + +def parse_tpu_cores(tpu_cores: Union[int, str, List]) -> Optional[Union[List[int], int]]: + """ + Parses the tpu_cores given in the format as accepted by the + :class:`~pytorch_lightning.trainer.Trainer`. + + Args: + tpu_cores: An int 1 or string '1' indicate that 1 core with multi-processing should be used + An int 8 or string '8' indicate that all 8 cores with multi-processing should be used + A list of int or a string containing list of comma separated integer + indicates specific TPU core to use. 
+ + Returns: + a list of tpu_cores to be used or ``None`` if no TPU cores were requested + """ + + if callable(tpu_cores): + return None + + _check_data_type(tpu_cores) + + if isinstance(tpu_cores, str): + tpu_cores = _parse_tpu_cores_str(tpu_cores.strip()) + + if not _tpu_cores_valid(tpu_cores): + raise MisconfigurationException("`tpu_cores` can only be 1, 8 or [<1-8>]") + + return tpu_cores + + +def _normalize_parse_gpu_string_input(s: Union[int, str, List[int]]) -> Union[int, List[int]]: + if isinstance(s, str): + if s == '-1': + return -1 + else: + return [int(x.strip()) for x in s.split(',') if len(x) > 0] + else: + return s + + +def _sanitize_gpu_ids(gpus: List[int]) -> List[int]: + """ + Checks that each of the GPUs in the list is actually available. + Raises a MisconfigurationException if any of the GPUs is not available. + + Args: + gpus: list of ints corresponding to GPU indices + + Returns: + unmodified gpus variable + """ + all_available_gpus = _get_all_available_gpus() + misconfig = False + for gpu in gpus: + if gpu not in all_available_gpus: + misconfig = True + + if misconfig: + # sometimes auto ddp might have different flags + # but this is not what the user intended + # correct for the user + if len(gpus) == len(all_available_gpus): + gpus = all_available_gpus + else: + raise MisconfigurationException(f""" + You requested GPUs: {gpus} + But your machine only has: {all_available_gpus} + """) + return gpus + + +def _normalize_parse_gpu_input_to_list(gpus: Union[int, List[int]]) -> Optional[List[int]]: + assert gpus is not None + if isinstance(gpus, MutableSequence): + return list(gpus) + + # must be an int + if not gpus: # gpus==0 + return None + if gpus == -1: + return _get_all_available_gpus() + + return list(range(gpus)) + + +def _get_all_available_gpus() -> List[int]: + """ + Returns: + a list of all available gpus + """ + return list(range(torch.cuda.device_count())) + + +def _check_data_type(device_ids: Any) -> None: + """ + Checks that the device_ids argument is one of: None, Int, String or List. + Raises a MisconfigurationException otherwise. 
+ + Args: + device_ids: gpus/tpu_cores parameter as passed to the Trainer + """ + if device_ids is not None and \ + (not isinstance(device_ids, (int, str, MutableSequence)) or isinstance(device_ids, bool)): + raise MisconfigurationException("Device ID's (GPU/TPU) must be int, string or sequence of ints or None.") + + +def _tpu_cores_valid(tpu_cores): + return tpu_cores in (1, 8, None) or ( + isinstance(tpu_cores, (list, tuple, set)) and + len(tpu_cores) == 1 and + tpu_cores[0] in range(1, 9) + ) + + +def _parse_tpu_cores_str(tpu_cores): + if tpu_cores in ('1', '8'): + tpu_cores = int(tpu_cores) + else: + tpu_cores = [int(x.strip()) for x in tpu_cores.split(',') if len(x) > 0] + return tpu_cores diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index b6a2efbb8621b..fa13aaf4a7e93 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -14,7 +14,7 @@ import tests.base.develop_utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.core import memory -from pytorch_lightning.trainer.distrib_parts import _parse_gpu_ids, determine_root_gpu_device +from pytorch_lightning.utilities import device_parser from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.base import EvalModelTemplate from tests.models.data.ddp import train_test_variations @@ -275,7 +275,7 @@ def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_ pytest.param([1, 2], 1, id="[1, 2] gpus, expect gpu root device to be 1."), ]) def test_determine_root_gpu_device(gpus, expected_root_gpu): - assert determine_root_gpu_device(gpus) == expected_root_gpu + assert device_parser.determine_root_gpu_device(gpus) == expected_root_gpu @pytest.mark.gpus_param_tests @@ -294,7 +294,7 @@ def test_determine_root_gpu_device(gpus, expected_root_gpu): pytest.param('-1', list(range(PRETEND_N_OF_GPUS)), id="'-1' - use all gpus"), ]) def test_parse_gpu_ids(mocked_device_count, gpus, expected_gpu_ids): - assert _parse_gpu_ids(gpus) == expected_gpu_ids + assert device_parser.parse_gpu_ids(gpus) == expected_gpu_ids @pytest.mark.gpus_param_tests @@ -310,27 +310,27 @@ def test_parse_gpu_ids(mocked_device_count, gpus, expected_gpu_ids): ]) def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, gpus): with pytest.raises(MisconfigurationException): - _parse_gpu_ids(gpus) + device_parser.parse_gpu_ids(gpus) @pytest.mark.gpus_param_tests @pytest.mark.parametrize("gpus", [[1, 2, 19], -1, '-1']) def test_parse_gpu_fail_on_non_existent_id(mocked_device_count_0, gpus): with pytest.raises(MisconfigurationException): - _parse_gpu_ids(gpus) + device_parser.parse_gpu_ids(gpus) @pytest.mark.gpus_param_tests def test_parse_gpu_fail_on_non_existent_id_2(mocked_device_count): with pytest.raises(MisconfigurationException): - _parse_gpu_ids([1, 2, 19]) + device_parser.parse_gpu_ids([1, 2, 19]) @pytest.mark.gpus_param_tests @pytest.mark.parametrize("gpus", [-1, '-1']) def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_count_0, gpus): with pytest.raises(MisconfigurationException): - _parse_gpu_ids(gpus) + device_parser.parse_gpu_ids(gpus) @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine") From 42508271ce10b93eddf3262fe4c2ca57aec2a018 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 8 Sep 2020 14:07:51 -0400 Subject: [PATCH 3/9] ref: device parser --- pytorch_lightning/utilities/device_parser.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git 
a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py index 35ae7aa9040dc..ef1807b43f3ab 100644 --- a/pytorch_lightning/utilities/device_parser.py +++ b/pytorch_lightning/utilities/device_parser.py @@ -184,11 +184,17 @@ def _check_data_type(device_ids: Any) -> None: def _tpu_cores_valid(tpu_cores): - return tpu_cores in (1, 8, None) or ( - isinstance(tpu_cores, (list, tuple, set)) and - len(tpu_cores) == 1 and - tpu_cores[0] in range(1, 9) - ) + # allow 1 or 8 cores + if tpu_cores in (1, 8 , None): + return True + + # allow picking 1 of 8 indexes + is_tpu_list = isinstance(tpu_cores, (list, tuple, set)) + has_1_tpu_idx = len(tpu_cores) == 1 + is_valid_tpu_idx = tpu_cores[0] in range(1, 9) + + is_valid_tpu_core_choice = is_tpu_list and has_1_tpu_idx and is_valid_tpu_idx + return is_valid_tpu_core_choice def _parse_tpu_cores_str(tpu_cores): From 8c2c1d27475230d5817a217c7fd5fdf15c354597 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 8 Sep 2020 14:09:53 -0400 Subject: [PATCH 4/9] ref: device parser --- pytorch_lightning/utilities/device_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py index ef1807b43f3ab..619d722a35f25 100644 --- a/pytorch_lightning/utilities/device_parser.py +++ b/pytorch_lightning/utilities/device_parser.py @@ -185,7 +185,7 @@ def _check_data_type(device_ids: Any) -> None: def _tpu_cores_valid(tpu_cores): # allow 1 or 8 cores - if tpu_cores in (1, 8 , None): + if tpu_cores in (1, 8, None): return True # allow picking 1 of 8 indexes From a2c17ab70ccb057d26f51f72d800c22715d04d48 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 8 Sep 2020 14:15:34 -0400 Subject: [PATCH 5/9] ref: device parser --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index 721e8b763619a..d37498e9e9f5d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -140,3 +140,4 @@ Indices and tables api/pytorch_lightning.profiler api/pytorch_lightning.trainer api/pytorch_lightning.utilities + api/pytorch_lightning.tuner From a9b4e212485b0be948b94d1353d4a9932f23c75b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 8 Sep 2020 14:29:13 -0400 Subject: [PATCH 6/9] ref: device parser --- pytorch_lightning/utilities/device_parser.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py index 619d722a35f25..4f798918f16df 100644 --- a/pytorch_lightning/utilities/device_parser.py +++ b/pytorch_lightning/utilities/device_parser.py @@ -189,12 +189,12 @@ def _tpu_cores_valid(tpu_cores): return True # allow picking 1 of 8 indexes - is_tpu_list = isinstance(tpu_cores, (list, tuple, set)) - has_1_tpu_idx = len(tpu_cores) == 1 - is_valid_tpu_idx = tpu_cores[0] in range(1, 9) + if isinstance(tpu_cores, (list, tuple, set)): + has_1_tpu_idx = len(tpu_cores) == 1 + is_valid_tpu_idx = tpu_cores[0] in range(1, 9) - is_valid_tpu_core_choice = is_tpu_list and has_1_tpu_idx and is_valid_tpu_idx - return is_valid_tpu_core_choice + is_valid_tpu_core_choice = has_1_tpu_idx and is_valid_tpu_idx + return is_valid_tpu_core_choice def _parse_tpu_cores_str(tpu_cores): From 4955db537fdfeff96ccbb7280450a622ff0ccdbf Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 8 Sep 2020 14:45:50 -0400 Subject: [PATCH 7/9] ref: device parser --- 
pytorch_lightning/utilities/device_parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytorch_lightning/utilities/device_parser.py b/pytorch_lightning/utilities/device_parser.py index 4f798918f16df..f67b09eccf51d 100644 --- a/pytorch_lightning/utilities/device_parser.py +++ b/pytorch_lightning/utilities/device_parser.py @@ -196,6 +196,8 @@ def _tpu_cores_valid(tpu_cores): is_valid_tpu_core_choice = has_1_tpu_idx and is_valid_tpu_idx return is_valid_tpu_core_choice + return False + def _parse_tpu_cores_str(tpu_cores): if tpu_cores in ('1', '8'): From ba03a3de9f526f3a3c22be269a8d8a367d64d2ca Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 8 Sep 2020 15:08:25 -0400 Subject: [PATCH 8/9] ref: device parser --- pytorch_lightning/trainer/distrib_parts.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py index 1a98734a802d9..95a0010238ad7 100644 --- a/pytorch_lightning/trainer/distrib_parts.py +++ b/pytorch_lightning/trainer/distrib_parts.py @@ -29,6 +29,13 @@ ) from pytorch_lightning.utilities import move_data_to_device, AMPType +try: + import horovod.torch as hvd +except (ModuleNotFoundError, ImportError): + HOROVOD_AVAILABLE = False +else: + HOROVOD_AVAILABLE = True + class TrainerDPMixin(ABC): From 35a7982d595c48c3d7dd969d0d23d20df6d1ff68 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 8 Sep 2020 18:07:04 -0400 Subject: [PATCH 9/9] ref: device parser --- pytorch_lightning/trainer/trainer.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 63eb3a122d845..b3b74575ea89d 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -373,6 +373,20 @@ def __init__( if 'LOCAL_RANK' in os.environ: rank_zero_only.rank = int(os.environ['LOCAL_RANK']) + # tracks internal state for debugging + self.dev_debugger = InternalDebugger(self) + self.config_validator = ConfigValidator(self) + self.data_connector = DataConnector(self) + self.lr_scheduler_connector = LRSchedulerConnector(self) + self.accelerator_connector = AcceleratorConnector(self) + self.logger_connector = LoggerConnector(self) + self.tuner = Tuner(self) + self.accelerator_backend = None + + # loops + self.evaluation_loop = EvaluationLoop(self) + self.train_loop = TrainLoop(self) + # training bookeeping self.total_batch_idx = 0 self.running_loss = TensorRunningAccum(window_length=20) @@ -605,20 +619,6 @@ def __init__( self.on_colab_kaggle = os.getenv('COLAB_GPU') or os.getenv('KAGGLE_URL_BASE') - # tracks internal state for debugging - self.dev_debugger = InternalDebugger(self) - self.config_validator = ConfigValidator(self) - self.data_connector = DataConnector(self) - self.lr_scheduler_connector = LRSchedulerConnector(self) - self.accelerator_connector = AcceleratorConnector(self) - self.logger_connector = LoggerConnector(self) - self.tuner = Tuner(self) - self.accelerator_backend = None - - # loops - self.evaluation_loop = EvaluationLoop(self) - self.train_loop = TrainLoop(self) - # Callback system self.on_init_end()
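
Reviewer note (outside the patch series): the sketch below is a hypothetical usage example of the helpers that patch 2/9 relocates into pytorch_lightning/utilities/device_parser.py. Import paths and function names come from the diffs above; the concrete return values assume a machine where the requested devices actually exist.

    from pytorch_lightning.utilities import device_parser

    # '-1' (or -1) expands to every visible CUDA device; a comma-separated
    # string selects specific ids. Requesting an unavailable id raises
    # MisconfigurationException.
    gpu_ids = device_parser.parse_gpu_ids("0,1")                  # e.g. [0, 1]
    root_gpu = device_parser.determine_root_gpu_device(gpu_ids)   # first id -> 0

    # tpu_cores accepts 1, 8, or a single core index in [1..8],
    # given as an int, a string, or a one-element list.
    device_parser.parse_tpu_cores("8")    # -> 8
    device_parser.parse_tpu_cores([5])    # -> [5]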
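
Similarly, a hypothetical sketch of the auto-GPU selection that patch 2/9 moves into pytorch_lightning/tuner/auto_gpu_select.py: pick_multiple_gpus greedily test-allocates a small tensor on each candidate device and skips any GPU that raises a RuntimeError, so the result depends on the state of the machine it runs on.

    from pytorch_lightning.tuner.auto_gpu_select import pick_multiple_gpus

    # On a healthy multi-GPU box this returns the first N allocatable device ids,
    # e.g. [0, 1]; with no usable GPUs it raises RuntimeError("No GPUs available.").
    picked = pick_multiple_gpus(2)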
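
Finally, a hypothetical end-user view: the Trainer flags themselves are unchanged by this refactor, only the code paths behind them move (gpus/tpu_cores now go through device_parser, and auto_select_gpus through the new Tuner). The example assumes a machine with at least two visible GPUs.

    from pytorch_lightning import Trainer

    # parsed via device_parser.parse_gpu_ids / determine_root_gpu_device
    trainer = Trainer(gpus="0,1")

    # an integer count with auto_select_gpus=True is resolved by
    # Tuner.pick_multiple_gpus instead of being range-expanded
    trainer = Trainer(gpus=2, auto_select_gpus=True)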