Add support for DDP fork #13405

Merged · 69 commits · Jul 22, 2022

Commits (69)
782ca4a  fork (awaelchli, Jun 22, 2022)
aefac45  dont set device (awaelchli, Jun 23, 2022)
efce3c4  parallel dev (awaelchli, Jun 23, 2022)
810c0ba  add cuda (awaelchli, Jun 24, 2022)
5fd9cda  update device count (awaelchli, Jun 24, 2022)
c1b4fd0  fork (awaelchli, Jun 24, 2022)
daa07ee  cuda available (awaelchli, Jun 24, 2022)
679f363  set device (awaelchli, Jun 24, 2022)
c43f827  update (awaelchli, Jun 24, 2022)
b7e529e  update (awaelchli, Jun 24, 2022)
b51d172  cuda available (awaelchli, Jun 25, 2022)
9b41941  formatting (awaelchli, Jun 25, 2022)
9cea979  unused import (awaelchli, Jun 25, 2022)
daccd21  test fixes (awaelchli, Jun 27, 2022)
0ccc3b9  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 27, 2022)
a080ec0  add docstring (awaelchli, Jun 27, 2022)
9a914be  Merge remote-tracking branch 'origin/feature/ddp-fork2' into feature/… (awaelchli, Jun 27, 2022)
eae67cd  update (awaelchli, Jun 27, 2022)
167a710  update (awaelchli, Jun 27, 2022)
1bdd79d  fix mocks in tests (awaelchli, Jun 27, 2022)
297b55a  refactor (awaelchli, Jun 27, 2022)
6c5b769  fix test (awaelchli, Jun 27, 2022)
2671810  update lite and enums (awaelchli, Jun 27, 2022)
ff2a825  typo (awaelchli, Jun 27, 2022)
0879751  update docs (awaelchli, Jun 27, 2022)
fe16575  add validation for forking on platforms (awaelchli, Jun 27, 2022)
582872c  debug no breaking change for devices=1 (awaelchli, Jun 27, 2022)
da70271  fix typo in test (awaelchli, Jun 27, 2022)
3f9a872  update docstring (awaelchli, Jun 27, 2022)
7291fa3  added windows test for device parser (awaelchli, Jun 27, 2022)
785c830  add changelog (awaelchli, Jun 27, 2022)
dd043ad  add test (awaelchli, Jun 27, 2022)
da843ee  add tests (awaelchli, Jun 27, 2022)
1a63662  update error message (awaelchli, Jun 27, 2022)
093a52e  Comparison section (awaelchli, Jun 28, 2022)
3be3c17  Merge branch 'master' into feature/ddp-fork2 (awaelchli, Jun 29, 2022)
7b3c132  fork docs (awaelchli, Jun 29, 2022)
1b95954  typing (awaelchli, Jun 29, 2022)
df031b3  Merge branch 'master' into feature/ddp-fork2 (awaelchli, Jun 29, 2022)
6855e50  Merge branch 'master' into feature/ddp-fork2 (awaelchli, Jun 30, 2022)
8df3457  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jun 30, 2022)
d5c28b9  fix tests (awaelchli, Jul 1, 2022)
0152c39  Merge branch 'master' into feature/ddp-fork2 (awaelchli, Jul 1, 2022)
a6a0d09  Update docs/source-pytorch/accelerators/gpu_intermediate.rst (awaelchli, Jul 3, 2022)
4ede4cb  Update docs/source-pytorch/accelerators/gpu_intermediate.rst (awaelchli, Jul 3, 2022)
877ed07  Update docs/source-pytorch/accelerators/gpu_intermediate.rst (awaelchli, Jul 3, 2022)
bf36259  reviews (awaelchli, Jul 3, 2022)
6263636  Merge remote-tracking branch 'origin/feature/ddp-fork2' into feature/… (awaelchli, Jul 3, 2022)
ca9a0b3  Merge branch 'master' into feature/ddp-fork2 (awaelchli, Jul 19, 2022)
c9b2601  handle start methods (awaelchli, Jul 19, 2022)
cca1606  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 19, 2022)
e6b19a1  update tests (awaelchli, Jul 19, 2022)
59c3735  Merge remote-tracking branch 'origin/feature/ddp-fork2' into feature/… (awaelchli, Jul 19, 2022)
cc06e12  update type (awaelchli, Jul 19, 2022)
b7ffaef  Merge branch 'master' into feature/ddp-fork2 (awaelchli, Jul 19, 2022)
c5480a1  fix merge errors (awaelchli, Jul 19, 2022)
87cd344  update tests (awaelchli, Jul 19, 2022)
b686b3b  remove unused import (awaelchli, Jul 19, 2022)
2d05ac3  revert weird change (awaelchli, Jul 20, 2022)
78e542e  remove redundant start method attribute (awaelchli, Jul 20, 2022)
0b13047  they insist (awaelchli, Jul 20, 2022)
17257a6  Merge branch 'master' into feature/ddp-fork2 (awaelchli, Jul 20, 2022)
3d7095d  update tests (awaelchli, Jul 20, 2022)
a5c3592  Revert "update tests" (awaelchli, Jul 20, 2022)
8c74f9a  insist (awaelchli, Jul 20, 2022)
1832c3e  [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Jul 20, 2022)
5c2e6ea  Update docs/source-pytorch/accelerators/gpu_intermediate.rst (awaelchli, Jul 21, 2022)
59f21fc  Merge branch 'master' into feature/ddp-fork2 (Borda, Jul 22, 2022)
8c83d1a  Merge branch 'master' into feature/ddp-fork2 (awaelchli, Jul 22, 2022)
8 changes: 4 additions & 4 deletions src/pytorch_lightning/accelerators/gpu.py
@@ -52,7 +52,7 @@ def setup(self, trainer: "pl.Trainer") -> None:
     def set_nvidia_flags(local_rank: int) -> None:
         # set the correct cuda visible devices (using pci order)
         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
-        all_gpu_ids = ",".join(str(x) for x in range(torch.cuda.device_count()))
+        all_gpu_ids = ",".join(str(x) for x in range(device_parser.num_cuda_devices()))
         devices = os.getenv("CUDA_VISIBLE_DEVICES", all_gpu_ids)
         _log.info(f"LOCAL_RANK: {local_rank} - CUDA_VISIBLE_DEVICES: [{devices}]")

@@ -84,11 +84,11 @@ def get_parallel_devices(devices: List[int]) -> List[torch.device]:
     @staticmethod
     def auto_device_count() -> int:
         """Get the devices when set to auto."""
-        return torch.cuda.device_count()
+        return device_parser.num_cuda_devices()

     @staticmethod
     def is_available() -> bool:
-        return torch.cuda.device_count() > 0
+        return device_parser.num_cuda_devices() > 0

     @classmethod
     def register_accelerators(cls, accelerator_registry: Dict) -> None:

@@ -156,6 +156,6 @@ def _to_float(x: str) -> float:
 def _get_gpu_id(device_id: int) -> str:
     """Get the unmasked real GPU IDs."""
     # All devices if `CUDA_VISIBLE_DEVICES` unset
-    default = ",".join(str(i) for i in range(torch.cuda.device_count()))
+    default = ",".join(str(i) for i in range(device_parser.num_cuda_devices()))
     cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES", default=default).split(",")
     return cuda_visible_devices[device_id].strip()
3 changes: 2 additions & 1 deletion src/pytorch_lightning/profilers/pytorch.py
@@ -24,6 +24,7 @@
 from torch.autograd.profiler import record_function

 from pytorch_lightning.profilers.profiler import Profiler
+from pytorch_lightning.utilities.device_parser import is_cuda_available
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.imports import _KINETO_AVAILABLE
 from pytorch_lightning.utilities.rank_zero import rank_zero_warn

@@ -368,7 +369,7 @@ def _default_activities(self) -> List["ProfilerActivity"]:
             return activities
         if self._profiler_kwargs.get("use_cpu", True):
             activities.append(ProfilerActivity.CPU)
-        if self._profiler_kwargs.get("use_cuda", torch.cuda.is_available()):
+        if self._profiler_kwargs.get("use_cuda", is_cuda_available()):
             activities.append(ProfilerActivity.CUDA)
         return activities
29 changes: 17 additions & 12 deletions src/pytorch_lightning/strategies/ddp_spawn.py
@@ -68,6 +68,7 @@ def __init__(
         ddp_comm_hook: Optional[callable] = None,
         ddp_comm_wrapper: Optional[callable] = None,
         process_group_backend: Optional[str] = None,
+        start_method: str = "spawn",
         **kwargs: Any,
     ):
         super().__init__(
@@ -84,6 +85,7 @@ def __init__(
         self._ddp_comm_wrapper = ddp_comm_wrapper
         self._local_rank = 0
         self._process_group_backend: Optional[str] = process_group_backend
+        self._start_method = start_method

     @property
     def num_nodes(self) -> int:
@@ -120,7 +122,7 @@ def process_group_backend(self) -> Optional[str]:
         return self._process_group_backend

     def _configure_launcher(self):
-        self._launcher = _SpawnLauncher(self)
+        self._launcher = _SpawnLauncher(self, start_method=self._start_method)

     def setup(self, trainer: "pl.Trainer") -> None:
         os.environ["MASTER_PORT"] = str(self.cluster_environment.main_port)
@@ -270,17 +272,20 @@ def post_training_step(self):

     @classmethod
     def register_strategies(cls, strategy_registry: Dict) -> None:
-        strategy_registry.register(
-            "ddp_spawn_find_unused_parameters_false",
-            cls,
-            description="DDPSpawn Strategy with `find_unused_parameters` as False",
-            find_unused_parameters=False,
-        )
-        strategy_registry.register(
-            cls.strategy_name,
-            cls,
-            description=f"{cls.__class__.__name__}",
-        )
+        for start_method in ("spawn", "fork"):
+            strategy_registry.register(
+                f"ddp_{start_method}_find_unused_parameters_false",
+                cls,
+                description="DDPSpawn Strategy with `find_unused_parameters` as False",
+                find_unused_parameters=False,
+                start_method=start_method,
+            )
+            strategy_registry.register(
+                f"ddp_{start_method}",
+                cls,
+                description=f"{cls.__class__.__name__}",
+                start_method=start_method,
+            )

     def teardown(self) -> None:
         log.detail(f"{self.__class__.__name__}: tearing down strategy")
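
With the loop above, the registry exposes "ddp_spawn", "ddp_fork", and their `find_unused_parameters_false` variants as strategy names. A minimal usage sketch (the accelerator/devices values here are illustrative, not part of this diff):

    import pytorch_lightning as pl

    # "ddp_fork" resolves to DDPSpawnStrategy(start_method="fork"), which can
    # run in Jupyter notebooks where the "spawn" start method is unavailable.
    trainer = pl.Trainer(
        accelerator="gpu",
        devices=2,
        strategy="ddp_fork",  # or "ddp_fork_find_unused_parameters_false"
    )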
14 changes: 7 additions & 7 deletions src/pytorch_lightning/strategies/launchers/spawn.py
@@ -46,16 +46,16 @@ class _SpawnLauncher(_Launcher):
         strategy: A reference to the strategy that is used together with this launcher.
     """

-    def __init__(self, strategy: Strategy) -> None:
+    def __init__(self, strategy: Strategy, start_method: str = "spawn") -> None:
         self._strategy = strategy
-        self._start_method = "spawn"
+        self._start_method = start_method

     @property
     def is_interactive_compatible(self) -> bool:
-        # The start method 'spawn' is currently the only one that works with DDP and CUDA support
-        # The start method 'fork' is the only one supported in Jupyter environments but not compatible with CUDA
-        # For more context, see https://github.com/Lightning-AI/lightning/issues/7550
-        return self._start_method == "fork" and self._strategy.root_device.type != "cuda"
+        # The start method 'spawn' is not supported in interactive environments
+        # The start method 'fork' is the only one supported in Jupyter environments, with constraints around CUDA
+        # initialization. For more context, see https://github.com/Lightning-AI/lightning/issues/7550
+        return self._start_method == "fork"

     def launch(self, function: Callable, *args: Any, trainer: Optional["pl.Trainer"] = None, **kwargs: Any) -> Any:
         """Spawns processes that run the given function in parallel.
@@ -76,7 +76,7 @@ def launch(self, function: Callable, *args: Any, trainer: Optional["pl.Trainer"]
         os.environ["MASTER_PORT"] = str(self._strategy.cluster_environment.main_port)
         context = mp.get_context(self._start_method)
         return_queue = context.SimpleQueue()
-        mp.spawn(
+        mp.start_processes(
             self._wrapping_function,
             args=(trainer, function, args, kwargs, return_queue),
             nprocs=self._strategy.num_processes,
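
The switch from `mp.spawn` to `mp.start_processes` is what makes the start method configurable: `mp.spawn` hardcodes `start_method="spawn"`, while `torch.multiprocessing.start_processes` accepts it as a parameter. A standalone sketch of the difference (assuming a Unix platform where "fork" is available):

    import torch.multiprocessing as mp

    def work(rank: int) -> None:
        # Like mp.spawn, start_processes passes the process index as the first argument.
        print(f"worker {rank} started")

    if __name__ == "__main__":
        # mp.spawn(work, nprocs=2) is shorthand for start_method="spawn";
        # start_processes also accepts "fork" and "forkserver".
        mp.start_processes(work, nprocs=2, start_method="fork")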
src/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -82,6 +82,7 @@
     rank_zero_info,
     rank_zero_warn,
 )
+from pytorch_lightning.utilities.device_parser import num_cuda_devices
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.imports import (
     _HOROVOD_AVAILABLE,
@@ -487,7 +488,7 @@ def _choose_accelerator(self) -> str:
             return "hpu"
         if MPSAccelerator.is_available():
             return "mps"
-        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
+        if num_cuda_devices() > 0:
             return "gpu"
         return "cpu"
3 changes: 2 additions & 1 deletion src/pytorch_lightning/trainer/trainer.py
@@ -106,6 +106,7 @@
 from pytorch_lightning.utilities.auto_restart import _add_capture_metadata_collate
 from pytorch_lightning.utilities.cloud_io import get_filesystem
 from pytorch_lightning.utilities.data import _auto_add_worker_init_fn, has_len_all_ranks
+from pytorch_lightning.utilities.device_parser import is_cuda_available
 from pytorch_lightning.utilities.distributed import distributed_available
 from pytorch_lightning.utilities.exceptions import ExitGracefullyException, MisconfigurationException
 from pytorch_lightning.utilities.imports import _fault_tolerant_training
@@ -1762,7 +1763,7 @@ def _log_device_info(self) -> None:
         rank_zero_info(f"HPU available: {_HPU_AVAILABLE}, using: {num_hpus} HPUs")

         # TODO: Integrate MPS Accelerator here, once gpu maps to both
-        if torch.cuda.is_available() and not isinstance(self.accelerator, GPUAccelerator):
+        if is_cuda_available() and not isinstance(self.accelerator, GPUAccelerator):
             rank_zero_warn(
                 "GPU available but not used. Set `accelerator` and `devices` using"
                 f" `Trainer(accelerator='gpu', devices={GPUAccelerator.auto_device_count()})`.",
5 changes: 3 additions & 2 deletions src/pytorch_lightning/tuner/auto_gpu_select.py
@@ -15,6 +15,7 @@

 import torch

+from pytorch_lightning.utilities import device_parser
 from pytorch_lightning.utilities.exceptions import MisconfigurationException

@@ -31,7 +32,7 @@ def pick_multiple_gpus(nb: int) -> List[int]:
             " Please select a valid number of GPU resources when using auto_select_gpus."
         )

-    num_gpus = torch.cuda.device_count()
+    num_gpus = device_parser.num_cuda_devices()
     if nb > num_gpus:
         raise MisconfigurationException(f"You requested {nb} GPUs but your machine only has {num_gpus} GPUs.")
     nb = num_gpus if nb == -1 else nb
@@ -51,7 +52,7 @@ def pick_single_gpu(exclude_gpus: List[int]) -> int:
     """
     previously_used_gpus = []
     unused_gpus = []
-    for i in range(torch.cuda.device_count()):
+    for i in range(device_parser.num_cuda_devices()):
         if i in exclude_gpus:
             continue
14 changes: 13 additions & 1 deletion src/pytorch_lightning/utilities/device_parser.py
@@ -11,9 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import multiprocessing
 from typing import Any, List, MutableSequence, Optional, Tuple, Union

 import torch
+import torch.cuda

 from pytorch_lightning.plugins.environments import TorchElasticEnvironment
 from pytorch_lightning.tuner.auto_gpu_select import pick_multiple_gpus
@@ -250,7 +252,7 @@ def _get_all_available_cuda_gpus() -> List[int]:
     Returns:
         a list of all available CUDA gpus
     """
-    return list(range(torch.cuda.device_count()))
+    return list(range(num_cuda_devices()))

@@ -330,3 +332,13 @@ def parse_hpus(devices: Optional[Union[int, str, List[int]]]) -> Optional[int]:
         raise MisconfigurationException("`devices` for `HPUAccelerator` must be int, string or None.")

     return int(devices) if isinstance(devices, str) else devices
+
+
+def num_cuda_devices() -> int:
+    with multiprocessing.Pool(1) as pool:
+        return pool.apply(torch.cuda.device_count)
+
+
+def is_cuda_available() -> bool:
+    with multiprocessing.Pool(1) as pool:
+        return pool.apply(torch.cuda.is_available)
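
The subprocess indirection above is the crux of the PR: `torch.cuda.device_count()` and `torch.cuda.is_available()` initialize CUDA in the calling process, and a process that has initialized CUDA can no longer fork workers that use CUDA. Running the query in a short-lived pool worker keeps the main process CUDA-free until after workers are forked. A sketch of the failure mode this avoids (illustrative; requires a CUDA machine):

    import multiprocessing
    import torch

    def child() -> None:
        # Fails with "Cannot re-initialize CUDA in forked subprocess"
        # because the parent initialized CUDA before forking.
        torch.cuda.init()

    if __name__ == "__main__":
        torch.cuda.device_count()  # triggers CUDA initialization in the parent
        p = multiprocessing.get_context("fork").Process(target=child)
        p.start()
        p.join()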