Mock GPU accelerator connector tests (#10554)

Lightning-AI · Nov 16, 2021 · af4af3d
1 parent edebd8a
commit af4af3d
Showing 1 changed file with 56 additions and 35 deletions.
diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
@@ -86,7 +86,6 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock):
     assert isinstance(trainer.training_type_plugin.cluster_environment, LightningEnvironment)
 
 
-@RunIf(min_gpus=2)
 @mock.patch.dict(
     os.environ,
     {
@@ -98,8 +97,10 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock):
         "SLURM_LOCALID": "1",
     },
 )
+@mock.patch("torch.cuda.set_device")
+@mock.patch("torch.cuda.device_count", return_value=2)
 @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
-def test_accelerator_choice_ddp_slurm(setup_distributed_mock):
+def test_accelerator_choice_ddp_slurm(set_device_mock, device_count_mock, setup_distributed_mock):
     class CB(Callback):
         def on_fit_start(self, trainer, pl_module):
             assert trainer._accelerator_connector._is_slurm_managing_tasks
@@ -111,13 +112,13 @@ def on_fit_start(self, trainer, pl_module):
             raise SystemExit()
 
     model = BoringModel()
-    trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
+    with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
+        trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
 
     with pytest.raises(SystemExit):
         trainer.fit(model)
 
 
-@RunIf(min_gpus=2)
 @mock.patch.dict(
     os.environ,
     {
@@ -129,9 +130,10 @@ def on_fit_start(self, trainer, pl_module):
         "SLURM_LOCALID": "1",
     },
 )
+@mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=2)
 @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
-def test_accelerator_choice_ddp2_slurm(device_count_mock, setup_distributed_mock):
+def test_accelerator_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock):
     class CB(Callback):
         def on_fit_start(self, trainer, pl_module):
             assert trainer._accelerator_connector._is_slurm_managing_tasks
@@ -143,13 +145,15 @@ def on_fit_start(self, trainer, pl_module):
             raise SystemExit()
 
     model = BoringModel()
-    trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])
+    with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"):
+        trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])
 
     with pytest.raises(SystemExit):
         trainer.fit(model)
 
+    set_device_mock.assert_called_once()
+
 
-@RunIf(min_gpus=1)
 @mock.patch.dict(
     os.environ,
     {
@@ -161,9 +165,10 @@ def on_fit_start(self, trainer, pl_module):
         "GROUP_RANK": "0",
     },
 )
-@mock.patch("torch.cuda.device_count", return_value=2)
+@mock.patch("torch.cuda.set_device")
+@mock.patch("torch.cuda.device_count", return_value=1)
 @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
-def test_accelerator_choice_ddp_te(device_count_mock, setup_distributed_mock):
+def test_accelerator_choice_ddp_te(set_device_mock, device_count_mock, setup_distributed_mock):
     class CB(Callback):
         def on_fit_start(self, trainer, pl_module):
             assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -174,13 +179,15 @@ def on_fit_start(self, trainer, pl_module):
             raise SystemExit()
 
     model = BoringModel()
-    trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
+    with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
+        trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
 
     with pytest.raises(SystemExit):
         trainer.fit(model)
 
+    set_device_mock.assert_called_once()
+
 
-@RunIf(min_gpus=1)
 @mock.patch.dict(
     os.environ,
     {
@@ -192,9 +199,10 @@ def on_fit_start(self, trainer, pl_module):
         "GROUP_RANK": "0",
     },
 )
-@mock.patch("torch.cuda.device_count", return_value=2)
+@mock.patch("torch.cuda.set_device")
+@mock.patch("torch.cuda.device_count", return_value=1)
 @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
-def test_accelerator_choice_ddp2_te(device_count_mock, setup_distributed_mock):
+def test_accelerator_choice_ddp2_te(set_device_mock, device_count_mock, setup_distributed_mock):
     class CB(Callback):
         def on_fit_start(self, trainer, pl_module):
             assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -205,11 +213,14 @@ def on_fit_start(self, trainer, pl_module):
             raise SystemExit()
 
     model = BoringModel()
-    trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])
+    with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"):
+        trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])
 
     with pytest.raises(SystemExit):
         trainer.fit(model)
 
+    set_device_mock.assert_called_once()
+
 
 @mock.patch.dict(
     os.environ, {"WORLD_SIZE": "2", "LOCAL_WORLD_SIZE": "2", "RANK": "1", "LOCAL_RANK": "1", "GROUP_RANK": "0"}
@@ -233,7 +244,6 @@ def on_fit_start(self, trainer, pl_module):
         trainer.fit(model)
 
 
-@RunIf(min_gpus=1)
 @mock.patch.dict(
     os.environ,
     {
@@ -245,9 +255,10 @@ def on_fit_start(self, trainer, pl_module):
         "RANK": "1",
     },
 )
+@mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=1)
 @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
-def test_accelerator_choice_ddp_kubeflow(device_count_mock, setup_distributed_mock):
+def test_accelerator_choice_ddp_kubeflow(set_device_mock, device_count_mock, setup_distributed_mock):
     class CB(Callback):
         def on_fit_start(self, trainer, pl_module):
             assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -258,11 +269,14 @@ def on_fit_start(self, trainer, pl_module):
             raise SystemExit()
 
     model = BoringModel()
-    trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1, callbacks=[CB()])
+    with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
+        trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1, callbacks=[CB()])
 
     with pytest.raises(SystemExit):
         trainer.fit(model)
 
+    set_device_mock.assert_called_once()
+
 
 @mock.patch.dict(
     os.environ,
@@ -323,29 +337,28 @@ def on_fit_start(self, trainer, pl_module):
         trainer.fit(model)
 
 
-@RunIf(special=True)
-def test_accelerator_choice_ddp_cpu_and_plugin(tmpdir):
+@RunIf(skip_windows=True, special=True)
+def test_accelerator_choice_ddp_cpu_and_strategy(tmpdir):
     """Test that accelerator="ddp_cpu" can work together with an instance of DDPPlugin."""
-    _test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class=DDPPlugin)
+    _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPPlugin)
 
 
-@RunIf(special=True)
-def test_accelerator_choice_ddp_cpu_and_plugin_spawn(tmpdir):
+@RunIf(skip_windows=True)
+def test_accelerator_choice_ddp_cpu_and_strategy_spawn(tmpdir):
     """Test that accelerator="ddp_cpu" can work together with an instance of DDPSpawnPlugin."""
-    _test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class=DDPSpawnPlugin)
-
+    _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPSpawnPlugin)
 
-def _test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class):
 
+def _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class):
     model = BoringModel()
     trainer = Trainer(
         default_root_dir=tmpdir,
-        plugins=[ddp_plugin_class(find_unused_parameters=True)],
+        strategy=ddp_strategy_class(find_unused_parameters=True),
         fast_dev_run=True,
         accelerator="ddp_cpu",
         num_processes=2,
     )
-    assert isinstance(trainer.training_type_plugin, ddp_plugin_class)
+    assert isinstance(trainer.training_type_plugin, ddp_strategy_class)
     assert isinstance(trainer.accelerator, CPUAccelerator)
     assert trainer.training_type_plugin.num_processes == 2
     assert trainer.training_type_plugin.parallel_devices == [torch.device("cpu")] * 2
@@ -793,7 +806,6 @@ def on_fit_start(self, trainer, pl_module):
         trainer.fit(model)
 
 
-@RunIf(min_gpus=2)
 @mock.patch.dict(
     os.environ,
     {
@@ -805,10 +817,11 @@ def on_fit_start(self, trainer, pl_module):
         "SLURM_LOCALID": "1",
     },
 )
+@mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=2)
 @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
 @pytest.mark.parametrize("strategy", ["ddp2", DDP2Plugin()])
-def test_strategy_choice_ddp2_slurm(device_count_mock, setup_distributed_mock, strategy):
+def test_strategy_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock, strategy):
     class CB(Callback):
         def on_fit_start(self, trainer, pl_module):
             assert trainer._accelerator_connector._is_slurm_managing_tasks
@@ -825,8 +838,9 @@ def on_fit_start(self, trainer, pl_module):
     with pytest.raises(SystemExit):
         trainer.fit(model)
 
+    set_device_mock.assert_called_once()
+
 
-@RunIf(min_gpus=1)
 @mock.patch.dict(
     os.environ,
     {
@@ -838,9 +852,10 @@ def on_fit_start(self, trainer, pl_module):
         "GROUP_RANK": "0",
     },
 )
+@mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=2)
 @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
-def test_strategy_choice_ddp_te(device_count_mock, setup_distributed_mock):
+def test_strategy_choice_ddp_te(set_device_mock, device_count_mock, setup_distributed_mock):
     class CB(Callback):
         def on_fit_start(self, trainer, pl_module):
             assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -856,8 +871,9 @@ def on_fit_start(self, trainer, pl_module):
     with pytest.raises(SystemExit):
         trainer.fit(model)
 
+    set_device_mock.assert_called_once()
+
 
-@RunIf(min_gpus=1)
 @mock.patch.dict(
     os.environ,
     {
@@ -869,9 +885,10 @@ def on_fit_start(self, trainer, pl_module):
         "GROUP_RANK": "0",
     },
 )
+@mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=2)
 @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
-def test_strategy_choice_ddp2_te(device_count_mock, setup_distributed_mock):
+def test_strategy_choice_ddp2_te(set_device_mock, device_count_mock, setup_distributed_mock):
     class CB(Callback):
         def on_fit_start(self, trainer, pl_module):
             assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -887,6 +904,8 @@ def on_fit_start(self, trainer, pl_module):
     with pytest.raises(SystemExit):
         trainer.fit(model)
 
+    set_device_mock.assert_called_once()
+
 
 @mock.patch.dict(
     os.environ, {"WORLD_SIZE": "2", "LOCAL_WORLD_SIZE": "2", "RANK": "1", "LOCAL_RANK": "1", "GROUP_RANK": "0"}
@@ -910,7 +929,6 @@ def on_fit_start(self, trainer, pl_module):
         trainer.fit(model)
 
 
-@RunIf(min_gpus=1)
 @mock.patch.dict(
     os.environ,
     {
@@ -922,9 +940,10 @@ def on_fit_start(self, trainer, pl_module):
         "RANK": "1",
     },
 )
+@mock.patch("torch.cuda.set_device")
 @mock.patch("torch.cuda.device_count", return_value=1)
 @mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
-def test_strategy_choice_ddp_kubeflow(device_count_mock, setup_distributed_mock):
+def test_strategy_choice_ddp_kubeflow(set_device_mock, device_count_mock, setup_distributed_mock):
     class CB(Callback):
         def on_fit_start(self, trainer, pl_module):
             assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -940,6 +959,8 @@ def on_fit_start(self, trainer, pl_module):
     with pytest.raises(SystemExit):
         trainer.fit(model)
 
+    set_device_mock.assert_called_once()
+
 
 @mock.patch.dict(
     os.environ,