Skip to content

Commit

Permalink
Mock GPU accelerator connector tests (#10554)
Browse files Browse the repository at this point in the history
  • Loading branch information
carmocca committed Nov 16, 2021
1 parent edebd8a commit af4af3d
Showing 1 changed file with 56 additions and 35 deletions.
91 changes: 56 additions & 35 deletions tests/accelerators/test_accelerator_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock):
assert isinstance(trainer.training_type_plugin.cluster_environment, LightningEnvironment)


@RunIf(min_gpus=2)
@mock.patch.dict(
os.environ,
{
Expand All @@ -98,8 +97,10 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock):
"SLURM_LOCALID": "1",
},
)
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=2)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp_slurm(setup_distributed_mock):
def test_accelerator_choice_ddp_slurm(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer._accelerator_connector._is_slurm_managing_tasks
Expand All @@ -111,13 +112,13 @@ def on_fit_start(self, trainer, pl_module):
raise SystemExit()

model = BoringModel()
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])

with pytest.raises(SystemExit):
trainer.fit(model)


@RunIf(min_gpus=2)
@mock.patch.dict(
os.environ,
{
Expand All @@ -129,9 +130,10 @@ def on_fit_start(self, trainer, pl_module):
"SLURM_LOCALID": "1",
},
)
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=2)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp2_slurm(device_count_mock, setup_distributed_mock):
def test_accelerator_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer._accelerator_connector._is_slurm_managing_tasks
Expand All @@ -143,13 +145,15 @@ def on_fit_start(self, trainer, pl_module):
raise SystemExit()

model = BoringModel()
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])
with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"):
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])

with pytest.raises(SystemExit):
trainer.fit(model)

set_device_mock.assert_called_once()


@RunIf(min_gpus=1)
@mock.patch.dict(
os.environ,
{
Expand All @@ -161,9 +165,10 @@ def on_fit_start(self, trainer, pl_module):
"GROUP_RANK": "0",
},
)
@mock.patch("torch.cuda.device_count", return_value=2)
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=1)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp_te(device_count_mock, setup_distributed_mock):
def test_accelerator_choice_ddp_te(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, GPUAccelerator)
Expand All @@ -174,13 +179,15 @@ def on_fit_start(self, trainer, pl_module):
raise SystemExit()

model = BoringModel()
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])

with pytest.raises(SystemExit):
trainer.fit(model)

set_device_mock.assert_called_once()


@RunIf(min_gpus=1)
@mock.patch.dict(
os.environ,
{
Expand All @@ -192,9 +199,10 @@ def on_fit_start(self, trainer, pl_module):
"GROUP_RANK": "0",
},
)
@mock.patch("torch.cuda.device_count", return_value=2)
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=1)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp2_te(device_count_mock, setup_distributed_mock):
def test_accelerator_choice_ddp2_te(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, GPUAccelerator)
Expand All @@ -205,11 +213,14 @@ def on_fit_start(self, trainer, pl_module):
raise SystemExit()

model = BoringModel()
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])
with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"):
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])

with pytest.raises(SystemExit):
trainer.fit(model)

set_device_mock.assert_called_once()


@mock.patch.dict(
os.environ, {"WORLD_SIZE": "2", "LOCAL_WORLD_SIZE": "2", "RANK": "1", "LOCAL_RANK": "1", "GROUP_RANK": "0"}
Expand All @@ -233,7 +244,6 @@ def on_fit_start(self, trainer, pl_module):
trainer.fit(model)


@RunIf(min_gpus=1)
@mock.patch.dict(
os.environ,
{
Expand All @@ -245,9 +255,10 @@ def on_fit_start(self, trainer, pl_module):
"RANK": "1",
},
)
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=1)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp_kubeflow(device_count_mock, setup_distributed_mock):
def test_accelerator_choice_ddp_kubeflow(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, GPUAccelerator)
Expand All @@ -258,11 +269,14 @@ def on_fit_start(self, trainer, pl_module):
raise SystemExit()

model = BoringModel()
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1, callbacks=[CB()])
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1, callbacks=[CB()])

with pytest.raises(SystemExit):
trainer.fit(model)

set_device_mock.assert_called_once()


@mock.patch.dict(
os.environ,
Expand Down Expand Up @@ -323,29 +337,28 @@ def on_fit_start(self, trainer, pl_module):
trainer.fit(model)


@RunIf(special=True)
def test_accelerator_choice_ddp_cpu_and_plugin(tmpdir):
@RunIf(skip_windows=True, special=True)
def test_accelerator_choice_ddp_cpu_and_strategy(tmpdir):
"""Test that accelerator="ddp_cpu" can work together with an instance of DDPPlugin."""
_test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class=DDPPlugin)
_test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPPlugin)


@RunIf(special=True)
def test_accelerator_choice_ddp_cpu_and_plugin_spawn(tmpdir):
@RunIf(skip_windows=True)
def test_accelerator_choice_ddp_cpu_and_strategy_spawn(tmpdir):
"""Test that accelerator="ddp_cpu" can work together with an instance of DDPSpawnPlugin."""
_test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class=DDPSpawnPlugin)

_test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPSpawnPlugin)

def _test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class):

def _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class):
model = BoringModel()
trainer = Trainer(
default_root_dir=tmpdir,
plugins=[ddp_plugin_class(find_unused_parameters=True)],
strategy=ddp_strategy_class(find_unused_parameters=True),
fast_dev_run=True,
accelerator="ddp_cpu",
num_processes=2,
)
assert isinstance(trainer.training_type_plugin, ddp_plugin_class)
assert isinstance(trainer.training_type_plugin, ddp_strategy_class)
assert isinstance(trainer.accelerator, CPUAccelerator)
assert trainer.training_type_plugin.num_processes == 2
assert trainer.training_type_plugin.parallel_devices == [torch.device("cpu")] * 2
Expand Down Expand Up @@ -793,7 +806,6 @@ def on_fit_start(self, trainer, pl_module):
trainer.fit(model)


@RunIf(min_gpus=2)
@mock.patch.dict(
os.environ,
{
Expand All @@ -805,10 +817,11 @@ def on_fit_start(self, trainer, pl_module):
"SLURM_LOCALID": "1",
},
)
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=2)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
@pytest.mark.parametrize("strategy", ["ddp2", DDP2Plugin()])
def test_strategy_choice_ddp2_slurm(device_count_mock, setup_distributed_mock, strategy):
def test_strategy_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock, strategy):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer._accelerator_connector._is_slurm_managing_tasks
Expand All @@ -825,8 +838,9 @@ def on_fit_start(self, trainer, pl_module):
with pytest.raises(SystemExit):
trainer.fit(model)

set_device_mock.assert_called_once()


@RunIf(min_gpus=1)
@mock.patch.dict(
os.environ,
{
Expand All @@ -838,9 +852,10 @@ def on_fit_start(self, trainer, pl_module):
"GROUP_RANK": "0",
},
)
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=2)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_strategy_choice_ddp_te(device_count_mock, setup_distributed_mock):
def test_strategy_choice_ddp_te(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, GPUAccelerator)
Expand All @@ -856,8 +871,9 @@ def on_fit_start(self, trainer, pl_module):
with pytest.raises(SystemExit):
trainer.fit(model)

set_device_mock.assert_called_once()


@RunIf(min_gpus=1)
@mock.patch.dict(
os.environ,
{
Expand All @@ -869,9 +885,10 @@ def on_fit_start(self, trainer, pl_module):
"GROUP_RANK": "0",
},
)
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=2)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_strategy_choice_ddp2_te(device_count_mock, setup_distributed_mock):
def test_strategy_choice_ddp2_te(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, GPUAccelerator)
Expand All @@ -887,6 +904,8 @@ def on_fit_start(self, trainer, pl_module):
with pytest.raises(SystemExit):
trainer.fit(model)

set_device_mock.assert_called_once()


@mock.patch.dict(
os.environ, {"WORLD_SIZE": "2", "LOCAL_WORLD_SIZE": "2", "RANK": "1", "LOCAL_RANK": "1", "GROUP_RANK": "0"}
Expand All @@ -910,7 +929,6 @@ def on_fit_start(self, trainer, pl_module):
trainer.fit(model)


@RunIf(min_gpus=1)
@mock.patch.dict(
os.environ,
{
Expand All @@ -922,9 +940,10 @@ def on_fit_start(self, trainer, pl_module):
"RANK": "1",
},
)
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=1)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_strategy_choice_ddp_kubeflow(device_count_mock, setup_distributed_mock):
def test_strategy_choice_ddp_kubeflow(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, GPUAccelerator)
Expand All @@ -940,6 +959,8 @@ def on_fit_start(self, trainer, pl_module):
with pytest.raises(SystemExit):
trainer.fit(model)

set_device_mock.assert_called_once()


@mock.patch.dict(
os.environ,
Expand Down

0 comments on commit af4af3d

Please sign in to comment.