Lightning-AI · Borda · Feb 21, 2021 · Feb 18, 2021 · Feb 18, 2021 · Feb 19, 2021
diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py
@@ -49,6 +49,24 @@ def forward(self, *inputs, **kwargs):
 
         return super().forward(*inputs, **kwargs)
 
+    def half(self):
+        """Cast the wrapped module to half precision and return this wrapper.
+
+        Returning ``self`` keeps ``wrapper = wrapper.half()`` working, as
+        ``torch.nn.Module.half`` does.
+        """
+        self.module.half()
+        return self
+
+    def to(self, *args, **kwargs):
+        """Forward ``to(*args, **kwargs)`` to the wrapped module and return this wrapper.
+
+        Returning ``self`` keeps ``wrapper = wrapper.to(...)`` working, as
+        ``torch.nn.Module.to`` does.
+        """
+        self.module.to(*args, **kwargs)
+        return self
+
     @staticmethod
     def batch_to(data):
         return data.half()

@@ -1,5 +1,6 @@
 import json
 import os
+from unittest.mock import patch
 
 import pytest
 import torch
@@ -8,11 +9,28 @@
 
 from pytorch_lightning import Trainer
 from pytorch_lightning.plugins import DeepSpeedPlugin, DeepSpeedPrecisionPlugin
+from pytorch_lightning.plugins.training_type.deepspeed import LightningDeepSpeedModule
 from pytorch_lightning.utilities import _APEX_AVAILABLE, _DEEPSPEED_AVAILABLE, _NATIVE_AMP_AVAILABLE
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import BoringModel
 
 
+@patch.object(BoringModel, 'to')
+def test_deepspeed_wrapper(mocked_to, tmpdir):
+    """Check that `LightningDeepSpeedModule` forwards `half()` and `to()` to the model it wraps."""
+    inner = BoringModel()
+    wrapper = LightningDeepSpeedModule(inner, precision=16)
+
+    # a dtype cast on the wrapper must be visible on the wrapped LightningModule
+    wrapper.half()
+    assert inner.dtype == torch.half
+
+    # `BoringModel.to` is patched, so this only records the call
+    wrapper.to('cuda')
+    was_forwarded = mocked_to.called
+    assert was_forwarded, "LightningDeepSpeedModule did not call LightningModule `to` hook when transferring device"
+
+
 @pytest.fixture
 def deepspeed_config():
     return {
@@ -182,7 +200,7 @@ def backward(self, loss: Tensor, optimizer: Optimizer, optimizer_idx: int, *args
     trainer = Trainer(
         fast_dev_run=True,
         default_root_dir=tmpdir,
-        plugins=DeepSpeedPlugin(zero_optimization=False),
+        plugins=DeepSpeedPlugin(),
         gpus=1,
     )
     with pytest.warns(UserWarning, match='Overridden backward hook in the LightningModule will be ignored'):
@@ -210,7 +228,7 @@ def on_train_start(self) -> None:
 
     model = TestModel()
     trainer = Trainer(
-        plugins=DeepSpeedPlugin(zero_optimization=False),
+        plugins=DeepSpeedPlugin(),
         default_root_dir=tmpdir,
         gpus=1,
         fast_dev_run=True,
@@ -267,7 +285,7 @@ def test_deepspeed_multigpu(tmpdir, deepspeed_config):
     """
     model = BoringModel()
     trainer = Trainer(
-        plugins=[DeepSpeedPlugin(zero_optimization=False)],
+        plugins=[DeepSpeedPlugin()],
         default_root_dir=tmpdir,
         gpus=2,
         fast_dev_run=True,
@@ -285,8 +303,9 @@ def _assert_save_model_is_equal(model, tmpdir, trainer):
     # carry out the check only on rank 0
     if trainer.global_rank == 0:
         saved_model = BoringModel.load_from_checkpoint(checkpoint_path)
-        saved_model = saved_model.float()
-        model = model.float().cpu()
+        if model.dtype == torch.half:
+            saved_model = saved_model.half()  # model is loaded in float32 as default, move it to float16
+        model = model.cpu()
         # Assert model parameters are identical after loading
         for orig_param, trained_model_param in zip(model.parameters(), saved_model.parameters()):
             assert torch.equal(orig_param, trained_model_param)