Lightning-AI · tchaton · Mar 8, 2021 · Mar 8, 2021 · Mar 8, 2021 · Mar 8, 2021
@@ -49,9 +49,9 @@ def reduce(self, tensor, *args, **kwargs):
 
         else:
 
-            def _reduce(tensor: torch.Tensor):
-                dtype_tensor = tensor.dtype
-                return tensor.float().mean().type(dtype_tensor)
+            def _reduce(t: torch.Tensor):
+                dtype_tensor = t.dtype
+                return t.float().mean().type(dtype_tensor)
 
             tensor = apply_to_collection(tensor, torch.Tensor, _reduce)
 

@@ -13,13 +13,14 @@
 # limitations under the License.
 import torch
 import torch.nn.functional as F
+from torch.utils.data import DataLoader
 
 import pytorch_lightning as pl
 import tests.helpers.pipelines as tpipes
 import tests.helpers.utils as tutils
 from pytorch_lightning.callbacks import EarlyStopping
 from pytorch_lightning.core import memory
-from tests.helpers import BoringModel
+from tests.helpers import BoringModel, RandomDataset
 from tests.helpers.datamodules import ClassifDataModule
 from tests.helpers.runif import RunIf
 from tests.helpers.simple_models import ClassificationModel
@@ -127,17 +128,56 @@ def test_dp_test(tmpdir):
 
 @RunIf(min_gpus=2)
 def test_dp_training_step_dict(tmpdir):
-    """
-    This test verify dp properly reduce dictionaries
-    """
+    """ This test verifies that dp properly reduces dictionaries """
 
-    model = BoringModel()
+    class TestModel(BoringModel):
+
+        def train_dataloader(self):
+            return DataLoader(RandomDataset(32, 64), batch_size=2)
+
+        def val_dataloader(self):
+            return DataLoader(RandomDataset(32, 64), batch_size=2)
+
+        def test_dataloader(self):
+            return DataLoader(RandomDataset(32, 64), batch_size=2)
+
+        def add_outputs(self, output, device):
+            output.update({
+                "reduce_int": torch.tensor(device.index, dtype=torch.int, device=device),
+                "reduce_float": torch.tensor(device.index, dtype=torch.float, device=device),
+            })
+
+        def training_step(self, batch, batch_idx):
+            output = super().training_step(batch, batch_idx)
+            self.add_outputs(output, batch.device)
+            return output
+
+        def validation_step(self, batch, batch_idx):
+            output = super().validation_step(batch, batch_idx)
+            self.add_outputs(output, batch.device)
+            return output
+
+        def test_step(self, batch, batch_idx):
+            output = super().test_step(batch, batch_idx)
+            self.add_outputs(output, batch.device)
+            return output
+
+        def training_epoch_end(self, outputs):
+            assert outputs[0]["loss"].shape == torch.Size([])
+            assert outputs[0]["reduce_int"].item() == 0  # mean([0, 1]) = 0
+            assert outputs[0]["reduce_float"].item() == 0.5  # mean([0., 1.]) = 0.5
+
+    model = TestModel()
     model.training_step_end = None
+    model.validation_step_end = None
+    model.test_step_end = None
+
     trainer = pl.Trainer(
         default_root_dir=tmpdir,
         max_epochs=1,
-        limit_train_batches=2,
-        limit_val_batches=0,
+        limit_train_batches=1,
+        limit_val_batches=1,
+        limit_test_batches=1,
         gpus=2,
         accelerator='dp',
     )