Parametrize fit hook test with manual optimization #8071

Merged
79 commits merged on Jul 9, 2021
Changes from 77 commits

Commits (79)
a5f2e6b
Parametrize fit hook test with different precision plugins
carmocca Jun 21, 2021
0ce2295
Fix tests
carmocca Jun 22, 2021
c22fc74
Parametrize fit hook test with manual optimization
carmocca Jun 22, 2021
4b1534b
Unnecessary parenthesis
carmocca Jun 22, 2021
f5828a8
WIP
carmocca Jun 22, 2021
72d5ee3
Comments
carmocca Jun 22, 2021
f34ee7e
Fix message
carmocca Jun 22, 2021
39c4a85
Test CI error
carmocca Jun 22, 2021
c3b458d
Revert "Test CI error"
carmocca Jun 22, 2021
c700cab
Add ddp training type teardown
carmocca Jun 22, 2021
e5602c9
Update CHANGELOG
carmocca Jun 22, 2021
52b2256
Adrian's fix
carmocca Jun 22, 2021
0b94b6c
Use destructor
carmocca Jun 23, 2021
aaf32ab
Update CHANGELOG.md
carmocca Jun 23, 2021
0444d54
RPC destructor
carmocca Jun 23, 2021
5d4f811
Update pytorch_lightning/plugins/training_type/ddp.py
carmocca Jun 23, 2021
bf8766d
Why do you not work :(
carmocca Jun 23, 2021
48bcb7e
Missing condition
carmocca Jun 23, 2021
5d6fa39
Merge branch 'master' into bug/teardown-ddp-process-group
carmocca Jun 23, 2021
21ad2d8
Fix deepspeed test
carmocca Jun 24, 2021
bbc489e
GC collect in conftest
carmocca Jun 24, 2021
5b06fd2
Do not show warnings for special tests
carmocca Jun 24, 2021
5e69ed8
Needs to run on 1.8
carmocca Jun 24, 2021
1e0cf40
Merge branch 'master' into tests/parametrize-hooks-precision-plugins
awaelchli Jun 24, 2021
aed51a2
Run torch 1.8
carmocca Jun 24, 2021
e0a3e87
Skip test due to 'Python bus error'
carmocca Jun 24, 2021
9ee2d19
Debug NCCL
carmocca Jun 24, 2021
3588aaa
shm size
carmocca Jun 24, 2021
067bf1a
Disable warnings for special tests
carmocca Jun 24, 2021
6060b05
Remove NCCL_DEBUG statement
carmocca Jun 24, 2021
f0fa1b7
Try smaller shm size
carmocca Jun 24, 2021
6dd7038
Revert "Skip test due to 'Python bus error'"
carmocca Jun 24, 2021
53082bf
Merge branch 'ci/gpu-tests-torch-1.8' into bug/teardown-ddp-process-g…
carmocca Jun 24, 2021
73e62f8
README and adjust versions
carmocca Jun 24, 2021
902ef02
Avoid self.on_gpu call
carmocca Jun 24, 2021
4ce0f9a
empty cache cleanup
carmocca Jun 24, 2021
990b2e9
Merge branch 'master' into bug/teardown-ddp-process-group
carmocca Jun 24, 2021
738daa5
More garbage collection
carmocca Jun 24, 2021
236aa97
Unroll parametrizations
awaelchli Jun 24, 2021
ffa532d
Do not reuse mock
carmocca Jun 24, 2021
5aa3790
Merge branch 'master' into tests/parametrize-hooks-precision-plugins
carmocca Jun 24, 2021
78baa5f
Merge branch 'bug/teardown-ddp-process-group' into tests/parametrize-…
carmocca Jun 24, 2021
e190089
Undo changes
carmocca Jun 24, 2021
261a166
Undo notebooks modification
carmocca Jun 24, 2021
acec7b0
Merge branch 'master' into tests/parametrize-hooks-precision-plugins
carmocca Jul 3, 2021
33a68d4
Undo
carmocca Jul 3, 2021
ac006c7
Fix test
carmocca Jul 3, 2021
9efe252
Revert "WIP"
carmocca Jul 3, 2021
beecfb9
Merge branch 'master' into tests/parametrize-hooks-manual-opt
carmocca Jul 3, 2021
01dfc7c
Merge branch 'tests/parametrize-hooks-precision-plugins' into tests/p…
carmocca Jul 3, 2021
dc7b17b
Rename
carmocca Jul 3, 2021
8e1f60e
Merge branch 'master' into tests/parametrize-hooks-manual-opt
carmocca Jul 5, 2021
9c3fbd4
Remove optimizers
carmocca Jul 5, 2021
f90348c
Fix bug with LightningOptimizer
carmocca Jul 5, 2021
cbf7b36
Add optimizers
carmocca Jul 5, 2021
d1a48a6
Update CHANGELOG
carmocca Jul 6, 2021
1869128
Merge branch 'master' into tests/parametrize-hooks-manual-opt
carmocca Jul 6, 2021
fe06ec0
On after backward refactor
carmocca Jul 6, 2021
938de4d
Do not call super
carmocca Jul 6, 2021
20da3b1
Fixes
carmocca Jul 6, 2021
abfbdd6
Remove should_accumulate
carmocca Jul 7, 2021
9c8993c
pre/post backward refactor
carmocca Jul 7, 2021
d7d2a71
Call the LM backward hook
carmocca Jul 7, 2021
f3c3726
Update tests
carmocca Jul 7, 2021
7cfed58
Remove dev debug patch
carmocca Jul 7, 2021
7838eae
Fix test
carmocca Jul 7, 2021
c070e84
Remove optimizer arguments and typing
carmocca Jul 7, 2021
5fabca8
Docs fixes
carmocca Jul 7, 2021
cf89192
Fix comment
carmocca Jul 7, 2021
6d77d72
Merge branch 'master' into tests/parametrize-hooks-manual-opt
carmocca Jul 7, 2021
f88cc51
Merge branch 'master' into tests/parametrize-hooks-manual-opt
carmocca Jul 7, 2021
d749a85
Undo changes
carmocca Jul 7, 2021
d1c342b
Merge branch 'master' into tests/parametrize-hooks-manual-opt
carmocca Jul 8, 2021
816cb4c
Merge branch 'master' into tests/parametrize-hooks-manual-opt
carmocca Jul 8, 2021
e2ea758
Split manual and auto
carmocca Jul 8, 2021
160c2b4
Undo change
carmocca Jul 8, 2021
cbc78db
Deepsource
carmocca Jul 8, 2021
6aa229c
Remove optimizers
carmocca Jul 8, 2021
c1cf8a0
Merge branch 'master' into tests/parametrize-hooks-manual-opt
carmocca Jul 9, 2021
67 changes: 61 additions & 6 deletions tests/models/test_hooks.py
@@ -258,7 +258,8 @@ def __init__(self, called):
pl_module_hooks = get_members(LightningModule)
# remove most `nn.Module` hooks
module_hooks = get_members(torch.nn.Module)
pl_module_hooks.difference_update(module_hooks - {'forward', 'zero_grad', 'train'})
module_hooks.difference_update({'forward', 'zero_grad', 'train'})
pl_module_hooks.difference_update(module_hooks)
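# Aside (illustrative sketch with hypothetical hook names, not part of the diff): the
# two-step filtering above drops the plain `nn.Module` members from the tracked
# `LightningModule` hooks while still keeping `forward`, `zero_grad` and `train`,
# which both classes define. For example:
_pl_hooks = {'training_step', 'forward', 'zero_grad', 'train', 'state_dict'}
_nn_hooks = {'forward', 'zero_grad', 'train', 'state_dict'}
_nn_hooks.difference_update({'forward', 'zero_grad', 'train'})
_pl_hooks.difference_update(_nn_hooks)
assert _pl_hooks == {'training_step', 'forward', 'zero_grad', 'train'}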

def call(hook, fn, *args, **kwargs):
out = fn(*args, **kwargs)
@@ -286,9 +287,15 @@ def test_epoch_end(self, *args, **kwargs):
# `BoringModel` does not have a return for `test_step_end` so this would fail
pass

def _train_batch(self, *args, **kwargs):
if self.automatic_optimization:
return self._auto_train_batch(*args, **kwargs)
return self._manual_train_batch(*args, **kwargs)

@staticmethod
def _train_batch(trainer, model, batches, device=torch.device('cpu'), current_epoch=0, **kwargs):
def _auto_train_batch(trainer, model, batches, device=torch.device('cpu'), current_epoch=0, **kwargs):
using_native_amp = kwargs.get('amp_backend') == 'native'
using_deepspeed = kwargs.get('plugins') == 'deepspeed'
out = []
for i in range(batches):
out.extend([
@@ -299,6 +306,7 @@ def _train_batch(trainer, model, batches, device=torch.device('cpu'), current_ep
dict(name='Callback.on_batch_start', args=(trainer, model)),
dict(name='Callback.on_train_batch_start', args=(trainer, model, ANY, i, 0)),
dict(name='on_train_batch_start', args=(ANY, i, 0)),
# TODO: `on_before_optimizer_step`
dict(name='forward', args=(ANY, )),
dict(name='training_step', args=(ANY, i)),
dict(name='training_step_end', args=(dict(loss=ANY), )),
@@ -307,10 +315,9 @@ def _train_batch(trainer, model, batches, device=torch.device('cpu'), current_ep
dict(name='optimizer_zero_grad', args=(current_epoch, i, ANY, 0)),
# TODO: `on_before_backward`
# DeepSpeed handles backward internally
*([dict(name='backward', args=(ANY, ANY, 0))] if kwargs.get('plugins') != 'deepspeed' else []),
*([dict(name='backward', args=(ANY, ANY, 0))] if not using_deepspeed else []),
dict(name='Callback.on_after_backward', args=(trainer, model)),
dict(name='on_after_backward'),
# TODO: `on_before_optimizer_step`
dict(
name='optimizer_step',
args=(current_epoch, i, ANY, 0, ANY),
@@ -322,6 +329,36 @@ def _train_batch(trainer, model, batches, device=torch.device('cpu'), current_ep
])
return out

@staticmethod
def _manual_train_batch(trainer, model, batches, device=torch.device('cpu'), **kwargs):
using_deepspeed = kwargs.get('plugins') == 'deepspeed'
out = []
for i in range(batches):
out.extend([
dict(name='on_before_batch_transfer', args=(ANY, 0)),
dict(name='transfer_batch_to_device', args=(ANY, device, 0)),
dict(name='on_after_batch_transfer', args=(ANY, 0)),
# TODO: `on_batch_{start,end}`
dict(name='Callback.on_batch_start', args=(trainer, model)),
dict(name='Callback.on_train_batch_start', args=(trainer, model, ANY, i, 0)),
dict(name='on_train_batch_start', args=(ANY, i, 0)),
dict(name='forward', args=(ANY, )),
dict(name='optimizers'),
# DeepSpeed handles backward internally
*([dict(name='backward', args=(ANY, None, None))] if not using_deepspeed else []),
dict(name='Callback.on_after_backward', args=(trainer, model)),
dict(name='on_after_backward'),
# `manual_backward` calls the previous 3
dict(name='manual_backward', args=(ANY, )),
# TODO: `on_before_optimizer_step`
dict(name='training_step', args=(ANY, i)),
dict(name='training_step_end', args=(dict(loss=ANY), )),
dict(name='Callback.on_train_batch_end', args=(trainer, model, dict(loss=ANY), ANY, i, 0)),
dict(name='on_train_batch_end', args=(dict(loss=ANY), ANY, i, 0)),
dict(name='Callback.on_batch_end', args=(trainer, model)),
])
return out
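# Aside (illustrative sketch, not part of the diff): `backward`/`on_after_backward` are
# expected *before* `training_step` in the manual list above because the `call` wrapper
# appends to `called` only after the wrapped function returns, so hooks fired inside
# `training_step` via `manual_backward` get recorded first. Minimal standalone reproduction:
_calls = []

def _record(name, fn):
    out = fn()
    _calls.append(name)
    return out

_record('training_step', lambda: _record('manual_backward', lambda: None))
assert _calls == ['manual_backward', 'training_step']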

@staticmethod
def _eval_epoch(fn, trainer, model, batches, key, device=torch.device('cpu')):
outputs = {key: ANY}
@@ -388,9 +425,27 @@ def _predict_batch(trainer, model, batches):
pytest.param(dict(gpus=1, precision=16, amp_backend='apex'), marks=RunIf(amp_apex=True, min_gpus=1)),
]
)
def test_trainer_model_hook_system_fit(tmpdir, kwargs):
@pytest.mark.parametrize('automatic_optimization', (True, False))
def test_trainer_model_hook_system_fit(tmpdir, kwargs, automatic_optimization):
called = []
model = HookedModel(called)

class TestModel(HookedModel):

def __init__(self, *args):
super().__init__(*args)
self.automatic_optimization = automatic_optimization

def training_step(self, batch, batch_idx):
if self.automatic_optimization:
return super().training_step(batch, batch_idx)
loss = self.step(batch[0])
opt = self.optimizers()
opt.zero_grad()
self.manual_backward(loss)
opt.step()
return {'loss': loss}

model = TestModel(called)
callback = HookedCallback(called)
train_batches = 2
val_batches = 2
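For context, a minimal standalone sketch (hypothetical test, not part of this PR) of how stacking `pytest.mark.parametrize` decorators, as done above, makes every existing `kwargs` configuration run once with automatic and once with manual optimization:

import pytest

@pytest.mark.parametrize('kwargs', [dict(), dict(precision=16)])  # hypothetical configs
@pytest.mark.parametrize('automatic_optimization', (True, False))
def test_cross_product(kwargs, automatic_optimization):
    # pytest generates the cross product: 2 kwargs x 2 flags = 4 test cases
    assert isinstance(automatic_optimization, bool)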