Lightning-AI · Borda · Feb 17, 2021 · Feb 1, 2021 · Feb 1, 2021 · Feb 15, 2021
@@ -289,6 +289,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Remove unnecessary intermediate layers in Dockerfiles ([#5697](https://github.com/PyTorchLightning/pytorch-lightning/pull/5697))
 - Fixed auto learning rate ordering ([#5638](https://github.com/PyTorchLightning/pytorch-lightning/pull/5638))
 
+- Fixed wrong `requires_grad` state after `return None` with multiple optimizers ([#5738](https://github.com/PyTorchLightning/pytorch-lightning/pull/5738))
 
 ## [1.1.6] - 2021-01-26
 

@@ -774,24 +774,23 @@ def training_step_and_backward(self, split_batch, batch_idx, opt_idx, optimizer,
             result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
             self._curr_step_result = result
 
-            if result is None:
-                if self.automatic_optimization:
-                    self.warning_cache.warn("training_step returned None if it was on purpose, ignore this warning...")
-                return None
-
             if not self._skip_backward and self.trainer.train_loop.automatic_optimization:
                 # backward pass
-                with self.trainer.profiler.profile("model_backward"):
-                    self.backward(result, optimizer, opt_idx)
+                if result is not None:
+                    with self.trainer.profiler.profile("model_backward"):
+                        self.backward(result, optimizer, opt_idx)
 
-                # hook - call this hook only
-                # when gradients have finished to accumulate
-                if not self.should_accumulate():
-                    self.on_after_backward(result.training_step_output, batch_idx, result.loss)
+                    # hook - call this hook only
+                    # when gradients have finished to accumulate
+                    if not self.should_accumulate():
+                        self.on_after_backward(result.training_step_output, batch_idx, result.loss)
 
-                # check if loss or model weights are nan
-                if self.trainer.terminate_on_nan:
-                    self.trainer.detect_nan_tensors(result.loss)
+                    # check if loss or model weights are nan
+                    if self.trainer.terminate_on_nan:
+                        self.trainer.detect_nan_tensors(result.loss)
+
+                else:
+                    self.warning_cache.warn("training_step returned None if it was on purpose, ignore this warning...")
 
                 if len(self.trainer.optimizers) > 1:
                     # revert back to previous state

@@ -385,7 +385,9 @@ def optimizer_step(
             optimizer.step(closure=closure)
 
         def training_step(self, batch, batch_idx, optimizer_idx=None):
-            return super().training_step(batch, batch_idx)
+            loss = super().training_step(batch, batch_idx)
+            # make sure the model is untoggled when returning None
+            return loss if batch_idx % 2 == 0 else None
 
         @staticmethod
         def combine_generators(gen_1, gen_2):