Merge branch 'master' into bugfix/horovodrun-detection

awaelchli committed Jul 30, 2021
2 parents 335bf50 + c99e2fe commit a1542b3
Showing 15 changed files with 91 additions and 119 deletions.
12 changes: 7 additions & 5 deletions CHANGELOG.md
@@ -58,28 +58,30 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Removed deprecated `metrics` ([#8586](https://github.com/PyTorchLightning/pytorch-lightning/pull/8586/))


- Removed the `outputs` argument in both the `LightningModule.on_train_epoch_end` and `Callback.on_train_epoch_end` hooks ([#8587](https://github.com/PyTorchLightning/pytorch-lightning/pull/8587))
- Removed the deprecated `outputs` argument in both the `LightningModule.on_train_epoch_end` and `Callback.on_train_epoch_end` hooks ([#8587](https://github.com/PyTorchLightning/pytorch-lightning/pull/8587))


-

- Delete the deprecated `TrainerLoggingMixin` class ([#8609](https://github.com/PyTorchLightning/pytorch-lightning/pull/8609))

-


-
- Removed the deprecated `optimizer_idx` from `training_step` as an accepted argument in manual optimization ([#8576](https://github.com/PyTorchLightning/pytorch-lightning/pull/8576))


### Fixed

- Fixed horovod auto-detection when horovod is not installed and the launcher is `mpirun` ([#8610](https://github.com/PyTorchLightning/pytorch-lightning/pull/8610))

### Fixed

-


-


- Fixed `trainer.fit_loop.split_idx` always returning `None` ([#8601](https://github.com/PyTorchLightning/pytorch-lightning/pull/8601))

-


5 changes: 4 additions & 1 deletion pytorch_lightning/callbacks/stochastic_weight_avg.py
@@ -195,7 +195,10 @@ def on_train_epoch_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningMo
scheduler_cfg = trainer.lr_schedulers[0]
if scheduler_cfg["interval"] != "epoch" or scheduler_cfg["frequency"] != 1:
rank_zero_warn(f"SWA is currently only supported every epoch. Found {scheduler_cfg}")
rank_zero_info(f"Swapping scheduler {scheduler_cfg['scheduler']} for {self._swa_scheduler}")
rank_zero_info(
f"Swapping scheduler `{scheduler_cfg['scheduler'].__class__.__name__}`"
f" for `{self._swa_scheduler.__class__.__name__}`"
)
trainer.lr_schedulers[0] = default_scheduler_cfg
else:
trainer.lr_schedulers.append(default_scheduler_cfg)
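For reference, a minimal standalone sketch of the new log formatting above; the `StepLR`/`SWALR` pair is assumed from the updated test further down in this commit, not prescribed by the callback:

```python
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import StepLR
from torch.optim.swa_utils import SWALR

# build a throwaway optimizer plus the two schedulers involved in the swap
optimizer = SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1)
scheduler_cfg = {"scheduler": StepLR(optimizer, step_size=1)}
swa_scheduler = SWALR(optimizer, swa_lr=0.05)

# the message now names both scheduler classes instead of printing the raw objects
print(
    f"Swapping scheduler `{scheduler_cfg['scheduler'].__class__.__name__}`"
    f" for `{swa_scheduler.__class__.__name__}`"
)
# -> Swapping scheduler `StepLR` for `SWALR`
```
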
22 changes: 12 additions & 10 deletions pytorch_lightning/loops/batch/training_batch_loop.py
@@ -47,6 +47,7 @@ def __init__(self) -> None:
self.accumulated_loss: Optional[Tensor] = None
self.batch_outputs: Optional[List[List[STEP_OUTPUT]]] = None
self.running_loss: TensorRunningAccum = TensorRunningAccum(window_length=20)
# the current split index when the batch gets split into chunks in truncated backprop through time
self.split_idx: Optional[int] = None
self.optim_progress = OptimizationProgress()

@@ -159,19 +160,20 @@ def num_active_optimizers(self, batch_idx: Optional[int] = None) -> int:
return len(self.get_active_optimizers(batch_idx))

def _run_optimization(
self, batch_idx: int, split_batch: Any, opt_idx: int = 0, optimizer: Optional[torch.optim.Optimizer] = None
self,
batch_idx: int,
split_batch: Any,
opt_idx: Optional[int] = None,
optimizer: Optional[torch.optim.Optimizer] = None,
):
"""Runs closure (train step + backward) together with optimization if necessary.
Args:
batch_idx: the index of the current batch
split_batch: the current tbptt split of the whole batch
opt_idx: the index of the current optimizer
optimizer: the current optimizer
opt_idx: the index of the current optimizer or `None` in case of manual optimization
optimizer: the current optimizer or `None` in case of manual optimization
"""
# TODO(@awaelchli): In v1.5, when optimizer_idx gets removed from training_step in manual_optimization, change
# opt_idx=0 to opt_idx=None in the signature here

# toggle model params
self._run_optimization_start(opt_idx, optimizer)

@@ -624,10 +626,10 @@ def _build_kwargs(self, batch: Any, batch_idx: int, opt_idx: int, hiddens: Optio
has_opt_idx_in_train_step = is_param_in_hook_signature(training_step_fx, "optimizer_idx")
if has_opt_idx_in_train_step:
if not lightning_module.automatic_optimization:
self._warning_cache.deprecation(
"`training_step` hook signature has changed in v1.3."
" `optimizer_idx` argument has been removed in case of manual optimization. Support for"
" the old signature will be removed in v1.5"
raise ValueError(
"Your `LightningModule.training_step` signature contains an `optimizer_idx` argument but"
" in manual optimization optimizers must be handled by the user. Remove the optimizer_idx"
" argument or set `self.automatic_optimization = True`."
)
step_kwargs["optimizer_idx"] = opt_idx
elif not has_opt_idx_in_train_step and lightning_module.automatic_optimization:
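The deprecation above becomes a hard error: a `training_step` that accepts `optimizer_idx` under manual optimization now raises a `ValueError`. A hedged sketch of the expected shape (hypothetical toy module, not part of this commit):

```python
import torch
import pytorch_lightning as pl
from torch.utils.data import DataLoader

class ManualOptModel(pl.LightningModule):  # hypothetical example, for illustration only
    def __init__(self):
        super().__init__()
        self.automatic_optimization = False  # manual optimization
        self.layer = torch.nn.Linear(32, 2)

    # correct signature: no `optimizer_idx`; adding it would now raise the ValueError above
    def training_step(self, batch, batch_idx):
        opt_a, opt_b = self.optimizers()  # fetch optimizers explicitly instead
        loss = self.layer(batch).sum()
        opt_a.zero_grad()
        self.manual_backward(loss)
        opt_a.step()
        return loss

    def train_dataloader(self):
        return DataLoader(torch.randn(64, 32), batch_size=8)

    def configure_optimizers(self):
        return [torch.optim.SGD(self.parameters(), lr=1e-2),
                torch.optim.SGD(self.parameters(), lr=1e-2)]
```

With two optimizers configured, the module itself decides which one to step on each batch; the loop no longer passes an `opt_idx` in.
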
2 changes: 0 additions & 2 deletions pytorch_lightning/loops/epoch/training_epoch_loop.py
@@ -42,8 +42,6 @@ def __init__(self, min_steps: int, max_steps: int):
self.global_step: int = 0
# the total batch index across all epochs
self.total_batch_idx: int = 0
# the current split index when the batch gets split into chunks in truncated backprop through time
self.split_idx: Optional[int] = None
self.is_last_batch: Optional[bool] = None
self.batch_progress = Progress()
self.scheduler_progress = SchedulerProgress()
2 changes: 1 addition & 1 deletion pytorch_lightning/loops/fit_loop.py
@@ -74,7 +74,7 @@ def batch_idx(self) -> int:
@property
def split_idx(self) -> int:
"""Returns the index of the current batch split (within the current batch) for bptt"""
return self.epoch_loop.split_idx
return self.epoch_loop.batch_loop.split_idx

@property
def min_steps(self) -> int:
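In other words, the per-split counter now lives on the batch loop (see the new attribute in `training_batch_loop.py` above), and `FitLoop.split_idx` resolves through it. A rough sketch of how user code might read it, assuming a hypothetical callback:

```python
import pytorch_lightning as pl

class SplitIdxProbe(pl.Callback):  # hypothetical callback, for illustration only
    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
        # resolves via trainer.fit_loop.epoch_loop.batch_loop.split_idx and is
        # no longer always None when truncated backprop through time splits the batch
        print("current tbptt split:", trainer.fit_loop.split_idx)
```
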
4 changes: 2 additions & 2 deletions pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -748,7 +748,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
self.distributed_backend = "ddp_spawn"

# special case with DDP on CPUs
if self.distributed_backend == "ddp_cpu":
if self.distributed_backend == DistributedType.DDP_CPU:
if _TPU_AVAILABLE:
raise MisconfigurationException(
"`accelerator='ddp_cpu'` is not supported on TPU machines. "
@@ -803,7 +803,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
self.num_processes = self.num_nodes

# Horovod is an extra case...
if self.distributed_backend == "horovod":
if self.distributed_backend == DistributedType.HOROVOD:
self._set_horovod_backend()

using_valid_distributed = self.use_ddp or self.use_ddp2
@@ -241,9 +241,9 @@ def on_batch_start(self) -> None:
self._epoch_end_reached = False

def epoch_end_reached(self):
self.trainer.logger_connector._epoch_end_reached = True
self.trainer.logger_connector._batch_idx = None
self.trainer.logger_connector._split_idx = None
self._epoch_end_reached = True
self._batch_idx = None
self._split_idx = None

def on_epoch_end(self) -> None:
assert self._epoch_end_reached
34 changes: 0 additions & 34 deletions pytorch_lightning/trainer/logging.py

This file was deleted.

2 changes: 0 additions & 2 deletions pytorch_lightning/trainer/trainer.py
@@ -58,7 +58,6 @@
from pytorch_lightning.trainer.connectors.training_trick_connector import TrainingTricksConnector
from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin
from pytorch_lightning.trainer.deprecated_api import DeprecatedTrainerAttributes
from pytorch_lightning.trainer.logging import TrainerLoggingMixin
from pytorch_lightning.trainer.model_hooks import TrainerModelHooksMixin
from pytorch_lightning.trainer.optimizers import TrainerOptimizersMixin
from pytorch_lightning.trainer.properties import TrainerProperties
@@ -97,7 +96,6 @@ class Trainer(
TrainerCallbackHookMixin,
TrainerModelHooksMixin,
TrainerOptimizersMixin,
TrainerLoggingMixin,
TrainerTrainingTricksMixin,
TrainerDataLoadingMixin,
DeprecatedTrainerAttributes,
9 changes: 5 additions & 4 deletions pytorch_lightning/utilities/enums.py
@@ -40,7 +40,7 @@ def __hash__(self) -> int:
class AMPType(LightningEnum):
"""Type of Automatic Mixed Precission used for training.
>>> # you can math the type with string
>>> # you can match the type with string
>>> AMPType.APEX == 'apex'
True
"""
@@ -52,7 +52,7 @@ class AMPType(LightningEnum):
class DistributedType(LightningEnum):
"""Define type of ditributed computing.
>>> # you can math the type with string
>>> # you can match the type with string
>>> DistributedType.DDP == 'ddp'
True
>>> # which is case invariant
@@ -77,6 +77,7 @@ def is_interactive_compatible(self) -> bool:
DP = "dp"
DDP = "ddp"
DDP2 = "ddp2"
DDP_CPU = "ddp_cpu"
DDP_SPAWN = "ddp_spawn"
TPU_SPAWN = "tpu_spawn"
DEEPSPEED = "deepspeed"
@@ -87,11 +88,11 @@ def is_interactive_compatible(self) -> bool:


class DeviceType(LightningEnum):
"""Define Device type byt its nature - acceleatrors.
"""Define Device type by its nature - acceleatrors.
>>> DeviceType.CPU == DeviceType.from_str('cpu')
True
>>> # you can math the type with string
>>> # you can match the type with string
>>> DeviceType.GPU == 'GPU'
True
>>> # which is case invariant
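For context on the `accelerator_connector.py` change above, a small standalone sketch of why the raw strings can be swapped for enum members: `LightningEnum` values compare equal to their strings, case-insensitively, and this commit adds the `DDP_CPU` member used there.

```python
from pytorch_lightning.utilities.enums import DistributedType

# the new member compares equal to the raw string it replaces in accelerator_connector.py
assert DistributedType.DDP_CPU == "ddp_cpu"
assert DistributedType.HOROVOD == "horovod"

# comparison is case-invariant, as the doctests above show
assert DistributedType.DDP == "DDP"
```

Using the enum members keeps the comparisons typo-safe while remaining backwards compatible with code that still passes plain strings.
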
2 changes: 1 addition & 1 deletion tests/callbacks/test_stochastic_weight_avg.py
@@ -175,7 +175,7 @@ def test_swa_warns(tmpdir, caplog):
trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, stochastic_weight_avg=True)
with caplog.at_level(level=logging.INFO), pytest.warns(UserWarning, match="SWA is currently only supported"):
trainer.fit(model)
assert "Swapping scheduler" in caplog.text
assert "Swapping scheduler `StepLR` for `SWALR`" in caplog.text


def test_swa_raises():
26 changes: 0 additions & 26 deletions tests/deprecated_api/test_remove_1-5.py
@@ -166,26 +166,6 @@ def test_v1_5_0_running_sanity_check():
assert not trainer.running_sanity_check


def test_old_training_step_signature_with_opt_idx_manual_opt(tmpdir):
class OldSignatureModel(BoringModel):
def __init__(self):
super().__init__()
self.automatic_optimization = False

def training_step(self, batch, batch_idx, optimizer_idx):
assert optimizer_idx == 0
return super().training_step(batch, batch_idx)

def configure_optimizers(self):
return [optim.SGD(self.parameters(), lr=1e-2), optim.SGD(self.parameters(), lr=1e-2)]

model = OldSignatureModel()
trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=2)

with pytest.deprecated_call(match="`training_step` .* `optimizer_idx` .* manual .* will be removed in v1.5"):
trainer.fit(model)


def test_v1_5_0_model_checkpoint_period(tmpdir):
with no_warning_call(DeprecationWarning):
ModelCheckpoint(dirpath=tmpdir)
@@ -247,12 +227,6 @@ def test_epoch_end(self, outputs):
trainer.test(model)


def test_v1_5_0_trainer_logging_mixin(tmpdir):
trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, checkpoint_callback=False, logger=False)
with pytest.deprecated_call(match="is deprecated in v1.3 and will be removed in v1.5"):
trainer.metrics_to_scalars({})


def test_v1_5_0_lighting_module_grad_norm(tmpdir):
model = BoringModel()
with pytest.deprecated_call(match="is deprecated in v1.3 and will be removed in v1.5"):
18 changes: 15 additions & 3 deletions tests/models/test_hooks.py
@@ -213,16 +213,22 @@ def get_members(cls):

class HookedCallback(Callback):
def __init__(self, called):
def call(hook, *args, **kwargs):
def call(hook, fn, *args, **kwargs):
out = fn(*args, **kwargs)
d = {"name": f"Callback.{hook}"}
if args:
d["args"] = args
if kwargs:
d["kwargs"] = kwargs
called.append(d)
return out

for h in get_members(Callback):
setattr(self, h, partial(call, h))
attr = getattr(self, h)
setattr(self, h, partial(call, h, attr))

def on_save_checkpoint(*args, **kwargs):
return {"foo": True}


class HookedModel(BoringModel):
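Loosely, the updated helper above wraps each `Callback` hook so the wrapper both records the call and delegates to the original bound method, preserving return values such as the dict from `on_save_checkpoint`. A standalone sketch of the pattern (hypothetical class, not the test itself):

```python
from functools import partial

class Recorder:  # hypothetical stand-in for the HookedCallback pattern
    def __init__(self, called):
        def call(hook, fn, *args, **kwargs):
            out = fn(*args, **kwargs)  # delegate to the original bound method
            entry = {"name": hook}
            if args:
                entry["args"] = args
            called.append(entry)
            return out  # keep the original return value

        for h in ("setup", "on_save_checkpoint"):
            attr = getattr(self, h)  # original bound method
            setattr(self, h, partial(call, h, attr))

    def setup(self, stage):
        return None

    def on_save_checkpoint(self, *args, **kwargs):
        return {"foo": True}  # mirrors the sentinel returned in the test above

called = []
rec = Recorder(called)
assert rec.on_save_checkpoint() == {"foo": True}  # return value survives the wrapper
assert rec.setup("fit") is None
assert [e["name"] for e in called] == ["on_save_checkpoint", "setup"]
```
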
@@ -555,7 +561,12 @@ def test_trainer_model_hook_system_fit_no_val_and_resume(tmpdir):
# initial training to get a checkpoint
model = BoringModel()
trainer = Trainer(
default_root_dir=tmpdir, max_steps=1, limit_val_batches=0, progress_bar_refresh_rate=0, weights_summary=None
default_root_dir=tmpdir,
max_steps=1,
limit_val_batches=0,
progress_bar_refresh_rate=0,
weights_summary=None,
callbacks=[HookedCallback([])],
)
trainer.fit(model)
best_model_path = trainer.checkpoint_callback.best_model_path
@@ -611,6 +622,7 @@ def test_trainer_model_hook_system_fit_no_val_and_resume(tmpdir):
},
),
),
dict(name="Callback.on_load_checkpoint", args=(trainer, model, {"foo": True})),
dict(name="configure_sharded_model"),
dict(name="Callback.on_configure_sharded_model", args=(trainer, model)),
dict(name="configure_optimizers"),