Merge branch 'master' into refactor/extract-dataloader-utils
awaelchli committed Nov 19, 2021
2 parents e729650 + c09c9c7 commit 626ff06
Showing 46 changed files with 384 additions and 195 deletions.
2 changes: 1 addition & 1 deletion .azure-pipelines/gpu-benchmark.yml
@@ -36,7 +36,7 @@ jobs:

steps:
- bash: |
python -m pytest benchmarks -v --durations=0
python -m pytest tests/benchmarks -v --durations=0
displayName: 'Testing: benchmarks'
env:
PL_RUNNING_BENCHMARKS: 1
4 changes: 2 additions & 2 deletions .azure-pipelines/gpu-tests.yml
@@ -68,7 +68,7 @@ jobs:
displayName: 'Get legacy checkpoints'
- bash: |
python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50
python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests --ignore tests/benchmarks -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50
displayName: 'Testing: standard'
- bash: |
@@ -113,5 +113,5 @@ jobs:
displayName: 'Testing: examples'
- bash: |
python -m pytest benchmarks -v --maxfail=2 --durations=0
python -m pytest tests/benchmarks -v --maxfail=2 --durations=0
displayName: 'Testing: benchmarks'
13 changes: 10 additions & 3 deletions CHANGELOG.md
@@ -9,7 +9,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

-
- Added a flag `SLURMEnvironment(auto_requeue=True|False)` to control whether Lightning handles the requeuing ([#10601](https://github.com/PyTorchLightning/pytorch-lightning/issues/10601))


-
@@ -37,6 +37,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Raise `MisconfigurationException` when `enable_progress_bar=False` and a progress bar instance has been passed in the callback list ([#10520](https://github.com/PyTorchLightning/pytorch-lightning/issues/10520))


- Renamed `refresh_rate_per_second` parameter to `refresh_rate` for `RichProgressBar` signature ([#10497](https://github.com/PyTorchLightning/pytorch-lightning/pull/10497)) (see the usage sketch after this file's diff)


- Moved ownership of the `PrecisionPlugin` into `TrainingTypePlugin` and updated all references ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570))


-


@@ -50,7 +56,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Deprecated `DistributedType` in favor of `_StrategyType` ([#10505](https://github.com/PyTorchLightning/pytorch-lightning/pull/10505))


-
- Deprecated the `precision_plugin` constructor argument from `Accelerator` ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570))


-
@@ -139,11 +145,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Removed deprecated `reload_dataloaders_every_epoch` from `Trainer` in favour of `reload_dataloaders_every_n_epochs` ([#10481](https://github.com/PyTorchLightning/pytorch-lightning/pull/10481))


- Removed the `precision_plugin` attribute from `Accelerator` in favor of its equivalent attribute `precision_plugin` in the `TrainingTypePlugin` ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570))

### Fixed


-
- Fixed signals being registered within threads ([#10610](https://github.com/PyTorchLightning/pytorch-lightning/pull/10610))


-
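A minimal usage sketch of the renamed `RichProgressBar` argument noted in the changelog entry above (a sketch assuming the post-#10497 signature; this snippet is not part of the commit's diff):

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import RichProgressBar

    # Before #10497 this was written as RichProgressBar(refresh_rate_per_second=2)
    progress_bar = RichProgressBar(refresh_rate=2)  # renamed keyword argument
    trainer = Trainer(callbacks=[progress_bar])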
18 changes: 0 additions & 18 deletions benchmarks/__init__.py

This file was deleted.

8 changes: 8 additions & 0 deletions docs/source/clouds/cluster.rst
@@ -210,6 +210,14 @@ To get this behavior make sure to add the correct signal to your SLURM script
# 90 seconds before training ends
#SBATCH --signal=SIGUSR1@90
If auto-resubmit is not desired, it can be turned off in the :class:`~pytorch_lightning.plugins.environments.slurm_environment.SLURMEnvironment` plugin:

.. code-block:: python

    from pytorch_lightning.plugins import SLURMEnvironment

    trainer = Trainer(plugins=[SLURMEnvironment(auto_requeue=False)])
Building SLURM scripts
----------------------
6 changes: 1 addition & 5 deletions pyproject.toml
@@ -7,7 +7,6 @@ requires = [

[tool.isort]
known_first_party = [
"benchmarks",
"docs",
"pl_examples",
"pytorch_lightning",
@@ -24,7 +23,7 @@ line-length = 120


[tool.mypy]
files = ["pytorch_lightning", "pl_examples", "benchmarks"]
files = ["pytorch_lightning"]
disallow_untyped_defs = "True"
ignore_missing_imports = "True"
show_error_codes = "True"
@@ -53,9 +52,6 @@ module = [
"pytorch_lightning.distributed.*",
"pytorch_lightning.tuner.*",
"pytorch_lightning.utilities.*",
"pl_examples.*",
"benchmarks.*",
"tests.helpers.*"
]
ignore_errors = "True"

59 changes: 40 additions & 19 deletions pytorch_lightning/accelerators/accelerator.py
@@ -25,6 +25,7 @@
from pytorch_lightning.plugins.precision import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin, PrecisionPlugin
from pytorch_lightning.plugins.training_type import DataParallelPlugin, TrainingTypePlugin
from pytorch_lightning.trainer.states import TrainerFn
from pytorch_lightning.utilities import rank_zero_deprecation
from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device
from pytorch_lightning.utilities.enums import AMPType, LightningEnum
from pytorch_lightning.utilities.types import STEP_OUTPUT
@@ -44,15 +45,23 @@ class Accelerator:
One to handle differences from the training routine and one to handle different precisions.
"""

def __init__(self, precision_plugin: PrecisionPlugin, training_type_plugin: TrainingTypePlugin) -> None:
def __init__(self, precision_plugin: Optional[PrecisionPlugin], training_type_plugin: TrainingTypePlugin) -> None:
"""
Args:
precision_plugin: the plugin to handle precision-specific parts
.. deprecated::
The ``precision_plugin`` parameter has been deprecated and will be removed soon.
Pass the precision plugin as a parameter to the ``TrainingTypePlugin`` instead.
training_type_plugin: the plugin to handle different training routines
"""
self.precision_plugin = precision_plugin

self.training_type_plugin = training_type_plugin

if precision_plugin is not None:
self.training_type_plugin._precision_plugin = precision_plugin

self.optimizers: List = []
self.lr_schedulers: List = []
self.optimizer_frequencies: List = []
@@ -84,7 +93,7 @@ def pre_dispatch(self, trainer: "pl.Trainer") -> None:
if self.training_type_plugin.setup_optimizers_in_pre_dispatch:
self.setup_optimizers(trainer)

self.precision_plugin.pre_dispatch()
self.training_type_plugin.precision_plugin.pre_dispatch()

def _move_optimizer_state(self, device: Optional[torch.device] = None) -> None:
"""Moves the state of the optimizers to the GPU if needed."""
@@ -96,12 +105,12 @@ def _move_optimizer_state(self, device: Optional[torch.device] = None) -> None:
def dispatch(self, trainer: "pl.Trainer") -> None:
"""Hook to do something before the training/evaluation/prediction starts."""
self.training_type_plugin.dispatch(trainer)
self.precision_plugin.dispatch(trainer)
self.training_type_plugin.precision_plugin.dispatch(trainer)

def post_dispatch(self, trainer: "pl.Trainer") -> None:
"""Hook to do something after the training/evaluation/prediction starts."""
self.training_type_plugin.post_dispatch(trainer)
self.precision_plugin.post_dispatch()
self.training_type_plugin.precision_plugin.post_dispatch()

@property
def model(self) -> Module:
@@ -159,31 +168,31 @@ def training_step(self, step_kwargs: Dict[str, Union[Any, int]]) -> STEP_OUTPUT:
See :meth:`~pytorch_lightning.core.lightning.LightningModule.training_step` for more details
"""
with self.precision_plugin.train_step_context():
with self.training_type_plugin.precision_plugin.train_step_context():
return self.training_type_plugin.training_step(*step_kwargs.values())

def validation_step(self, step_kwargs: Dict[str, Union[Any, int]]) -> Optional[STEP_OUTPUT]:
"""The actual validation step.
See :meth:`~pytorch_lightning.core.lightning.LightningModule.validation_step` for more details
"""
with self.precision_plugin.val_step_context():
with self.training_type_plugin.precision_plugin.val_step_context():
return self.training_type_plugin.validation_step(*step_kwargs.values())

def test_step(self, step_kwargs: Dict[str, Union[Any, int]]) -> Optional[STEP_OUTPUT]:
"""The actual test step.
See :meth:`~pytorch_lightning.core.lightning.LightningModule.test_step` for more details
"""
with self.precision_plugin.test_step_context():
with self.training_type_plugin.precision_plugin.test_step_context():
return self.training_type_plugin.test_step(*step_kwargs.values())

def predict_step(self, step_kwargs: Dict[str, Union[Any, int]]) -> STEP_OUTPUT:
"""The actual predict step.
See :meth:`~pytorch_lightning.core.lightning.LightningModule.predict_step` for more details
"""
with self.precision_plugin.predict_step_context():
with self.training_type_plugin.precision_plugin.predict_step_context():
return self.training_type_plugin.predict_step(*step_kwargs.values())

def backward(self, closure_loss: Tensor, *args: Any, **kwargs: Any) -> Tensor:
@@ -193,11 +202,11 @@ def backward(self, closure_loss: Tensor, *args: Any, **kwargs: Any) -> Tensor:
closure_loss: a tensor holding the loss value to backpropagate
"""
self.training_type_plugin.pre_backward(closure_loss)
closure_loss = self.precision_plugin.pre_backward(self.lightning_module, closure_loss)
closure_loss = self.training_type_plugin.precision_plugin.pre_backward(self.lightning_module, closure_loss)

self.precision_plugin.backward(self.lightning_module, closure_loss, *args, **kwargs)
self.training_type_plugin.precision_plugin.backward(self.lightning_module, closure_loss, *args, **kwargs)

closure_loss = self.precision_plugin.post_backward(self.lightning_module, closure_loss)
closure_loss = self.training_type_plugin.precision_plugin.post_backward(self.lightning_module, closure_loss)
self.training_type_plugin.post_backward(closure_loss)

return closure_loss
@@ -208,7 +217,7 @@ def optimizer_step(
opt_idx: int,
closure: Callable[[], Any],
model: Optional[Union["pl.LightningModule", Module]] = None,
**kwargs: Any
**kwargs: Any,
) -> None:
"""performs the actual optimizer step.
@@ -220,7 +229,7 @@ def optimizer_step(
**kwargs: Any extra arguments to ``optimizer.step``
"""
model = model or self.lightning_module
self.precision_plugin.optimizer_step(model, optimizer, opt_idx, closure, **kwargs)
self.training_type_plugin.precision_plugin.optimizer_step(model, optimizer, opt_idx, closure, **kwargs)

def optimizer_zero_grad(self, current_epoch: int, batch_idx: int, optimizer: Optimizer, opt_idx: int) -> None:
"""Zeros all model parameter's gradients."""
@@ -248,26 +257,38 @@ def setup_training_type_plugin(self) -> None:

def setup_precision_plugin(self) -> None:
"""Attaches the precision plugin to the accelerator."""
model, optimizers, schedulers = self.precision_plugin.connect(self.model, self.optimizers, self.lr_schedulers)
model, optimizers, schedulers = self.training_type_plugin.precision_plugin.connect(
self.model, self.optimizers, self.lr_schedulers
)
self.model = model
self.optimizers = optimizers
self.lr_schedulers = schedulers

@property
def amp_backend(self) -> Optional[LightningEnum]:
if isinstance(self.precision_plugin, ApexMixedPrecisionPlugin):
if isinstance(self.training_type_plugin.precision_plugin, ApexMixedPrecisionPlugin):
return AMPType.APEX
if isinstance(self.precision_plugin, NativeMixedPrecisionPlugin):
if isinstance(self.training_type_plugin.precision_plugin, NativeMixedPrecisionPlugin):
return AMPType.NATIVE
return None

@property
def precision(self) -> Union[str, int]:
return self.precision_plugin.precision
"""The type of precision being used with this accelerator.
.. deprecated::
This property has been deprecated and will be removed soon.
Use ``training_type_plugin.precision_plugin.precision`` instead.
"""
rank_zero_deprecation(
f"`{self.__class__.__name__}.precision` has been deprecated and will be removed soon"
f" Use `training_type_plugin.precision_plugin.precision` instead."
)
return self.training_type_plugin.precision_plugin.precision

@property
def scaler(self) -> Optional["GradScaler"]:
return getattr(self.precision_plugin, "scaler", None)
return getattr(self.training_type_plugin.precision_plugin, "scaler", None)

def optimizer_state(self, optimizer: Optimizer) -> Dict[str, Tensor]:
"""Returns state of an optimizer.
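As a hedged illustration of the ownership move above (not part of the diff): the precision plugin now hangs off the training type plugin, so code that previously read it from the accelerator reaches it through `training_type_plugin` instead. A minimal sketch, assuming a default-constructed `Trainer`:

    from pytorch_lightning import Trainer

    trainer = Trainer()

    # New location: the training type plugin owns the precision plugin.
    precision_plugin = trainer.training_type_plugin.precision_plugin

    # Previously (attribute removed from Accelerator by #10570):
    # precision_plugin = trainer.accelerator.precision_plugin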
5 changes: 3 additions & 2 deletions pytorch_lightning/accelerators/tpu.py
@@ -36,10 +36,11 @@ def setup(self, trainer: "pl.Trainer") -> None:
ValueError:
If the precision or training type plugin are unsupported.
"""
if not isinstance(self.precision_plugin, TPUPrecisionPlugin):
if not isinstance(self.training_type_plugin.precision_plugin, TPUPrecisionPlugin):
# this configuration should have been avoided in the accelerator connector
raise ValueError(
f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`, found: {self.precision_plugin}."
f"The `TPUAccelerator` can only be used with a `TPUPrecisionPlugin`,"
f" found: {self.training_type_plugin.precision_plugin}."
)
if not isinstance(self.training_type_plugin, (SingleTPUPlugin, TPUSpawnPlugin)):
raise ValueError(
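For context on the check above, a brief sketch (an assumption-laden example, not from the diff) of a configuration that satisfies it: letting the Trainer's accelerator connector build the TPU stack pairs `TPUAccelerator` with `TPUPrecisionPlugin` and `TPUSpawnPlugin`, so the `ValueError` is not raised. Assumes a TPU runtime is available:

    from pytorch_lightning import Trainer

    # The accelerator connector selects TPU-compatible plugins automatically.
    trainer = Trainer(tpu_cores=8)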