PoC: Accelerator refactor [wip] [skip ci] #5616

Closed
wants to merge 168 commits into release/1.2-dev from accelerator-refactor-sharted-4
Changes from 125 commits
168 commits
fddeee3
move to old package
justusschock Nov 9, 2020
f9c1e8d
add initial draft of new accelerators
justusschock Nov 9, 2020
28ae403
add initial data parallel draft
justusschock Nov 9, 2020
fe7573f
add initial precision draft
justusschock Nov 9, 2020
9fd48a1
scheduler helper functions
justusschock Nov 9, 2020
b961aaf
define base plugin api
justusschock Nov 11, 2020
532ad5d
base plugin integration
justusschock Nov 11, 2020
f52ad64
continue ddp plugin
justusschock Nov 11, 2020
bcfb4e7
minor changes precision plugin
justusschock Nov 11, 2020
bf8a87a
start ddp plugin
justusschock Nov 11, 2020
8482c0b
initail version ddp spawn
justusschock Nov 12, 2020
12d2c59
remove deprecated implementation
justusschock Nov 12, 2020
8d83db8
add comment on whats missing
justusschock Nov 12, 2020
22e1e31
latest state
justusschock Nov 20, 2020
eac87c3
update accelerator for model to live in traintype plugin
justusschock Nov 30, 2020
d111471
add general plugin interface
justusschock Nov 30, 2020
3d6c4b8
add model properties
justusschock Nov 30, 2020
51740e9
Trainer integration part 1 for CPU accelerator
awaelchli Dec 4, 2020
9e48568
test single gpu trainer integration
awaelchli Dec 6, 2020
5da773a
make device changes a bit less hardcoded
justusschock Dec 7, 2020
42e53be
properly resolve attributes
justusschock Dec 7, 2020
4c8d24f
add properties for accelerator forwarding
justusschock Dec 7, 2020
6faebfa
correct optimizer_step calls
justusschock Dec 7, 2020
29568e1
call train or test
awaelchli Dec 7, 2020
33561d7
make calls to trainstep (ad fix bugs)
justusschock Dec 7, 2020
ef94755
remove gradient_clip_val from accelerator
awaelchli Dec 7, 2020
c5e9892
add back the step end methods
awaelchli Dec 7, 2020
c02baad
add precision todo comment
awaelchli Dec 7, 2020
ce4eafa
ddp
awaelchli Dec 8, 2020
e6ba009
clean up
awaelchli Dec 8, 2020
fa4d844
connect
awaelchli Dec 8, 2020
8be82a4
clean up
awaelchli Dec 8, 2020
08ce7d3
post
awaelchli Dec 8, 2020
ffbcd4f
disable progress bar on rank > 0
awaelchli Dec 9, 2020
4be76bf
precision test
justusschock Dec 10, 2020
098f665
fix native amp
justusschock Dec 10, 2020
ea85633
a
awaelchli Dec 12, 2020
846dc92
ddp spawn
awaelchli Dec 12, 2020
0d0c3d7
spawn
awaelchli Dec 12, 2020
3fb8b4d
finish ddp plugin integration
awaelchli Dec 13, 2020
0f5298e
remove logger from plugins
awaelchli Dec 13, 2020
434e30e
setup
awaelchli Dec 13, 2020
3fb31c8
remove logger arg
awaelchli Dec 13, 2020
e7a7a87
module
awaelchli Dec 13, 2020
1e8aa44
clean up
awaelchli Dec 13, 2020
628fdc3
ddp_cpu integration
awaelchli Dec 14, 2020
9f369cc
cuda context manager for emptying cache
awaelchli Dec 14, 2020
a8e8306
args
awaelchli Dec 14, 2020
71cbd33
move "log_gpu_memory" to logger connector
awaelchli Dec 14, 2020
1a9ad4f
fix imports
justusschock Dec 14, 2020
7b874cc
typo
justusschock Dec 14, 2020
bc2460a
remove todo
justusschock Dec 14, 2020
506c446
add rpc_enabled flag
justusschock Dec 14, 2020
19d19d5
remove unused self arg
justusschock Dec 14, 2020
dd4d148
comment out unnexessary amp part
justusschock Dec 14, 2020
f2fffc6
fix model connector
justusschock Dec 14, 2020
c6b3aeb
fix import
justusschock Dec 14, 2020
55fc952
copy properties only once
justusschock Dec 14, 2020
177a634
add cluster env
awaelchli Dec 22, 2020
7290e99
move slurm configuration
awaelchli Dec 22, 2020
1b9c095
resolve importerrors
awaelchli Dec 22, 2020
e50aea9
handle distributed_sampler_kwargs
awaelchli Dec 22, 2020
2e8f944
move emptying cache to accelertor
awaelchli Dec 22, 2020
bcc7a72
fix a few tests
awaelchli Dec 22, 2020
259c7f7
restoring the result from subprocess
awaelchli Dec 22, 2020
dfab52a
fix queue.get() order for results
awaelchli Dec 22, 2020
6742488
add missing "block_backward_sync" context manager
awaelchli Dec 22, 2020
8c89932
add missing "block_backward_sync" context manager
awaelchli Dec 22, 2020
0186a0f
fix sync_batchnorm
awaelchli Dec 22, 2020
b2ac1f4
fix supported gpu-ids for tuple
awaelchli Dec 22, 2020
07a41ce
fix clip gradients and inf recursion
awaelchli Dec 22, 2020
63b7eaf
accelerator selection: added cluster_environment plugin
awaelchli Dec 23, 2020
f8344c5
fix torchelastic test
awaelchli Dec 23, 2020
34e3c15
fix reduce early stopping decision for DDP
awaelchli Dec 24, 2020
27a4cff
fix tests: callbacks, conversion to lightning optimizer
awaelchli Dec 24, 2020
df5ac30
fix lightning optimizer does not pickle
awaelchli Dec 24, 2020
dcf917a
fix setting benchmark and deterministic option
awaelchli Dec 24, 2020
272f088
fix slurm amp test
awaelchli Dec 24, 2020
4529476
fix prepare_data test and determine node_rank
awaelchli Dec 27, 2020
5319b0f
fix retrieving last path when testing
awaelchli Dec 27, 2020
3b54cfb
remove obsolete plugin argument
awaelchli Dec 27, 2020
6540b87
fix test: test_trainer_config
awaelchli Dec 27, 2020
6b450e1
fix torchscript tests
awaelchli Dec 27, 2020
4ef539f
fix trainer.model access
awaelchli Dec 27, 2020
1001ccf
move properties
awaelchli Dec 27, 2020
38a1d0f
fix test_transfer_batch_hook
awaelchli Dec 27, 2020
46cf7ef
fix auto_select_gpus
awaelchli Dec 27, 2020
258f50e
fix omegaconf test
awaelchli Dec 27, 2020
a5d69b9
fix test that needs to simulate slurm ddp
awaelchli Dec 27, 2020
88a7ed5
add horovod plugin
awaelchli Dec 29, 2020
40daa41
fix test with named arguments
awaelchli Dec 29, 2020
96fc074
clean up whitespace
awaelchli Dec 29, 2020
210831a
fix datamodules test
awaelchli Dec 29, 2020
98b6dd4
remove old accelerators
justusschock Jan 6, 2021
dfcbba6
fix naming
justusschock Jan 6, 2021
348a1b0
move old plugins
justusschock Jan 6, 2021
14f2f6e
move to plugins
justusschock Jan 6, 2021
2f779c6
create precision subpackage
justusschock Jan 6, 2021
58536f6
create training_type subpackage
justusschock Jan 6, 2021
ee53c90
fix all new import errors
awaelchli Jan 7, 2021
894e604
fix wrong arguments order passed to test
awaelchli Jan 7, 2021
2bdc836
fix LR finder
awaelchli Jan 10, 2021
48b9882
Added sharded training type and amp plugin
Jan 11, 2021
38452b6
Move clip grad to precision plugin
Jan 11, 2021
173b22c
Added sharded spawn, select accelerators based on distributed_backend…
Jan 12, 2021
79803f6
Fix import issue, attempting to fix tests
Jan 12, 2021
a7c0d8f
Fix initial test
Jan 12, 2021
02df0ad
Reflect hook logic from master, should wrap model after move to device
Jan 14, 2021
d0ebcba
Optional state consolidation, since master has optimizers not wrapped
justusschock Jan 22, 2021
319c3e8
change attribute for instance test
justusschock Jan 22, 2021
a34cd15
reset optimizers
justusschock Jan 22, 2021
c95b06a
legacy
Borda Jan 22, 2021
9ff0c64
imports in accel
Borda Jan 22, 2021
67d4e47
legacy2
Borda Jan 22, 2021
577b00d
trainer imports
Borda Jan 22, 2021
aa4858b
fix import errors after rebase
awaelchli Jan 25, 2021
f81a44f
move hook to new setup location
awaelchli Jan 25, 2021
a285665
provide unwrapping logic
awaelchli Jan 25, 2021
bf78d70
fix trainer callback system
awaelchli Jan 25, 2021
34947cf
added ddp2 implementation
awaelchli Jan 25, 2021
49bec53
fix imports .legacy
Borda Jan 25, 2021
ba1c986
move plugins
Borda Jan 25, 2021
45dfbb7
restore legacy
Borda Jan 25, 2021
9b7326a
drop test.py from root
Borda Jan 25, 2021
96bc05d
add tpu accelerator and plugins
justusschock Jan 26, 2021
c5994e5
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Jan 30, 2021
9e46624
fixes
awaelchli Jan 30, 2021
22d2ae8
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Jan 30, 2021
901d392
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Jan 31, 2021
e174b8d
fix lightning optimizer merge
awaelchli Jan 31, 2021
98660de
reset bugreportmodel
awaelchli Jan 31, 2021
4d95b6c
unwrapping
awaelchli Jan 31, 2021
b69d013
step routing forward
awaelchli Jan 31, 2021
cb6676d
model access
awaelchli Jan 31, 2021
a33d27f
unwrap
awaelchli Jan 31, 2021
f7486e2
opt
awaelchli Jan 31, 2021
117f16d
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Jan 31, 2021
3792b72
integrate distrib_type
awaelchli Jan 31, 2021
ef85b81
sync changes
awaelchli Jan 31, 2021
9d9a940
sync
awaelchli Feb 1, 2021
f017a39
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
a190a56
fixes
awaelchli Feb 1, 2021
73bb607
add forgotten generators
awaelchli Feb 1, 2021
c8c74f3
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
ae71997
add missing logic
awaelchli Feb 1, 2021
d89847b
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
0e686c3
update
awaelchli Feb 1, 2021
d6a43ea
import
awaelchli Feb 1, 2021
ceb8f75
missed imports
awaelchli Feb 1, 2021
fbb7c20
import fixes
awaelchli Feb 1, 2021
b610999
isort
awaelchli Feb 1, 2021
9b79924
mv f
awaelchli Feb 1, 2021
9afe54d
changelog
awaelchli Feb 1, 2021
3b63e82
Merge branch 'release/1.2-dev' into ref/update-plugins
awaelchli Feb 1, 2021
ca8cb68
format
awaelchli Feb 1, 2021
0633745
move helper to parallel plugin
awaelchli Feb 1, 2021
a622e0b
d
awaelchli Feb 1, 2021
18c682f
Merge branch 'ref/update-plugins' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
f275803
add world size
awaelchli Feb 1, 2021
4ae008b
clean up
awaelchli Feb 1, 2021
3b3918b
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
d4c6308
duplicate
awaelchli Feb 1, 2021
7eef4a0
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 2, 2021
9949164
activate ddp_sharded and tpu
awaelchli Feb 2, 2021
6d47357
set nvidia flags
awaelchli Feb 2, 2021
a6864ec
remove unused colab var
awaelchli Feb 2, 2021
b4b9724
use_tpu <-> on_tpu attrs
awaelchli Feb 2, 2021
81001e3
make some ddp_cpu and clusterplugin tests pass
awaelchli Feb 2, 2021
51 changes: 13 additions & 38 deletions benchmarks/test_sharded_parity.py
@@ -15,14 +15,12 @@
import os
import platform
import time
-from typing import Type, Union
+from typing import Type

import pytest
import torch

from pytorch_lightning import seed_everything, Trainer
-from pytorch_lightning.plugins.ddp_plugin import DDPPlugin
-from pytorch_lightning.plugins.sharded_plugin import DDPShardedPlugin
from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE, _NATIVE_AMP_AVAILABLE
from tests.backends import DDPLauncher
from tests.base.boring_model import BoringModel, RandomDataset
@@ -32,10 +30,8 @@
@pytest.mark.skipif(platform.system() == "Windows", reason="Distributed training is not supported on Windows")
@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
def test_ddp_sharded_plugin_correctness_one_gpu():
-    plugin_parity_test(
+    sharded_parity_test(
        gpus=1,
-        accelerator='ddp_spawn',
-        plugin=DDPShardedPlugin(),
        model_cls=SeedTrainLoaderModel,
    )

@@ -45,11 +41,9 @@ def test_ddp_sharded_plugin_correctness_one_gpu():
@pytest.mark.skipif(platform.system() == "Windows", reason="Distributed training is not supported on Windows")
@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
def test_ddp_sharded_plugin_correctness_amp_one_gpu():
-    plugin_parity_test(
+    sharded_parity_test(
        gpus=1,
        precision=16,
-        accelerator='ddp_spawn',
-        plugin=DDPShardedPlugin(),
        model_cls=SeedTrainLoaderModel,
    )

@@ -59,10 +53,8 @@ def test_ddp_sharded_plugin_correctness_amp_one_gpu():
@pytest.mark.skipif(platform.system() == "Windows", reason="Distributed training is not supported on Windows")
@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
def test_ddp_sharded_plugin_correctness_multi_gpu():
-    plugin_parity_test(
+    sharded_parity_test(
        gpus=2,
-        accelerator='ddp_spawn',
-        plugin=DDPShardedPlugin(),
        model_cls=SeedTrainLoaderModel,
        max_percent_speed_diff=0.25,  # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
    )
@@ -73,11 +65,9 @@ def test_ddp_sharded_plugin_correctness_multi_gpu():
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
def test_ddp_sharded_plugin_correctness_amp_multi_gpu():
-    plugin_parity_test(
+    sharded_parity_test(
        gpus=2,
        precision=16,
-        accelerator='ddp_spawn',
-        plugin=DDPShardedPlugin(),
        model_cls=SeedTrainLoaderModel,
        max_percent_speed_diff=0.25,  # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
    )
@@ -88,11 +78,9 @@ def test_ddp_sharded_plugin_correctness_amp_multi_gpu():
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
def test_ddp_string_sharded_plugin_correctness_amp_multi_gpu():
-    plugin_parity_test(
+    sharded_parity_test(
        gpus=2,
        precision=16,
-        accelerator='ddp_spawn',
-        plugin='ddp_sharded',
        model_cls=SeedTrainLoaderModel,
        max_percent_speed_diff=0.25,  # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
    )
@@ -104,11 +92,9 @@ def test_ddp_string_sharded_plugin_correctness_amp_multi_gpu():
                    reason="test should be run outside of pytest")
@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 32")
def test_ddp_sharded_plugin_correctness_multi_gpu_ddp(tmpdir, args=None):
-    plugin_parity_test(
+    sharded_parity_test(
        gpus=args.gpus,
        precision=args.precision,
-        accelerator=args.accelerator,
-        plugin=DDPShardedPlugin(),
        model_cls=SeedTrainLoaderModel,
    )

@@ -119,11 +105,9 @@ def test_ddp_sharded_plugin_correctness_multi_gpu_ddp(tmpdir, args=None):
                    reason="test should be run outside of pytest")
@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 16")
def test_ddp_sharded_plugin_correctness_amp_multi_gpu_ddp(tmpdir, args=None):
-    plugin_parity_test(
+    sharded_parity_test(
        gpus=args.gpus,
        precision=args.precision,
-        accelerator=args.accelerator,
-        plugin=DDPShardedPlugin(),
        model_cls=SeedTrainLoaderModel,
    )

@@ -136,10 +120,8 @@ def test_ddp_sharded_plugin_correctness_multi_gpu_multi_optim():
    """
    Ensures same results using multiple optimizers across multiple GPUs
    """
-    plugin_parity_test(
-        plugin=DDPShardedPlugin(),
+    sharded_parity_test(
        gpus=2,
-        accelerator='ddp_spawn',
        model_cls=SeedTrainLoaderMultipleOptimizersModel,
        max_percent_speed_diff=0.25,  # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
    )
@@ -153,10 +135,8 @@ def test_ddp_sharded_plugin_correctness_multi_gpu_multi_optim_manual(tmpdir):
    """
    Ensures using multiple optimizers across multiple GPUs with manual optimization
    """
-    plugin_parity_test(
-        plugin=DDPShardedPlugin(),
+    sharded_parity_test(
        gpus=2,
-        accelerator='ddp_spawn',
        model_cls=SeedTrainLoaderManualModel,
        max_percent_speed_diff=0.25,  # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
    )
@@ -253,11 +233,9 @@ def record_ddp_fit_model_stats(trainer, model, use_cuda):
    return max_memory, total_time


-def plugin_parity_test(
+def sharded_parity_test(
    model_cls: Type[SeedTrainLoaderModel],
-    plugin: Union[str, DDPPlugin],
    seed: int = 42,
-    accelerator: str = 'ddp_spawn',
    gpus: int = 0,
    precision: int = 32,
    max_percent_speed_diff: float = 0.1,
@@ -268,9 +246,7 @@ def plugin_parity_test(

    Args:
        model_cls: Model class to use for test.
-        plugin: Plugin to parity test.
        seed: Seed for generators. Note that this does not handle the seed for data-loading on multi-process.
-        accelerator: Accelerator type for test.
        gpus: Number of GPUS to enable.
        precision: Whether to use AMP or normal FP32 training.
        max_percent_speed_diff: The maximum speed difference compared to normal DDP training.
@@ -288,7 +264,7 @@ def plugin_parity_test(
        max_epochs=1,
        gpus=gpus,
        precision=precision,
-        accelerator=accelerator,
+        accelerator='ddp_spawn',
    )

    max_memory_ddp, ddp_time = record_ddp_fit_model_stats(
@@ -306,8 +282,7 @@ def plugin_parity_test(
        max_epochs=1,
        gpus=gpus,
        precision=precision,
-        accelerator=accelerator,
-        plugins=[plugin],
+        accelerator='ddp_sharded_spawn',
    )

    max_memory_custom, custom_model_time = record_ddp_fit_model_stats(
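Net effect for this benchmark: the parity helper no longer receives a plugin object or an accelerator argument; the comparison is driven purely by the accelerator string ('ddp_spawn' for the baseline, 'ddp_sharded_spawn' for the sharded run). A rough before/after sketch of the Trainer construction implied by this diff, with the pre-refactor usage kept in comments since it relies on the deleted imports:

from pytorch_lightning import Trainer

# Before this PR (per the deleted lines above): sharded training combined a
# spawn accelerator with an explicit plugin instance.
#   from pytorch_lightning.plugins.sharded_plugin import DDPShardedPlugin
#   trainer = Trainer(max_epochs=1, gpus=2, precision=16,
#                     accelerator='ddp_spawn', plugins=[DDPShardedPlugin()])

# After this PR (as exercised by sharded_parity_test): a single accelerator
# string selects the combined sharded + spawn behaviour.
trainer = Trainer(max_epochs=1, gpus=2, precision=16, accelerator='ddp_sharded_spawn')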
2 changes: 1 addition & 1 deletion pl_examples/basic_examples/conv_sequential_example.py
@@ -32,7 +32,7 @@
from pl_examples import cli_lightning_logo
from pytorch_lightning import Trainer
from pytorch_lightning.metrics.functional import accuracy
-from pytorch_lightning.plugins.ddp_sequential_plugin import DDPSequentialPlugin
+from pytorch_lightning.plugins.legacy.ddp_sequential_plugin import DDPSequentialPlugin
from pytorch_lightning.utilities import _BOLTS_AVAILABLE, _FAIRSCALE_PIPE_AVAILABLE

if _BOLTS_AVAILABLE:
11 changes: 5 additions & 6 deletions pl_examples/bug_report_model.py
@@ -55,24 +55,23 @@ class BoringModel(LightningModule):
    def __init__(self):
        """
        Testing PL Module
-
        Use as follows:
        - subclass
        - modify the behavior for what you want
-
        class TestModel(BaseTestModel):
            def training_step(...):
                # do your own thing
-
        or:
-
        model = BaseTestModel()
        model.training_epoch_end = None
-
        """
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

+    @property
+    def automatic_optimization(self):
+        return True
+
    def forward(self, x):
        return self.layer(x)

@@ -81,7 +80,7 @@ def loss(self, batch, prediction):
        return torch.nn.functional.mse_loss(prediction, torch.ones_like(prediction))

    def step(self, x):
-        x = self.layer(x)
+        x = self(x)
        out = torch.nn.functional.mse_loss(x, torch.ones_like(x))
        return out

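The second hunk is small but deliberate: step now routes through self(x) instead of calling the submodule directly, so the example passes through forward and therefore through anything a training-type plugin wraps around the model. A minimal sketch of the distinction using a plain torch.nn.Module stand-in (not the actual Lightning class):

import torch


class TinyModule(torch.nn.Module):  # illustrative stand-in for BoringModel
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def forward(self, x):
        return self.layer(x)

    def step(self, x):
        # self(x) dispatches through __call__ -> forward, so module hooks and any
        # forward-level wrapping are applied; self.layer(x) would bypass them.
        x = self(x)
        return torch.nn.functional.mse_loss(x, torch.ones_like(x))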
29 changes: 4 additions & 25 deletions pytorch_lightning/accelerators/__init__.py
@@ -1,25 +1,4 @@
-# Copyright The PyTorch Lightning team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from pytorch_lightning.accelerators.accelerator import Accelerator  # noqa: F401
-from pytorch_lightning.accelerators.cpu_accelerator import CPUAccelerator  # noqa: F401
-from pytorch_lightning.accelerators.ddp2_accelerator import DDP2Accelerator  # noqa: F401
-from pytorch_lightning.accelerators.ddp_accelerator import DDPAccelerator  # noqa: F401
-from pytorch_lightning.accelerators.ddp_cpu_hpc_accelerator import DDPCPUHPCAccelerator  # noqa: F401
-from pytorch_lightning.accelerators.ddp_cpu_spawn_accelerator import DDPCPUSpawnAccelerator  # noqa: F401
-from pytorch_lightning.accelerators.ddp_hpc_accelerator import DDPHPCAccelerator  # noqa: F401
-from pytorch_lightning.accelerators.ddp_spawn_accelerator import DDPSpawnAccelerator  # noqa: F401
-from pytorch_lightning.accelerators.dp_accelerator import DataParallelAccelerator  # noqa: F401
-from pytorch_lightning.accelerators.gpu_accelerator import GPUAccelerator  # noqa: F401
-from pytorch_lightning.accelerators.horovod_accelerator import HorovodAccelerator  # noqa: F401
-from pytorch_lightning.accelerators.tpu_accelerator import TPUAccelerator  # noqa: F401
+from pytorch_lightning.accelerators.accelerator import Accelerator
+from pytorch_lightning.accelerators.cpu import CPUAccelerator
+from pytorch_lightning.accelerators.gpu import GPUAccelerator
+from pytorch_lightning.accelerators.tpu import TPUAccelerator
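After this rewrite, pytorch_lightning.accelerators exports only the hardware-level classes; the per-backend DDP/DP/Horovod accelerators deleted above are superseded by the training-type plugins introduced elsewhere in this PR. A sketch of downstream imports against the new package surface, assuming only the four names re-exported in the diff:

# Assumes only the four names re-exported by the new accelerators/__init__.py above.
from pytorch_lightning.accelerators import (
    Accelerator,
    CPUAccelerator,
    GPUAccelerator,
    TPUAccelerator,
)

# Distributed behaviour is no longer chosen by importing a DDP*Accelerator class;
# in this PR it comes from training-type plugins and the Trainer's accelerator
# string (e.g. 'ddp_spawn' or 'ddp_sharded_spawn', as in benchmarks/test_sharded_parity.py).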