diff --git a/.github/workflows/ci_test-base.yml b/.github/workflows/ci_test-base.yml index afd86ca98c213..c0b97439737ff 100644 --- a/.github/workflows/ci_test-base.yml +++ b/.github/workflows/ci_test-base.yml @@ -76,8 +76,7 @@ jobs: with: name: pytest-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }} path: junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml - # Use always() to always run this step to publish test results when there are test failures - if: always() + if: failure() - name: Statistics if: success() diff --git a/.github/workflows/ci_test-conda.yml b/.github/workflows/ci_test-conda.yml index 7fc4de8ddbfd3..d64fedbfbe590 100644 --- a/.github/workflows/ci_test-conda.yml +++ b/.github/workflows/ci_test-conda.yml @@ -50,5 +50,4 @@ jobs: with: name: pytest-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }} path: junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml - # Use always() to always run this step to publish test results when there are test failures - if: always() + if: failure() diff --git a/.github/workflows/ci_test-full.yml b/.github/workflows/ci_test-full.yml index 1a2115a40fcfd..b87a1d8557843 100644 --- a/.github/workflows/ci_test-full.yml +++ b/.github/workflows/ci_test-full.yml @@ -129,8 +129,7 @@ jobs: with: name: pytest-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }} path: junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml - # Use always() to always run this step to publish test results when there are test failures - if: always() + if: failure() - name: Statistics if: success() diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b6b5e36a8ce4..f078349ef3665 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Fixed +- Fixed `trainer` to default to `None` in `DDPAccelerator` ([#4915](https://github.com/PyTorchLightning/pytorch-lightning/pull/4915)) + + +- Fixed `LightningOptimizer` to expose optimizer attributes ([#5095](https://github.com/PyTorchLightning/pytorch-lightning/pull/5095)) + ## [1.1.0] - 2020-12-09 @@ -80,9 +85,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed -- Removed `multiclass_roc` and `multiclass_precision_recall_curve`, use `roc` and `precision_recall_curve` instead ([#4549](https://github.com/PyTorchLightning/pytorch-lightning/pull/4549)) - Tuner algorithms will be skipped if `fast_dev_run=True` ([#3903](https://github.com/PyTorchLightning/pytorch-lightning/pull/3903)) -- WandbLogger does not force wandb `reinit` arg to True anymore and creates a run only when needed ([#4648](https://github.com/PyTorchLightning/pytorch-lightning/pull/4648)) +- `WandbLogger` does not force wandb `reinit` arg to True anymore and creates a run only when needed ([#4648](https://github.com/PyTorchLightning/pytorch-lightning/pull/4648)) - Changed `automatic_optimization` to be a model attribute ([#4602](https://github.com/PyTorchLightning/pytorch-lightning/pull/4602)) - Changed `Simple Profiler` report to order by percentage time spent + num calls ([#4880](https://github.com/PyTorchLightning/pytorch-lightning/pull/4880)) - Simplify optimization Logic ([#4984](https://github.com/PyTorchLightning/pytorch-lightning/pull/4984)) @@ -100,6 +104,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Removed - Removed `reorder` parameter of the `auc` metric ([#5004](https://github.com/PyTorchLightning/pytorch-lightning/pull/5004)) +- Removed `multiclass_roc` and `multiclass_precision_recall_curve`, use `roc` and `precision_recall_curve` instead ([#4549](https://github.com/PyTorchLightning/pytorch-lightning/pull/4549)) ### Fixed diff --git a/docs/source/introduction_guide.rst b/docs/source/introduction_guide.rst index d6d082e2ed779..d4cf578e10bda 100644 --- a/docs/source/introduction_guide.rst +++ b/docs/source/introduction_guide.rst @@ -601,8 +601,8 @@ In this method we do all the preparation we need to do once (instead of on every def setup(self, stage): # transform transform=transforms.Compose([transforms.ToTensor()]) - MNIST(os.getcwd(), train=True, download=False, transform=transform) - MNIST(os.getcwd(), train=False, download=False, transform=transform) + mnist_train = MNIST(os.getcwd(), train=True, download=False, transform=transform) + mnist_test = MNIST(os.getcwd(), train=False, download=False, transform=transform) # train/val split mnist_train, mnist_val = random_split(mnist_train, [55000, 5000]) diff --git a/docs/source/multi_gpu.rst b/docs/source/multi_gpu.rst index def47810504d6..b3e0b905f27f4 100644 --- a/docs/source/multi_gpu.rst +++ b/docs/source/multi_gpu.rst @@ -663,7 +663,7 @@ It is highly recommended to use Sharded Training in multi-GPU environments where A technical note: as batch size scales, storing activations for the backwards pass becomes the bottleneck in training. As a result, sharding optimizer state and gradients becomes less impactful. Work within the future will bring optional sharding to activations and model parameters to reduce memory further, but come with a speed cost. -To use Sharded Training, you need to first install FairScale using the command below or install all extras using ``pip install pytorch-lightning["extra"]``. +To use Sharded Training, you need to first install FairScale using the command below. .. code-block:: bash diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 6ca72b8069d6d..06e6e9679d29f 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -191,37 +191,48 @@ override the :meth:`optimizer_step` function. For example, here step optimizer A every 2 batches and optimizer B every 4 batches -.. testcode:: +.. note:: When using Trainer(enable_pl_optimizer=True), there is no need to call `.zero_grad()`. - def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, second_order_closure=None, on_tpu=False, using_native_amp=False, using_lbfgs=False): - optimizer.step() +.. testcode:: def optimizer_zero_grad(self, current_epoch, batch_idx, optimizer, opt_idx): optimizer.zero_grad() # Alternating schedule for optimizer steps (ie: GANs) - def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, second_order_closure=None, on_tpu=False, using_native_amp=False, using_lbfgs=False): + def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): # update generator opt every 2 steps if optimizer_i == 0: if batch_nb % 2 == 0 : - optimizer.step() - optimizer.zero_grad() + optimizer.step(closure=closure) # update discriminator opt every 4 steps if optimizer_i == 1: if batch_nb % 4 == 0 : - optimizer.step() - optimizer.zero_grad() + optimizer.step(closure=closure) + +.. 
note:: When using ``Trainer(enable_pl_optimizer=True)``, ``.step`` accepts a boolean ``make_optimizer_step`` which can be used as follows. + +.. testcode:: + + def optimizer_zero_grad(self, current_epoch, batch_idx, optimizer, opt_idx): + optimizer.zero_grad() + + # Alternating schedule for optimizer steps (ie: GANs) + def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): + # update generator opt every 2 steps + if optimizer_idx == 0: + optimizer.step(closure=closure, make_optimizer_step=(batch_nb % 2) == 0) - # ... - # add as many optimizers as you want + # update discriminator opt every 4 steps + if optimizer_idx == 1: + optimizer.step(closure=closure, make_optimizer_step=(batch_nb % 4) == 0) Here we add a learning-rate warm up .. testcode:: # learning rate warm-up - def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, second_order_closure=None, on_tpu=False, using_native_amp=False, using_lbfgs=False): + def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): # warm up lr if self.trainer.global_step < 500: lr_scale = min(1., float(self.trainer.global_step + 1) / 500.) @@ -229,8 +240,20 @@ Here we add a learning-rate warm up pg['lr'] = lr_scale * self.hparams.learning_rate # update params - optimizer.step() - optimizer.zero_grad() + optimizer.step(closure=closure) + +The default ``optimizer_step`` relies on the internal ``LightningOptimizer`` to properly perform a step. + +.. testcode:: + + from pytorch_lightning.core.optimizer import LightningOptimizer + + # function hook in LightningModule + def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, on_tpu=False, using_native_amp=False, using_lbfgs=False): + if not isinstance(optimizer, LightningOptimizer): + # wraps into LightningOptimizer only for running step + optimizer = LightningOptimizer.to_lightning_optimizer(optimizer, self.trainer) + optimizer.step(closure=closure) ---------- diff --git a/pytorch_lightning/__init__.py b/pytorch_lightning/__init__.py index d6c5139cb3799..408d95a72dc47 100644 --- a/pytorch_lightning/__init__.py +++ b/pytorch_lightning/__init__.py @@ -1,6 +1,6 @@ """Root package info.""" -__version__ = '1.1.0' +__version__ = '1.1.1rc0' __author__ = 'William Falcon et al.' __author_email__ = 'waf2107@columbia.edu' __license__ = 'Apache-2.0' diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py index 57979b73f2cb6..f24a4ce8beb8a 100644 --- a/pytorch_lightning/core/hooks.py +++ b/pytorch_lightning/core/hooks.py @@ -14,7 +14,7 @@ """Various hooks to be used in the Lightning code.""" -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Union import torch from pytorch_lightning.utilities import move_data_to_device, rank_zero_warn @@ -501,7 +501,7 @@ def val_dataloader(self): will have an argument ``dataloader_idx`` which matches the order here. """ - def transfer_batch_to_device(self, batch: Any, device: torch.device) -> Any: + def transfer_batch_to_device(self, batch: Any, device: Optional[torch.device] = None) -> Any: """ Override this hook if your :class:`~torch.utils.data.DataLoader` returns tensors wrapped in a custom data structure. 
@@ -549,6 +549,7 @@ def transfer_batch_to_device(self, batch, device) - :func:`~pytorch_lightning.utilities.apply_func.move_data_to_device` - :func:`~pytorch_lightning.utilities.apply_func.apply_to_collection` """ + device = device or self.device return move_data_to_device(batch, device) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index f29e7f75bfbff..358b24fe1f40c 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -22,6 +22,7 @@ import tempfile from abc import ABC from argparse import Namespace +from pathlib import Path from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union import torch @@ -1170,7 +1171,6 @@ def toggle_optimizer(self, optimizer: Optimizer, optimizer_idx: int): def optimizer_step( self, - *args, epoch: int = None, batch_idx: int = None, optimizer: Optimizer = None, @@ -1179,7 +1179,6 @@ def optimizer_step( on_tpu: bool = None, using_native_amp: bool = None, using_lbfgs: bool = None, - **kwargs, ) -> None: r""" Override this method to adjust the default way the @@ -1254,7 +1253,7 @@ def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, if not isinstance(optimizer, LightningOptimizer): # wraps into LightingOptimizer only for running step optimizer = LightningOptimizer.to_lightning_optimizer(optimizer, self.trainer) - optimizer.step(closure=optimizer_closure, *args, **kwargs) + optimizer.step(closure=optimizer_closure) def optimizer_zero_grad( self, epoch: int, batch_idx: int, optimizer: Optimizer, optimizer_idx: int @@ -1532,12 +1531,19 @@ def _set_hparams(self, hp: Union[dict, Namespace, str]) -> None: else: self._hparams = hp - def to_onnx(self, file_path: str, input_sample: Optional[Tensor] = None, **kwargs): - """Saves the model in ONNX format + @torch.no_grad() + def to_onnx( + self, + file_path: Union[str, Path], + input_sample: Optional[Any] = None, + **kwargs, + ): + """ + Saves the model in ONNX format Args: - file_path: The path of the file the model should be saved to. - input_sample: A sample of an input tensor for tracing. + file_path: The path of the file the onnx model should be saved to. + input_sample: An input for tracing. Default: None (Use self.example_input_array) **kwargs: Will be passed to torch.onnx.export function. Example: @@ -1556,31 +1562,32 @@ def to_onnx(self, file_path: str, input_sample: Optional[Tensor] = None, **kwarg ... os.path.isfile(tmpfile.name) True """ + mode = self.training - if isinstance(input_sample, Tensor): - input_data = input_sample - elif self.example_input_array is not None: - input_data = self.example_input_array - else: - if input_sample is not None: + if input_sample is None: + if self.example_input_array is None: raise ValueError( - f"Received `input_sample` of type {type(input_sample)}. Expected type is `Tensor`" + "Could not export to ONNX since neither `input_sample` nor" + " `model.example_input_array` attribute is set." ) - raise ValueError( - "Could not export to ONNX since neither `input_sample` nor" - " `model.example_input_array` attribute is set." 
- ) - input_data = input_data.to(self.device) + input_sample = self.example_input_array + + input_sample = self.transfer_batch_to_device(input_sample) + if "example_outputs" not in kwargs: self.eval() - with torch.no_grad(): - kwargs["example_outputs"] = self(input_data) + kwargs["example_outputs"] = self(input_sample) - torch.onnx.export(self, input_data, file_path, **kwargs) + torch.onnx.export(self, input_sample, file_path, **kwargs) + self.train(mode) + @torch.no_grad() def to_torchscript( - self, file_path: Optional[str] = None, method: Optional[str] = 'script', - example_inputs: Optional[Union[torch.Tensor, Tuple[torch.Tensor]]] = None, **kwargs + self, + file_path: Optional[Union[str, Path]] = None, + method: Optional[str] = 'script', + example_inputs: Optional[Any] = None, + **kwargs, ) -> Union[ScriptModule, Dict[str, ScriptModule]]: """ By default compiles the whole model to a :class:`~torch.jit.ScriptModule`. @@ -1592,7 +1599,7 @@ def to_torchscript( Args: file_path: Path where to save the torchscript. Default: None (no file saved). method: Whether to use TorchScript's script or trace method. Default: 'script' - example_inputs: Tensor to be used to do tracing when method is set to 'trace'. + example_inputs: An input to be used to do tracing when method is set to 'trace'. Default: None (Use self.example_input_array) **kwargs: Additional arguments that will be passed to the :func:`torch.jit.script` or :func:`torch.jit.trace` function. @@ -1626,21 +1633,27 @@ def to_torchscript( This LightningModule as a torchscript, regardless of whether file_path is defined or not. """ - mode = self.training - with torch.no_grad(): - if method == 'script': - torchscript_module = torch.jit.script(self.eval(), **kwargs) - elif method == 'trace': - # if no example inputs are provided, try to see if model has example_input_array set - if example_inputs is None: - example_inputs = self.example_input_array - # automatically send example inputs to the right device and use trace - example_inputs = self.transfer_batch_to_device(example_inputs, device=self.device) - torchscript_module = torch.jit.trace(func=self.eval(), example_inputs=example_inputs, **kwargs) - else: - raise ValueError(f"The 'method' parameter only supports 'script' or 'trace', but value given was:" - f"{method}") + + if method == 'script': + torchscript_module = torch.jit.script(self.eval(), **kwargs) + elif method == 'trace': + # if no example inputs are provided, try to see if model has example_input_array set + if example_inputs is None: + if self.example_input_array is None: + raise ValueError( + 'Choosing method=`trace` requires either `example_inputs`' + ' or `model.example_input_array` to be defined' + ) + example_inputs = self.example_input_array + + # automatically send example inputs to the right device and use trace + example_inputs = self.transfer_batch_to_device(example_inputs) + torchscript_module = torch.jit.trace(func=self.eval(), example_inputs=example_inputs, **kwargs) + else: + raise ValueError("The 'method' parameter only supports 'script' or 'trace'," + f" but value given was: {method}") + self.train(mode) if file_path is not None: diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index e6b973b336e43..c8e9ff8b80a2f 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -import inspect import types from typing import Any, Callable, Optional from weakref import proxy @@ -58,12 +57,35 @@ def __init__(self, else: self.__class__ = type("Lightning" + optimizer.__class__.__name__, (self.__class__, optimizer.__class__), {}) - self._trainer = None self._optimizer = optimizer + self._trainer = None self._accumulate_grad_batches = accumulate_grad_batches - self._support_closure = 'closure' in inspect.signature(optimizer.step).parameters self._optimizer_idx = None + @property + def defaults(self): + return self._optimizer.defaults + + @defaults.setter + def defaults(self, defaults): + self._optimizer.defaults = defaults + + @property + def state(self): + return self._optimizer.state + + @state.setter + def state(self, state): + self._optimizer.state = state + + @property + def param_groups(self): + return self._optimizer.param_groups + + @param_groups.setter + def param_groups(self, param_groups): + self._optimizer.param_groups = param_groups + @property def accumulate_grad_batches(self): return self._accumulate_grad_batches @@ -111,11 +133,7 @@ def __optimizer_step(self, *args, closure: Optional[Callable] = None, profiler_n else: with trainer.profiler.profile(profiler_name): - if self._support_closure: - optimizer.step(closure=closure, *args, **kwargs) - else: - closure() - optimizer.step(*args, **kwargs) + optimizer.step(closure=closure, *args, **kwargs) accelerator_backend = trainer.accelerator_backend if accelerator_backend is not None and accelerator_backend.rpc_enabled: diff --git a/pytorch_lightning/metrics/classification/__init__.py b/pytorch_lightning/metrics/classification/__init__.py index 13cb705f30b17..b4cbb6b073efe 100644 --- a/pytorch_lightning/metrics/classification/__init__.py +++ b/pytorch_lightning/metrics/classification/__init__.py @@ -14,7 +14,7 @@ from pytorch_lightning.metrics.classification.accuracy import Accuracy from pytorch_lightning.metrics.classification.average_precision import AveragePrecision from pytorch_lightning.metrics.classification.confusion_matrix import ConfusionMatrix -from pytorch_lightning.metrics.classification.f_beta import FBeta, F1 +from pytorch_lightning.metrics.classification.f_beta import FBeta, Fbeta, F1 from pytorch_lightning.metrics.classification.precision_recall import Precision, Recall from pytorch_lightning.metrics.classification.precision_recall_curve import PrecisionRecallCurve from pytorch_lightning.metrics.classification.roc import ROC diff --git a/pytorch_lightning/metrics/classification/average_precision.py b/pytorch_lightning/metrics/classification/average_precision.py index 0a8a952470dbc..33878cb48965d 100644 --- a/pytorch_lightning/metrics/classification/average_precision.py +++ b/pytorch_lightning/metrics/classification/average_precision.py @@ -92,9 +92,8 @@ def __init__( self.add_state("target", default=[], dist_reduce_fx=None) rank_zero_warn( - 'Metric `AveragePrecision` will save all targets and' - ' predictions in buffer. For large datasets this may lead' - ' to large memory footprint.' + 'Metric `AveragePrecision` will save all targets and predictions in buffer.' + ' For large datasets this may lead to large memory footprint.' 
) def update(self, preds: torch.Tensor, target: torch.Tensor): diff --git a/pytorch_lightning/metrics/classification/f_beta.py b/pytorch_lightning/metrics/classification/f_beta.py index 56cc00f9a5dce..fadfd000ebbe1 100755 --- a/pytorch_lightning/metrics/classification/f_beta.py +++ b/pytorch_lightning/metrics/classification/f_beta.py @@ -20,6 +20,7 @@ _fbeta_compute ) from pytorch_lightning.metrics.metric import Metric +from pytorch_lightning.utilities import rank_zero_warn class FBeta(Metric): @@ -51,11 +52,11 @@ class FBeta(Metric): Threshold value for binary or multi-label logits. default: 0.5 average: - * `'micro'` computes metric globally - * `'macro'` computes metric for each class and uniformly averages them - * `'weighted'` computes metric for each class and does a weighted-average, - where each class is weighted by their support (accounts for class imbalance) - * `None` computes and returns the metric per class + - ``'micro'`` computes metric globally + - ``'macro'`` computes metric for each class and uniformly averages them + - ``'weighted'`` computes metric for each class and does a weighted-average, + where each class is weighted by their support (accounts for class imbalance) + - ``'none'`` computes and returns the metric per class multilabel: If predictions are from multilabel classification. compute_on_step: @@ -131,6 +132,34 @@ def compute(self) -> torch.Tensor: self.actual_positives, self.beta, self.average) +# todo: remove in v1.2 +class Fbeta(FBeta): + r""" + Computes `F-score `_ + + .. warning :: Deprecated in favor of :func:`~pytorch_lightning.metrics.classification.f_beta.FBeta` + """ + def __init__( + self, + num_classes: int, + beta: float = 1.0, + threshold: float = 0.5, + average: str = "micro", + multilabel: bool = False, + compute_on_step: bool = True, + dist_sync_on_step: bool = False, + process_group: Optional[Any] = None, + ): + rank_zero_warn( + "This `Fbeta` was deprecated in v1.0.x in favor of" + " `from pytorch_lightning.metrics.classification.f_beta import FBeta`." + " It will be removed in v1.2.0", DeprecationWarning + ) + super().__init__( + num_classes, beta, threshold, average, multilabel, compute_on_step, dist_sync_on_step, process_group + ) + + class F1(FBeta): """ Computes F1 metric. F1 metrics correspond to a harmonic mean of the @@ -156,11 +185,11 @@ class F1(FBeta): Threshold value for binary or multi-label logits. default: 0.5 average: - * `'micro'` computes metric globally - * `'macro'` computes metric for each class and uniformly averages them - * `'weighted'` computes metric for each class and does a weighted-average, - where each class is weighted by their support (accounts for class imbalance) - * `None` computes and returns the metric per class + - ``'micro'`` computes metric globally + - ``'macro'`` computes metric for each class and uniformly averages them + - ``'weighted'`` computes metric for each class and does a weighted-average, + where each class is weighted by their support (accounts for class imbalance) + - ``'none'`` computes and returns the metric per class multilabel: If predictions are from multilabel classification. 
compute_on_step: @@ -183,7 +212,6 @@ class F1(FBeta): def __init__( self, num_classes: int = 1, - beta: float = 1.0, threshold: float = 0.5, average: str = "micro", multilabel: bool = False, diff --git a/pytorch_lightning/metrics/classification/precision_recall_curve.py b/pytorch_lightning/metrics/classification/precision_recall_curve.py index 052a25a7a977d..620904898535d 100644 --- a/pytorch_lightning/metrics/classification/precision_recall_curve.py +++ b/pytorch_lightning/metrics/classification/precision_recall_curve.py @@ -102,9 +102,8 @@ def __init__( self.add_state("target", default=[], dist_reduce_fx=None) rank_zero_warn( - 'Metric `PrecisionRecallCurve` will save all targets and' - ' predictions in buffer. For large datasets this may lead' - ' to large memory footprint.' + 'Metric `PrecisionRecallCurve` will save all targets and predictions in buffer.' + ' For large datasets this may lead to large memory footprint.' ) def update(self, preds: torch.Tensor, target: torch.Tensor): diff --git a/pytorch_lightning/metrics/classification/roc.py b/pytorch_lightning/metrics/classification/roc.py index 89e8265b19fc1..2b7d82488b491 100644 --- a/pytorch_lightning/metrics/classification/roc.py +++ b/pytorch_lightning/metrics/classification/roc.py @@ -105,9 +105,8 @@ def __init__( self.add_state("target", default=[], dist_reduce_fx=None) rank_zero_warn( - 'Metric `ROC` will save all targets and' - ' predictions in buffer. For large datasets this may lead' - ' to large memory footprint.' + 'Metric `ROC` will save all targets and predictions in buffer.' + ' For large datasets this may lead to large memory footprint.' ) def update(self, preds: torch.Tensor, target: torch.Tensor): diff --git a/pytorch_lightning/metrics/functional/__init__.py b/pytorch_lightning/metrics/functional/__init__.py index e13242e40b0ac..e38ab5f415c32 100644 --- a/pytorch_lightning/metrics/functional/__init__.py +++ b/pytorch_lightning/metrics/functional/__init__.py @@ -17,13 +17,18 @@ auc, auroc, dice_score, + f1_score, + fbeta_score, + get_num_classes, + iou, multiclass_auroc, precision, precision_recall, recall, stat_scores, stat_scores_multiple_classes, - iou, + to_categorical, + to_onehot, ) from pytorch_lightning.metrics.functional.confusion_matrix import confusion_matrix # TODO: unify metrics between class and functional, add below diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py index 1c43ec75bb508..e1ba601b51553 100644 --- a/pytorch_lightning/metrics/functional/classification.py +++ b/pytorch_lightning/metrics/functional/classification.py @@ -15,12 +15,75 @@ from typing import Callable, Optional, Sequence, Tuple import torch -from torch.nn import functional as F -from pytorch_lightning.metrics.utils import to_categorical, get_num_classes, reduce, class_reduce +from pytorch_lightning.metrics.functional.average_precision import average_precision as __ap +from pytorch_lightning.metrics.functional.f_beta import fbeta as __fb, f1 as __f1 +from pytorch_lightning.metrics.functional.precision_recall_curve import _binary_clf_curve, precision_recall_curve as __prc +from pytorch_lightning.metrics.functional.roc import roc as __roc +from pytorch_lightning.metrics.utils import ( + to_categorical as __tc, + to_onehot as __to, + get_num_classes as __gnc, + reduce, + class_reduce, +) from pytorch_lightning.utilities import rank_zero_warn +def to_onehot( + tensor: torch.Tensor, + num_classes: Optional[int] = None, +) -> torch.Tensor: + """ + Converts a dense 
label tensor to one-hot format + + .. warning :: Deprecated in favor of :func:`~pytorch_lightning.metrics.utils.to_onehot` + """ + rank_zero_warn( + "This `to_onehot` was deprecated in v1.1.0 in favor of" + " `from pytorch_lightning.metrics.utils import to_onehot`." + " It will be removed in v1.3.0", DeprecationWarning + ) + return __to(tensor, num_classes) + + +def to_categorical( + tensor: torch.Tensor, + argmax_dim: int = 1 +) -> torch.Tensor: + """ + Converts a tensor of probabilities to a dense label tensor + + .. warning :: Deprecated in favor of :func:`~pytorch_lightning.metrics.utils.to_categorical` + + """ + rank_zero_warn( + "This `to_categorical` was deprecated in v1.1.0 in favor of" + " `from pytorch_lightning.metrics.utils import to_categorical`." + " It will be removed in v1.3.0", DeprecationWarning + ) + return __tc(tensor, argmax_dim) + + +def get_num_classes( + pred: torch.Tensor, + target: torch.Tensor, + num_classes: Optional[int] = None, +) -> int: + """ + Calculates the number of classes for a given prediction and target tensor. + + .. warning :: Deprecated in favor of :func:`~pytorch_lightning.metrics.utils.get_num_classes` + + """ + rank_zero_warn( + "This `get_num_classes` was deprecated in v1.1.0 in favor of" + " `from pytorch_lightning.metrics.utils import get_num_classes`." + " It will be removed in v1.3.0", DeprecationWarning + ) + return __gnc(pred, target, num_classes) + + def stat_scores( pred: torch.Tensor, target: torch.Tensor, @@ -332,52 +395,28 @@ def recall( num_classes=num_classes, class_reduction=class_reduction)[1] -def _binary_clf_curve( +# todo: remove in 1.3 +def roc( pred: torch.Tensor, target: torch.Tensor, sample_weight: Optional[Sequence] = None, pos_label: int = 1., ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ - adapted from https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics/_ranking.py - """ - if sample_weight is not None and not isinstance(sample_weight, torch.Tensor): - sample_weight = torch.tensor(sample_weight, device=pred.device, dtype=torch.float) - - # remove class dimension if necessary - if pred.ndim > target.ndim: - pred = pred[:, 0] - desc_score_indices = torch.argsort(pred, descending=True) - - pred = pred[desc_score_indices] - target = target[desc_score_indices] - - if sample_weight is not None: - weight = sample_weight[desc_score_indices] - else: - weight = 1. - - # pred typically has many tied values. Here we extract - # the indices associated with the distinct values. We also - # concatenate a value for the end of the curve. - distinct_value_indices = torch.where(pred[1:] - pred[:-1])[0] - threshold_idxs = F.pad(distinct_value_indices, (0, 1), value=target.size(0) - 1) - - target = (target == pos_label).to(torch.long) - tps = torch.cumsum(target * weight, dim=0)[threshold_idxs] - - if sample_weight is not None: - # express fps as a cumsum to ensure fps is increasing even in - # the presence of floating point errors - fps = torch.cumsum((1 - target) * weight, dim=0)[threshold_idxs] - else: - fps = 1 + threshold_idxs - tps + Computes the Receiver Operating Characteristic (ROC). It assumes classifier is binary. - return fps, tps, pred[threshold_idxs] + .. warning :: Deprecated in favor of :func:`~pytorch_lightning.metrics.functional.roc.roc` + """ + rank_zero_warn( + "This `roc` was deprecated in v1.1.0 in favor of" + " `from pytorch_lightning.metrics.functional.roc import roc`." 
+ " It will be removed in v1.3.0", DeprecationWarning + ) + return __roc(preds=pred, target=target, sample_weights=sample_weight, pos_label=pos_label) # TODO: deprecated in favor of general ROC in pytorch_lightning/metrics/functional/roc.py -def __roc( +def _roc( pred: torch.Tensor, target: torch.Tensor, sample_weight: Optional[Sequence] = None, @@ -386,22 +425,13 @@ def __roc( """ Computes the Receiver Operating Characteristic (ROC). It assumes classifier is binary. - .. warning:: Deprecated - - Args: - pred: estimated probabilities - target: ground-truth labels - sample_weight: sample weights - pos_label: the label for the positive class - - Return: - false-positive rate (fpr), true-positive rate (tpr), thresholds + .. warning :: Deprecated in favor of :func:`~pytorch_lightning.metrics.functional.roc.roc` Example: >>> x = torch.tensor([0, 1, 2, 3]) >>> y = torch.tensor([0, 1, 1, 1]) - >>> fpr, tpr, thresholds = __roc(x, y) + >>> fpr, tpr, thresholds = _roc(x, y) >>> fpr tensor([0., 0., 0., 0., 1.]) >>> tpr @@ -410,9 +440,12 @@ def __roc( tensor([4, 3, 2, 1, 0]) """ - fps, tps, thresholds = _binary_clf_curve(pred=pred, target=target, - sample_weight=sample_weight, - pos_label=pos_label) + rank_zero_warn( + "This `multiclass_roc` was deprecated in v1.1.0 in favor of" + " `from pytorch_lightning.metrics.functional.roc import roc`." + " It will be removed in v1.3.0", DeprecationWarning + ) + fps, tps, thresholds = _binary_clf_curve(pred, target, sample_weights=sample_weight, pos_label=pos_label) # Add an extra threshold position # to make sure that the curve starts at (0, 0) @@ -434,7 +467,7 @@ def __roc( # TODO: deprecated in favor of general ROC in pytorch_lightning/metrics/functional/roc.py -def __multiclass_roc( +def multiclass_roc( pred: torch.Tensor, target: torch.Tensor, sample_weight: Optional[Sequence] = None, @@ -443,7 +476,7 @@ def __multiclass_roc( """ Computes the Receiver Operating Characteristic (ROC) for multiclass predictors. - .. warning:: Deprecated + .. warning :: Deprecated in favor of :func:`~pytorch_lightning.metrics.functional.roc.roc` Args: pred: estimated probabilities @@ -462,19 +495,24 @@ def __multiclass_roc( ... [0.05, 0.05, 0.85, 0.05], ... [0.05, 0.05, 0.05, 0.85]]) >>> target = torch.tensor([0, 1, 3, 2]) - >>> __multiclass_roc(pred, target) # doctest: +NORMALIZE_WHITESPACE + >>> multiclass_roc(pred, target) # doctest: +NORMALIZE_WHITESPACE ((tensor([0., 0., 1.]), tensor([0., 1., 1.]), tensor([1.8500, 0.8500, 0.0500])), (tensor([0., 0., 1.]), tensor([0., 1., 1.]), tensor([1.8500, 0.8500, 0.0500])), (tensor([0.0000, 0.3333, 1.0000]), tensor([0., 0., 1.]), tensor([1.8500, 0.8500, 0.0500])), (tensor([0.0000, 0.3333, 1.0000]), tensor([0., 0., 1.]), tensor([1.8500, 0.8500, 0.0500]))) """ + rank_zero_warn( + "This `multiclass_roc` was deprecated in v1.1.0 in favor of" + " `from pytorch_lightning.metrics.functional.roc import roc`." 
+ " It will be removed in v1.3.0", DeprecationWarning + ) num_classes = get_num_classes(pred, target, num_classes) class_roc_vals = [] for c in range(num_classes): pred_c = pred[:, c] - class_roc_vals.append(__roc(pred=pred_c, target=target, sample_weight=sample_weight, pos_label=c)) + class_roc_vals.append(_roc(pred=pred_c, target=target, sample_weight=sample_weight, pos_label=c)) return tuple(class_roc_vals) @@ -572,7 +610,7 @@ def auroc( @auc_decorator() def _auroc(pred, target, sample_weight, pos_label): - return __roc(pred, target, sample_weight, pos_label) + return _roc(pred, target, sample_weight, pos_label) return _auroc(pred=pred, target=target, sample_weight=sample_weight, pos_label=pos_label) @@ -625,7 +663,7 @@ def multiclass_auroc( @multiclass_auc_decorator() def _multiclass_auroc(pred, target, sample_weight, num_classes): - return __multiclass_roc(pred, target, sample_weight, num_classes) + return multiclass_roc(pred, target, sample_weight, num_classes) class_aurocs = _multiclass_auroc(pred=pred, target=target, sample_weight=sample_weight, @@ -772,3 +810,110 @@ def iou( ]) return reduce(scores, reduction=reduction) + + +# todo: remove in 1.3 +def precision_recall_curve( + pred: torch.Tensor, + target: torch.Tensor, + sample_weight: Optional[Sequence] = None, + pos_label: int = 1., +): + """ + Computes precision-recall pairs for different thresholds. + + .. warning :: Deprecated in favor of :func:`~pytorch_lightning.metrics.functional.precision_recall_curve.precision_recall_curve` + """ + rank_zero_warn( + "This `precision_recall_curve` was deprecated in v1.1.0 in favor of" + " `from pytorch_lightning.metrics.functional.precision_recall_curve import precision_recall_curve`." + " It will be removed in v1.3.0", DeprecationWarning + ) + return __prc(preds=pred, target=target, sample_weights=sample_weight, pos_label=pos_label) + + +# todo: remove in 1.3 +def multiclass_precision_recall_curve( + pred: torch.Tensor, + target: torch.Tensor, + sample_weight: Optional[Sequence] = None, + num_classes: Optional[int] = None, +): + """ + Computes precision-recall pairs for different thresholds given a multiclass scores. + + .. warning :: Deprecated in favor of :func:`~pytorch_lightning.metrics.functional.precision_recall_curve.precision_recall_curve` + """ + rank_zero_warn( + "This `multiclass_precision_recall_curve` was deprecated in v1.1.0 in favor of" + " `from pytorch_lightning.metrics.functional.precision_recall_curve import precision_recall_curve`." + " It will be removed in v1.3.0", DeprecationWarning + ) + if num_classes is None: + num_classes = get_num_classes(pred, target, num_classes) + return __prc(preds=pred, target=target, sample_weights=sample_weight, num_classes=num_classes) + + +# todo: remove in 1.3 +def average_precision( + pred: torch.Tensor, + target: torch.Tensor, + sample_weight: Optional[Sequence] = None, + pos_label: int = 1., +): + """ + Compute average precision from prediction scores. + + .. warning :: Deprecated in favor of :func:`~pytorch_lightning.metrics.functional.average_precision.average_precision` + """ + rank_zero_warn( + "This `average_precision` was deprecated in v1.1.0 in favor of" + " `pytorch_lightning.metrics.functional.average_precision import average_precision`." 
+ " It will be removed in v1.3.0", DeprecationWarning + ) + return __ap(preds=pred, target=target, sample_weights=sample_weight, pos_label=pos_label) + + +# todo: remove in 1.2 +def fbeta_score( + pred: torch.Tensor, + target: torch.Tensor, + beta: float, + num_classes: Optional[int] = None, + class_reduction: str = 'micro', +) -> torch.Tensor: + """ + Computes the F-beta score which is a weighted harmonic mean of precision and recall. + + .. warning :: Deprecated in favor of :func:`~pytorch_lightning.metrics.functional.f_beta.fbeta` + """ + rank_zero_warn( + "This `average_precision` was deprecated in v1.0.x in favor of" + " `from pytorch_lightning.metrics.functional.f_beta import fbeta`." + " It will be removed in v1.2.0", DeprecationWarning + ) + if num_classes is None: + num_classes = get_num_classes(pred, target) + return __fb(preds=pred, target=target, beta=beta, num_classes=num_classes, average=class_reduction) + + +# todo: remove in 1.2 +def f1_score( + pred: torch.Tensor, + target: torch.Tensor, + num_classes: Optional[int] = None, + class_reduction: str = 'micro', +) -> torch.Tensor: + """ + Computes the F1-score (a.k.a F-measure), which is the harmonic mean of the precision and recall. + + .. warning :: Deprecated in favor of :func:`~pytorch_lightning.metrics.functional.f_beta.f1` + """ + rank_zero_warn( + "This `average_precision` was deprecated in v1.0.x in favor of" + " `from pytorch_lightning.metrics.functional.f_beta import f1`." + " It will be removed in v1.2.0", DeprecationWarning + ) + if num_classes is None: + num_classes = get_num_classes(pred, target) + return __f1(preds=pred, target=target, num_classes=num_classes, average=class_reduction) diff --git a/pytorch_lightning/metrics/functional/explained_variance.py b/pytorch_lightning/metrics/functional/explained_variance.py index 012e1486ebb1f..20b38c58a2a6b 100644 --- a/pytorch_lightning/metrics/functional/explained_variance.py +++ b/pytorch_lightning/metrics/functional/explained_variance.py @@ -23,10 +23,11 @@ def _explained_variance_update(preds: torch.Tensor, target: torch.Tensor) -> Tup return preds, target -def _explained_variance_compute(preds: torch.Tensor, - target: torch.Tensor, - multioutput: str = 'uniform_average', - ) -> Union[torch.Tensor, Sequence[torch.Tensor]]: +def _explained_variance_compute( + preds: torch.Tensor, + target: torch.Tensor, + multioutput: str = 'uniform_average', +) -> Union[torch.Tensor, Sequence[torch.Tensor]]: diff_avg = torch.mean(target - preds, dim=0) numerator = torch.mean((target - preds - diff_avg) ** 2, dim=0) @@ -52,10 +53,11 @@ def _explained_variance_compute(preds: torch.Tensor, return torch.sum(denominator / denom_sum * output_scores) -def explained_variance(preds: torch.Tensor, - target: torch.Tensor, - multioutput: str = 'uniform_average', - ) -> Union[torch.Tensor, Sequence[torch.Tensor]]: +def explained_variance( + preds: torch.Tensor, + target: torch.Tensor, + multioutput: str = 'uniform_average', +) -> Union[torch.Tensor, Sequence[torch.Tensor]]: """ Computes explained variance. diff --git a/pytorch_lightning/metrics/functional/f_beta.py b/pytorch_lightning/metrics/functional/f_beta.py index 3f0a7a0449325..2b0ba194d56f0 100755 --- a/pytorch_lightning/metrics/functional/f_beta.py +++ b/pytorch_lightning/metrics/functional/f_beta.py @@ -83,11 +83,11 @@ def fbeta( Threshold value for binary or multi-label logits. 
default: 0.5 average: - * `'micro'` computes metric globally - * `'macro'` computes metric for each class and uniformly averages them - * `'weighted'` computes metric for each class and does a weighted-average, - where each class is weighted by their support (accounts for class imbalance) - * `None` computes and returns the metric per class + - ``'micro'`` computes metric globally + - ``'macro'`` computes metric for each class and uniformly averages them + - ``'weighted'`` computes metric for each class and does a weighted-average, + where each class is weighted by their support (accounts for class imbalance) + - ``'none'`` computes and returns the metric per class multilabel: If predictions are from multilabel classification. @@ -110,7 +110,6 @@ def f1( preds: torch.Tensor, target: torch.Tensor, num_classes: int, - beta: float = 1.0, threshold: float = 0.5, average: str = "micro", multilabel: bool = False @@ -136,11 +135,11 @@ def f1( Threshold value for binary or multi-label logits. default: 0.5 average: - * `'micro'` computes metric globally - * `'macro'` computes metric for each class and uniformly averages them - * `'weighted'` computes metric for each class and does a weighted-average, - where each class is weighted by their support (accounts for class imbalance) - * `None` computes and returns the metric per class + - ``'micro'`` computes metric globally + - ``'macro'`` computes metric for each class and uniformly averages them + - ``'weighted'`` computes metric for each class and does a weighted-average, + where each class is weighted by their support (accounts for class imbalance) + - ``'none'`` computes and returns the metric per class multilabel: If predictions are from multilabel classification. diff --git a/pytorch_lightning/metrics/functional/reduction.py b/pytorch_lightning/metrics/functional/reduction.py new file mode 100644 index 0000000000000..c116b16d363a9 --- /dev/null +++ b/pytorch_lightning/metrics/functional/reduction.py @@ -0,0 +1,35 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch + +from pytorch_lightning.metrics.utils import reduce as __reduce, class_reduce as __cr +from pytorch_lightning.utilities import rank_zero_warn + + +def reduce(to_reduce: torch.Tensor, reduction: str) -> torch.Tensor: + rank_zero_warn( + "This `reduce` was deprecated in v1.1.0 in favor of" + " `pytorch_lightning.metrics.utils import reduce`." + " It will be removed in v1.3.0", DeprecationWarning + ) + return __reduce(to_reduce=to_reduce, reduction=reduction) + + +def class_reduce(num: torch.Tensor, denom: torch.Tensor, weights: torch.Tensor, class_reduction: str = 'none'): + rank_zero_warn( + "This `class_reduce` was deprecated in v1.1.0 in favor of" + " `pytorch_lightning.metrics.utils import class_reduce`." 
+ " It will be removed in v1.3.0", DeprecationWarning + ) + return __cr(num=num, denom=denom, weights=weights, class_reduction=class_reduction) diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 20dfb0f4b380f..68a0f4781c9a9 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -477,7 +477,7 @@ def _process_result(self, training_step_output, split_batch): return training_step_output_for_epoch_end - def optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_closure, *args, **kwargs): + def optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_closure): model_ref = self.trainer.get_model() is_lbfgs = isinstance(optimizer, torch.optim.LBFGS) @@ -491,16 +491,14 @@ def optimizer_step(self, optimizer, opt_idx, batch_idx, train_step_and_backward_ # model hook model_ref.optimizer_step( - epoch=self.trainer.current_epoch, - batch_idx=batch_idx, - optimizer=optimizer, - optimizer_idx=opt_idx, - optimizer_closure=train_step_and_backward_closure, + self.trainer.current_epoch, + batch_idx, + optimizer, + opt_idx, + train_step_and_backward_closure, on_tpu=self.trainer.use_tpu and TPU_AVAILABLE, using_native_amp=using_native_amp, using_lbfgs=is_lbfgs, - *args, - **kwargs, ) def on_before_zero_grad(self, optimizer): diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index d8847d592e1de..6f6b5f858ff17 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -15,7 +15,6 @@ from torch import nn from torch.utils.data import Dataset, DataLoader -from pytorch_lightning.core.step_result import TrainResult, EvalResult from pytorch_lightning.core.lightning import LightningModule @@ -111,235 +110,6 @@ def training_epoch_end_scalar(self, outputs): assert batch_out.grad_fn is None assert isinstance(batch_out, torch.Tensor) - def training_step_no_default_callbacks_for_train_loop(self, batch, batch_idx): - """ - Early stop and checkpoint only on these values - """ - acc = self.step(batch, batch_idx) - result = TrainResult(minimize=acc) - assert 'early_step_on' not in result - assert 'checkpoint_on' in result - return result - - def training_step_no_callbacks_result_obj(self, batch, batch_idx): - """ - Early stop and checkpoint only on these values - """ - acc = self.step(batch, batch_idx) - result = TrainResult(minimize=acc, checkpoint_on=False) - assert 'early_step_on' not in result - assert 'checkpoint_on' not in result - return result - - def training_step_result_log_epoch_and_step_for_callbacks(self, batch, batch_idx): - """ - Early stop and checkpoint only on these values - """ - acc = self.step(batch, batch_idx) - - self.assert_backward = False - losses = [20, 19, 18, 10, 15, 14, 9, 11, 11, 20] - idx = self.current_epoch - loss = acc + losses[idx] - result = TrainResult(minimize=loss, early_stop_on=loss, checkpoint_on=loss) - return result - - def training_step_result_log_step_only(self, batch, batch_idx): - acc = self.step(batch, batch_idx) - result = TrainResult(minimize=acc) - - # step only metrics - result.log(f'step_log_and_pbar_acc1_b{batch_idx}', torch.tensor(11).type_as(acc), prog_bar=True) - result.log(f'step_log_acc2_b{batch_idx}', torch.tensor(12).type_as(acc)) - result.log(f'step_pbar_acc3_b{batch_idx}', torch.tensor(13).type_as(acc), logger=False, prog_bar=True) - - self.training_step_called = True - return result - - def training_step_result_log_epoch_only(self, batch, batch_idx): - acc 
= self.step(batch, batch_idx) - result = TrainResult(minimize=acc) - - result.log(f'epoch_log_and_pbar_acc1_e{self.current_epoch}', torch.tensor(14).type_as(acc), - on_epoch=True, prog_bar=True, on_step=False) - result.log(f'epoch_log_acc2_e{self.current_epoch}', torch.tensor(15).type_as(acc), - on_epoch=True, on_step=False) - result.log(f'epoch_pbar_acc3_e{self.current_epoch}', torch.tensor(16).type_as(acc), - on_epoch=True, logger=False, prog_bar=True, on_step=False) - - self.training_step_called = True - return result - - def training_step_result_log_epoch_and_step(self, batch, batch_idx): - acc = self.step(batch, batch_idx) - result = TrainResult(minimize=acc) - - val_1 = (5 + batch_idx) * (self.current_epoch + 1) - val_2 = (6 + batch_idx) * (self.current_epoch + 1) - val_3 = (7 + batch_idx) * (self.current_epoch + 1) - result.log('step_epoch_log_and_pbar_acc1', torch.tensor(val_1).type_as(acc), - on_epoch=True, prog_bar=True) - result.log('step_epoch_log_acc2', torch.tensor(val_2).type_as(acc), - on_epoch=True) - result.log('step_epoch_pbar_acc3', torch.tensor(val_3).type_as(acc), - on_epoch=True, logger=False, prog_bar=True) - - self.training_step_called = True - return result - - def training_epoch_end_return_for_log_epoch_and_step(self, result): - """ - There should be an array of scalars without graphs that are all 171 (4 of them) - """ - self.training_epoch_end_called = True - - if self.use_dp or self.use_ddp2: - pass - else: - # only saw 4 batches - assert isinstance(result, TrainResult) - - result.step_epoch_log_acc2 = result.step_epoch_log_acc2_step.prod() - result.step_epoch_pbar_acc3 = result.step_epoch_pbar_acc3_step.prod() - result.step_epoch_log_and_pbar_acc1 = result.step_epoch_log_and_pbar_acc1_step.prod() - result.minimize = result.minimize.mean() - result.checkpoint_on = result.checkpoint_on.mean() - - result.step_epoch_log_and_pbar_acc1_step = result.step_epoch_log_and_pbar_acc1_step.prod() - result.step_epoch_log_and_pbar_acc1_epoch = result.step_epoch_log_and_pbar_acc1_epoch.prod() - result.step_epoch_log_acc2_step = result.step_epoch_log_acc2_step.prod() - result.step_epoch_log_acc2_epoch = result.step_epoch_log_acc2_epoch.prod() - result.step_epoch_pbar_acc3_step = result.step_epoch_pbar_acc3_step.prod() - result.step_epoch_pbar_acc3_epoch = result.step_epoch_pbar_acc3_epoch.prod() - result.log('epoch_end_log_acc', torch.tensor(1212).type_as(result.step_epoch_log_acc2_epoch), - logger=True, on_epoch=True) - result.log('epoch_end_pbar_acc', torch.tensor(1213).type_as(result.step_epoch_log_acc2_epoch), - logger=False, prog_bar=True, on_epoch=True) - result.log('epoch_end_log_pbar_acc', torch.tensor(1214).type_as(result.step_epoch_log_acc2_epoch), - logger=True, prog_bar=True, on_epoch=True) - return result - - # -------------------------- - # EvalResults - # -------------------------- - def validation_step_result_callbacks(self, batch, batch_idx): - acc = self.step(batch, batch_idx) - - self.assert_backward = False - losses = [20, 19, 20, 21, 22, 23] - idx = self.current_epoch - loss = acc + losses[idx] - result = EvalResult(early_stop_on=loss, checkpoint_on=loss) - - self.validation_step_called = True - return result - - def validation_step_result_no_callbacks(self, batch, batch_idx): - acc = self.step(batch, batch_idx) - - self.assert_backward = False - losses = [20, 19, 20, 21, 22, 23, 50, 50, 50, 50, 50, 50] - idx = self.current_epoch - loss = acc + losses[idx] - - result = EvalResult(checkpoint_on=loss) - - self.validation_step_called = True - return result - 
- def validation_step_result_only_epoch_metrics(self, batch, batch_idx): - """ - Only track epoch level metrics - """ - acc = self.step(batch, batch_idx) - result = EvalResult(checkpoint_on=acc, early_stop_on=acc) - - # step only metrics - result.log('no_val_no_pbar', torch.tensor(11 + batch_idx).type_as(acc), prog_bar=False, logger=False) - result.log('val_step_log_acc', torch.tensor(11 + batch_idx).type_as(acc), prog_bar=False, logger=True) - result.log('val_step_log_pbar_acc', torch.tensor(12 + batch_idx).type_as(acc), prog_bar=True, logger=True) - result.log('val_step_pbar_acc', torch.tensor(13 + batch_idx).type_as(acc), prog_bar=True, logger=False) - - self.validation_step_called = True - return result - - def validation_step_result_only_step_metrics(self, batch, batch_idx): - """ - Only track epoch level metrics - """ - acc = self.step(batch, batch_idx) - result = EvalResult(checkpoint_on=acc, early_stop_on=acc) - - # step only metrics - result.log('no_val_no_pbar', torch.tensor(11 + batch_idx).type_as(acc), - prog_bar=False, logger=False, on_epoch=False, on_step=True) - result.log('val_step_log_acc', torch.tensor(11 + batch_idx).type_as(acc), - prog_bar=False, logger=True, on_epoch=False, on_step=True) - result.log('val_step_log_pbar_acc', torch.tensor(12 + batch_idx).type_as(acc), - prog_bar=True, logger=True, on_epoch=False, on_step=True) - result.log('val_step_pbar_acc', torch.tensor(13 + batch_idx).type_as(acc), - prog_bar=True, logger=False, on_epoch=False, on_step=True) - result.log('val_step_batch_idx', torch.tensor(batch_idx).type_as(acc), - prog_bar=True, logger=True, on_epoch=False, on_step=True) - - self.validation_step_called = True - return result - - def validation_step_result_epoch_step_metrics(self, batch, batch_idx): - """ - Only track epoch level metrics - """ - acc = self.step(batch, batch_idx) - result = EvalResult(checkpoint_on=acc, early_stop_on=acc) - - # step only metrics - result.log('no_val_no_pbar', torch.tensor(11 + batch_idx).type_as(acc), - prog_bar=False, logger=False, on_epoch=True, on_step=True) - result.log('val_step_log_acc', torch.tensor(11 + batch_idx).type_as(acc), - prog_bar=False, logger=True, on_epoch=True, on_step=True) - result.log('val_step_log_pbar_acc', torch.tensor(12 + batch_idx).type_as(acc), - prog_bar=True, logger=True, on_epoch=True, on_step=True) - result.log('val_step_pbar_acc', torch.tensor(13 + batch_idx).type_as(acc), - prog_bar=True, logger=False, on_epoch=True, on_step=True) - result.log('val_step_batch_idx', torch.tensor(batch_idx).type_as(acc), - prog_bar=True, logger=True, on_epoch=True, on_step=True) - - self.validation_step_called = True - return result - - def validation_step_for_epoch_end_result(self, batch, batch_idx): - """ - EvalResult flows to epoch end (without step_end) - """ - acc = self.step(batch, batch_idx) - result = EvalResult(checkpoint_on=acc, early_stop_on=acc) - - # step only metrics - result.log('val_step_metric', torch.tensor(batch_idx).type_as(acc), - prog_bar=True, logger=True, on_epoch=True, on_step=False) - result.log('batch_idx', torch.tensor(batch_idx).type_as(acc), - prog_bar=True, logger=True, on_epoch=True, on_step=False) - - self.validation_step_called = True - return result - - def validation_epoch_end_result(self, result): - self.validation_epoch_end_called = True - - if self.trainer.running_sanity_check: - assert len(result.batch_idx) == 2 - else: - assert len(result.batch_idx) == self.trainer.limit_val_batches - - expected_val = result.val_step_metric.sum() / len(result.batch_idx) - 
result.val_step_metric = result.val_step_metric.mean() - result.batch_idx = result.batch_idx.mean() - assert result.val_step_metric == expected_val - - result.log('val_epoch_end_metric', torch.tensor(189).type_as(result.val_step_metric), prog_bar=True) - - return result - # -------------------------- # dictionary returns # -------------------------- diff --git a/tests/base/model_test_steps.py b/tests/base/model_test_steps.py index 0010dcdf14a19..440ec4c4c35b4 100644 --- a/tests/base/model_test_steps.py +++ b/tests/base/model_test_steps.py @@ -59,38 +59,6 @@ def test_step(self, batch, batch_idx, *args, **kwargs): 'test_dic': {'test_loss_a': loss_test}}) return output - def test_step_result_obj(self, batch, batch_idx, *args, **kwargs): - """ - Default, baseline test_step - :param batch: - :return: - """ - x, y = batch - x = x.view(x.size(0), -1) - y_hat = self(x) - - loss_test = self.loss(y, y_hat) - - # acc - labels_hat = torch.argmax(y_hat, dim=1) - test_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0) - test_acc = torch.tensor(test_acc) - - test_acc = test_acc.type_as(x) - - result = EvalResult() - # alternate possible outputs to test - if batch_idx % 1 == 0: - result.log_dict({'test_loss': loss_test, 'test_acc': test_acc}) - return result - if batch_idx % 2 == 0: - return test_acc - - if batch_idx % 3 == 0: - result.log_dict({'test_loss': loss_test, 'test_acc': test_acc}) - result.test_dic = {'test_loss_a': loss_test} - return result - def test_step__multiple_dataloaders(self, batch, batch_idx, dataloader_idx, **kwargs): """ Default, baseline test_step diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index caec6db9aaa10..0590f5b7b5ccc 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -53,25 +53,6 @@ def training_step(self, batch, batch_idx, optimizer_idx=None): ) return output - def training_step_result_obj(self, batch, batch_idx, optimizer_idx=None): - # forward pass - x, y = batch - x = x.view(x.size(0), -1) - y_hat = self(x) - - # calculate loss - loss_val = self.loss(y, y_hat) - log_val = loss_val - - # alternate between tensors and scalars for "log" and "progress_bar" - if batch_idx % 2 == 0: - log_val = log_val.item() - - result = TrainResult(loss_val) - result.log('some_val', log_val * log_val, prog_bar=True, logger=False) - result.log('train_some_val', log_val * log_val) - return result - def training_step__inf_loss(self, batch, batch_idx, optimizer_idx=None): output = self.training_step(batch, batch_idx, optimizer_idx) if batch_idx == self.test_step_inf_loss: @@ -81,19 +62,6 @@ def training_step__inf_loss(self, batch, batch_idx, optimizer_idx=None): output /= 0 return output - def training_step_full_loop_result_obj_dp(self, batch, batch_idx, optimizer_idx=None): - """ - Full loop flow train step (result obj + dp) - """ - x, y = batch - x = x.view(x.size(0), -1) - y_hat = self(x.to(self.device)) - loss_val = y_hat.sum() - result = TrainResult(minimize=loss_val) - result.log('train_step_metric', loss_val + 1) - self.training_step_called = True - return result - def training_step_result_obj_dp(self, batch, batch_idx, optimizer_idx=None): # forward pass x, y = batch @@ -136,23 +104,6 @@ def training_epoch_end_full_loop_result_obj_dp(self, result): return result - def eval_step_full_loop_result_obj_dp(self, batch, batch_idx, optimizer_idx=None): - """ - Full loop flow train step (result obj + dp) - """ - x, y = batch - x = x.view(x.size(0), -1) - y_hat = self(x.to(self.device)) - loss_val = y_hat.sum() - result 
= EvalResult(checkpoint_on=loss_val, early_stop_on=loss_val) - - eval_name = 'validation' if not self.trainer.testing else 'test' - result.log(f'{eval_name}_step_metric', loss_val + 1, on_step=True) - - setattr(self, f'{eval_name}_step_called', True) - - return result - def eval_step_end_full_loop_result_obj_dp(self, result): """ Full loop flow train step (result obj + dp) @@ -198,20 +149,3 @@ def eval_epoch_end_full_loop_result_obj_dp(self, result): setattr(result, f'{eval_name}_step_metric', reduced) return result - - def training_step__using_metrics(self, batch, batch_idx, optimizer_idx=None): - """Lightning calls this inside the training loop""" - # forward pass - x, y = batch - x = x.view(x.size(0), -1) - y_hat = self(x) - - # calculate loss - loss_val = self.loss(y, y_hat) - - # call metric - val = self.metric(x, y) - - result = TrainResult(minimize=loss_val) - result.log('metric_val', val) - return result diff --git a/tests/base/model_valid_steps.py b/tests/base/model_valid_steps.py index e23e62dccdaba..a008a6cecf110 100644 --- a/tests/base/model_valid_steps.py +++ b/tests/base/model_valid_steps.py @@ -71,25 +71,6 @@ def validation_step_no_monitor(self, batch, batch_idx, *args, **kwargs): }) return output - def validation_step_result_obj(self, batch, batch_idx, *args, **kwargs): - x, y = batch - x = x.view(x.size(0), -1) - y_hat = self(x) - - loss_val = self.loss(y, y_hat) - - # acc - labels_hat = torch.argmax(y_hat, dim=1) - val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0) - val_acc = torch.tensor(val_acc).type_as(x) - - result = EvalResult(checkpoint_on=loss_val, early_stop_on=loss_val) - result.log_dict({ - 'val_loss': loss_val, - 'val_acc': val_acc, - }) - return result - def validation_step_result_obj_dp(self, batch, batch_idx, *args, **kwargs): x, y = batch x = x.view(x.size(0), -1) diff --git a/tests/checkpointing/test_model_checkpoint.py b/tests/checkpointing/test_model_checkpoint.py index 31154eac1bf0d..9817dfa4526c6 100644 --- a/tests/checkpointing/test_model_checkpoint.py +++ b/tests/checkpointing/test_model_checkpoint.py @@ -12,15 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import os -import os.path as osp import pickle import platform import re from argparse import Namespace -from distutils.version import LooseVersion from pathlib import Path from unittest import mock -from unittest.mock import MagicMock, Mock +from unittest.mock import Mock import cloudpickle import pytest @@ -641,20 +639,17 @@ def validation_epoch_end(self, outputs): @pytest.mark.parametrize("enable_pl_optimizer", [False, True]) def test_checkpoint_repeated_strategy(enable_pl_optimizer, tmpdir): """ - This test validates that the checkpoint can be called when provided to callacks list + This test validates that the checkpoint can be called when provided to callbacks list """ - checkpoint_callback = ModelCheckpoint(monitor='val_loss', dirpath=tmpdir, filename="{epoch:02d}") class ExtendedBoringModel(BoringModel): - def validation_step(self, batch, batch_idx): output = self.layer(batch) loss = self.loss(batch, output) return {"val_loss": loss} model = ExtendedBoringModel() - model.validation_step_end = None model.validation_epoch_end = None trainer = Trainer( max_epochs=1, @@ -663,92 +658,30 @@ def validation_step(self, batch, batch_idx): limit_test_batches=2, callbacks=[checkpoint_callback], enable_pl_optimizer=enable_pl_optimizer, + weights_summary=None, + progress_bar_refresh_rate=0, ) - trainer.fit(model) assert os.listdir(tmpdir) == ['epoch=00.ckpt'] - def get_last_checkpoint(): - ckpts = os.listdir(tmpdir) - ckpts_map = {int(x.split("=")[1].split('.')[0]): osp.join(tmpdir, x) for x in ckpts if "epoch" in x} - num_ckpts = len(ckpts_map) - 1 - return ckpts_map[num_ckpts] - - for idx in range(1, 5): + for idx in range(4): # load from checkpoint - chk = get_last_checkpoint() - model = BoringModel.load_from_checkpoint(chk) - trainer = pl.Trainer( - max_epochs=1, - limit_train_batches=2, - limit_val_batches=2, - limit_test_batches=2, - resume_from_checkpoint=chk, - enable_pl_optimizer=enable_pl_optimizer) - trainer.fit(model) - trainer.test(model) - - assert str(os.listdir(tmpdir)) == "['epoch=00.ckpt']" - - -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) -def test_checkpoint_repeated_strategy_tmpdir(enable_pl_optimizer, tmpdir): - """ - This test validates that the checkpoint can be called when provided to callacks list - """ - - checkpoint_callback = ModelCheckpoint(monitor='val_loss', filepath=os.path.join(tmpdir, "{epoch:02d}")) - - class ExtendedBoringModel(BoringModel): - - def validation_step(self, batch, batch_idx): - output = self.layer(batch) - loss = self.loss(batch, output) - return {"val_loss": loss} - - model = ExtendedBoringModel() - model.validation_step_end = None - model.validation_epoch_end = None - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - limit_train_batches=2, - limit_val_batches=2, - limit_test_batches=2, - callbacks=[checkpoint_callback], - enable_pl_optimizer=enable_pl_optimizer, - ) - - trainer.fit(model) - assert sorted(os.listdir(tmpdir)) == sorted(['epoch=00.ckpt', 'lightning_logs']) - path_to_lightning_logs = osp.join(tmpdir, 'lightning_logs') - assert sorted(os.listdir(path_to_lightning_logs)) == sorted(['version_0']) - - def get_last_checkpoint(): - ckpts = os.listdir(tmpdir) - ckpts_map = {int(x.split("=")[1].split('.')[0]): osp.join(tmpdir, x) for x in ckpts if "epoch" in x} - num_ckpts = len(ckpts_map) - 1 - return ckpts_map[num_ckpts] - - for idx in range(1, 5): - - # load from checkpoint - chk = get_last_checkpoint() - model = 
LogInTwoMethods.load_from_checkpoint(chk) + model = LogInTwoMethods.load_from_checkpoint(checkpoint_callback.best_model_path) trainer = pl.Trainer( default_root_dir=tmpdir, max_epochs=1, limit_train_batches=2, limit_val_batches=2, limit_test_batches=2, - resume_from_checkpoint=chk, - enable_pl_optimizer=enable_pl_optimizer) - + resume_from_checkpoint=checkpoint_callback.best_model_path, + enable_pl_optimizer=enable_pl_optimizer, + weights_summary=None, + progress_bar_refresh_rate=0, + ) trainer.fit(model) - trainer.test(model) - assert sorted(os.listdir(tmpdir)) == sorted(['epoch=00.ckpt', 'lightning_logs']) - assert sorted(os.listdir(path_to_lightning_logs)) == sorted([f'version_{i}' for i in range(idx + 1)]) + trainer.test(model, verbose=False) + assert set(os.listdir(tmpdir)) == {'epoch=00.ckpt', 'lightning_logs'} + assert set(os.listdir(tmpdir.join("lightning_logs"))) == {f'version_{i}' for i in range(4)} @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) @@ -760,21 +693,22 @@ def test_checkpoint_repeated_strategy_extended(enable_pl_optimizer, tmpdir): """ class ExtendedBoringModel(BoringModel): - def validation_step(self, batch, batch_idx): output = self.layer(batch) loss = self.loss(batch, output) return {"val_loss": loss} + def validation_epoch_end(self, *_): + ... + def assert_trainer_init(trainer): assert not trainer.checkpoint_connector.has_trained assert trainer.global_step == 0 assert trainer.current_epoch == 0 def get_last_checkpoint(ckpt_dir): - ckpts = os.listdir(ckpt_dir) - ckpts.sort() - return osp.join(ckpt_dir, ckpts[-1]) + last = ckpt_dir.listdir(sort=True)[-1] + return str(last) def assert_checkpoint_content(ckpt_dir): chk = pl_load(get_last_checkpoint(ckpt_dir)) @@ -782,23 +716,15 @@ def assert_checkpoint_content(ckpt_dir): assert chk["global_step"] == 4 def assert_checkpoint_log_dir(idx): - lightning_logs_path = osp.join(tmpdir, 'lightning_logs') - assert sorted(os.listdir(lightning_logs_path)) == [f'version_{i}' for i in range(idx + 1)] - assert len(os.listdir(ckpt_dir)) == epochs - - def get_model(): - model = ExtendedBoringModel() - model.validation_step_end = None - model.validation_epoch_end = None - return model + lightning_logs = tmpdir / 'lightning_logs' + actual = [d.basename for d in lightning_logs.listdir(sort=True)] + assert actual == [f'version_{i}' for i in range(idx + 1)] + assert len(ckpt_dir.listdir()) == epochs - ckpt_dir = osp.join(tmpdir, 'checkpoints') + ckpt_dir = tmpdir / 'checkpoints' checkpoint_cb = ModelCheckpoint(dirpath=ckpt_dir, save_top_k=-1) epochs = 2 limit_train_batches = 2 - - model = get_model() - trainer_config = dict( default_root_dir=tmpdir, max_epochs=epochs, @@ -806,40 +732,32 @@ def get_model(): limit_val_batches=3, limit_test_batches=4, enable_pl_optimizer=enable_pl_optimizer, - ) - - trainer = pl.Trainer( - **trainer_config, callbacks=[checkpoint_cb], ) + trainer = pl.Trainer(**trainer_config) assert_trainer_init(trainer) + model = ExtendedBoringModel() trainer.fit(model) assert trainer.checkpoint_connector.has_trained assert trainer.global_step == epochs * limit_train_batches assert trainer.current_epoch == epochs - 1 assert_checkpoint_log_dir(0) + assert_checkpoint_content(ckpt_dir) trainer.test(model) assert trainer.current_epoch == epochs - 1 - assert_checkpoint_content(ckpt_dir) - for idx in range(1, 5): chk = get_last_checkpoint(ckpt_dir) assert_checkpoint_content(ckpt_dir) - checkpoint_cb = ModelCheckpoint(dirpath=ckpt_dir, save_top_k=-1) - model = get_model() - # load from checkpoint - trainer = pl.Trainer( - 
**trainer_config, - resume_from_checkpoint=chk, - callbacks=[checkpoint_cb], - ) + trainer_config["callbacks"] = [ModelCheckpoint(dirpath=ckpt_dir, save_top_k=-1)] + trainer = pl.Trainer(**trainer_config, resume_from_checkpoint=chk) assert_trainer_init(trainer) + model = ExtendedBoringModel() trainer.test(model) assert not trainer.checkpoint_connector.has_trained assert trainer.global_step == epochs * limit_train_batches diff --git a/tests/core/test_lightning_module.py b/tests/core/test_lightning_module.py index a7054a3a7ef49..e3a597063d02e 100644 --- a/tests/core/test_lightning_module.py +++ b/tests/core/test_lightning_module.py @@ -103,3 +103,47 @@ def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, assert sgd_zero_grad.call_count == 4 assert adam_step.call_count == 2 assert adam_zero_grad.call_count == 2 + + +@pytest.mark.parametrize("enable_pl_optimizer", [False, True]) +def test_params_groups_and_state_are_accessible(enable_pl_optimizer, tmpdir): + + with patch("torch.optim.SGD.step") as sgd_step, \ + patch("torch.optim.SGD.zero_grad") as sgd_zero_grad, \ + patch("torch.optim.Adam.step") as adam_step, \ + patch("torch.optim.Adam.zero_grad") as adam_zero_grad: + + class TestModel(BoringModel): + + def training_step(self, batch, batch_idx, optimizer_idx): + output = self.layer(batch) + loss = self.loss(batch, output) + return {"loss": loss} + + def configure_optimizers(self): + optimizer = SGD(self.layer.parameters(), lr=0.1) + optimizer_2 = Adam(self.layer.parameters(), lr=0.1) + return [optimizer, optimizer_2] + + def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_idx, closure, + on_tpu=False, using_native_amp=False, using_lbfgs=False): + # warm up lr + if self.trainer.global_step < 500: + lr_scale = min(1., float(self.trainer.global_step + 1) / 500.) 
+ for pg in optimizer.param_groups: + pg['lr'] = lr_scale * 0.01 + + optimizer.step(closure=closure) + + model = TestModel() + model.training_epoch_end = None + + trainer = Trainer( + max_epochs=1, + default_root_dir=tmpdir, + limit_train_batches=8, + accumulate_grad_batches=1, + enable_pl_optimizer=enable_pl_optimizer + ) + + trainer.fit(model) diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index 16963a2af3c0d..a9fcf918cc699 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -193,12 +193,29 @@ def test_state(tmpdir): model = torch.nn.Linear(3, 4) optimizer = torch.optim.Adam(model.parameters()) lightning_optimizer = LightningOptimizer(optimizer) + + # test state + assert optimizer.state == lightning_optimizer.state + lightning_optimizer.state = optimizer.state + assert optimizer.state == lightning_optimizer.state + + # test param_groups + assert optimizer.param_groups == lightning_optimizer.param_groups + lightning_optimizer.param_groups = optimizer.param_groups + assert optimizer.param_groups == lightning_optimizer.param_groups + + # test defaults + assert optimizer.defaults == lightning_optimizer.defaults + lightning_optimizer.defaults = optimizer.defaults + assert optimizer.defaults == lightning_optimizer.defaults + assert isinstance(lightning_optimizer, LightningOptimizer) assert isinstance(lightning_optimizer, Adam) assert isinstance(lightning_optimizer, Optimizer) lightning_dict = {} special_attrs = ["_accumulate_grad_batches", "_optimizer", "_optimizer_idx", "_support_closure", - "_trainer"] + "_trainer", "__getstate__", "__setstate__", "state_dict", "load_state_dict", + "zero_grad", "__setstate__", "add_param_group"] for k, v in lightning_optimizer.__dict__.items(): if k not in special_attrs: lightning_dict[k] = v @@ -207,55 +224,6 @@ def test_state(tmpdir): assert optimizer.state == lightning_optimizer.state -def test_lightning_optimizer_with_wrong_optimizer_interface(tmpdir): - class OptimizerWrapper(object): - def __init__(self, optimizer): - self.optim = optimizer - self.state_dict = self.optim.state_dict - self.load_state_dict = self.optim.load_state_dict - self.zero_grad = self.optim.zero_grad - self.add_param_group = self.optim.add_param_group - self.__setstate__ = self.optim.__setstate__ - self.__getstate__ = self.optim.__getstate__ - self.__repr__ = self.optim.__repr__ - - @property - def __class__(self): - return Optimizer - - @property - def state(self): - return self.optim.state - - @property - def param_groups(self): - return self.optim.param_groups - - @param_groups.setter - def param_groups(self, value): - self.optim.param_groups = value - - def step(self): - # wrongly defined step. 
Should contain closure - self.optim.step(closure=None) - - class TestLightningOptimizerModel(BoringModel): - - def configure_optimizers(self): - optimizer = torch.optim.Adam(self.parameters(), lr=0.1) - optimizer = OptimizerWrapper(optimizer) - return [optimizer] - - model = TestLightningOptimizerModel() - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - weights_summary=None, - log_every_n_steps=1, - ) - trainer.fit(model) - - def test_lightning_optimizer_automatic_optimization(tmpdir): """ Test lightning optimize works with make_optimizer_step in automatic_optimization diff --git a/tests/core/test_results.py b/tests/core/test_results.py index f4486ce6ae419..797004b7f21ff 100644 --- a/tests/core/test_results.py +++ b/tests/core/test_results.py @@ -18,7 +18,7 @@ import torch import torch.distributed as dist import torch.multiprocessing as mp -from pytorch_lightning import Trainer, seed_everything +from pytorch_lightning import Trainer from pytorch_lightning.core.step_result import Result, TrainResult, EvalResult import tests.base.develop_utils as tutils diff --git a/tests/deprecated_api/__init__.py b/tests/deprecated_api/__init__.py new file mode 100644 index 0000000000000..99e21d1ed6b22 --- /dev/null +++ b/tests/deprecated_api/__init__.py @@ -0,0 +1,21 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test deprecated functionality which will be removed in vX.Y.Z""" +import sys + + +def _soft_unimport_module(str_module): + # once the module is imported e.g with parsing with pytest it lives in memory + if str_module in sys.modules: + del sys.modules[str_module] diff --git a/tests/deprecated_api/test_remove_1-2.py b/tests/deprecated_api/test_remove_1-2.py new file mode 100644 index 0000000000000..331208d56df10 --- /dev/null +++ b/tests/deprecated_api/test_remove_1-2.py @@ -0,0 +1,45 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Test deprecated functionality which will be removed in vX.Y.Z""" + +import pytest +import torch + +from pytorch_lightning.callbacks import ModelCheckpoint +from pytorch_lightning.utilities.exceptions import MisconfigurationException + + +def test_tbd_remove_in_v1_2_0(): + with pytest.deprecated_call(match='will be removed in v1.2'): + ModelCheckpoint(filepath='..') + + with pytest.deprecated_call(match='will be removed in v1.2'): + ModelCheckpoint('..') + + with pytest.raises(MisconfigurationException, match='inputs which are not feasible'): + ModelCheckpoint(filepath='..', dirpath='.') + + +def test_tbd_remove_in_v1_2_0_metrics(): + from pytorch_lightning.metrics.classification import Fbeta + from pytorch_lightning.metrics.functional.classification import f1_score, fbeta_score + + with pytest.deprecated_call(match='will be removed in v1.2'): + Fbeta(2) + + with pytest.deprecated_call(match='will be removed in v1.2'): + fbeta_score(torch.tensor([0, 1, 2, 3]), torch.tensor([0, 1, 2, 1]), 0.2) + + with pytest.deprecated_call(match='will be removed in v1.2'): + f1_score(torch.tensor([0, 1, 0, 1]), torch.tensor([0, 1, 0, 0])) diff --git a/tests/test_deprecated.py b/tests/deprecated_api/test_remove_1-3.py similarity index 52% rename from tests/test_deprecated.py rename to tests/deprecated_api/test_remove_1-3.py index f549de1f4d71e..7ec69796b1e46 100644 --- a/tests/test_deprecated.py +++ b/tests/deprecated_api/test_remove_1-3.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. """Test deprecated functionality which will be removed in vX.Y.Z""" -import sys from argparse import ArgumentParser from unittest import mock @@ -21,10 +20,8 @@ from pytorch_lightning import LightningModule, Trainer from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint -from pytorch_lightning.metrics.functional.classification import auc from pytorch_lightning.profiler.profilers import PassThroughProfiler, SimpleProfiler from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base import EvalModelTemplate def test_tbd_remove_in_v1_3_0(tmpdir): @@ -51,15 +48,61 @@ def __init__(self, hparams): DeprecatedHparamsModel({}) -def test_tbd_remove_in_v1_2_0(): - with pytest.deprecated_call(match='will be removed in v1.2'): - checkpoint_cb = ModelCheckpoint(filepath='.') +def test_tbd_remove_in_v1_3_0_metrics(): + from pytorch_lightning.metrics.functional.classification import to_onehot + with pytest.deprecated_call(match='will be removed in v1.3'): + to_onehot(torch.tensor([1, 2, 3])) + + from pytorch_lightning.metrics.functional.classification import to_categorical + with pytest.deprecated_call(match='will be removed in v1.3'): + to_categorical(torch.tensor([[0.2, 0.5], [0.9, 0.1]])) + + from pytorch_lightning.metrics.functional.classification import get_num_classes + with pytest.deprecated_call(match='will be removed in v1.3'): + get_num_classes(pred=torch.tensor([0, 1]), target=torch.tensor([1, 1])) - with pytest.deprecated_call(match='will be removed in v1.2'): - checkpoint_cb = ModelCheckpoint('.') + x_binary = torch.tensor([0, 1, 2, 3]) + y_binary = torch.tensor([0, 1, 2, 3]) - with pytest.raises(MisconfigurationException, match='inputs which are not feasible'): - checkpoint_cb = ModelCheckpoint(filepath='.', dirpath='.') + from pytorch_lightning.metrics.functional.classification import roc + with pytest.deprecated_call(match='will be removed in v1.3'): + roc(pred=x_binary, target=y_binary) + + from 
pytorch_lightning.metrics.functional.classification import _roc + with pytest.deprecated_call(match='will be removed in v1.3'): + _roc(pred=x_binary, target=y_binary) + + x_multy = torch.tensor([[0.85, 0.05, 0.05, 0.05], + [0.05, 0.85, 0.05, 0.05], + [0.05, 0.05, 0.85, 0.05], + [0.05, 0.05, 0.05, 0.85]]) + y_multy = torch.tensor([0, 1, 3, 2]) + + from pytorch_lightning.metrics.functional.classification import multiclass_roc + with pytest.deprecated_call(match='will be removed in v1.3'): + multiclass_roc(pred=x_multy, target=y_multy) + + from pytorch_lightning.metrics.functional.classification import average_precision + with pytest.deprecated_call(match='will be removed in v1.3'): + average_precision(pred=x_binary, target=y_binary) + + from pytorch_lightning.metrics.functional.classification import precision_recall_curve + with pytest.deprecated_call(match='will be removed in v1.3'): + precision_recall_curve(pred=x_binary, target=y_binary) + + from pytorch_lightning.metrics.functional.classification import multiclass_precision_recall_curve + with pytest.deprecated_call(match='will be removed in v1.3'): + multiclass_precision_recall_curve(pred=x_multy, target=y_multy) + + from pytorch_lightning.metrics.functional.reduction import reduce + with pytest.deprecated_call(match='will be removed in v1.3'): + reduce(torch.tensor([0, 1, 1, 0]), 'sum') + + from pytorch_lightning.metrics.functional.reduction import class_reduce + with pytest.deprecated_call(match='will be removed in v1.3'): + class_reduce(torch.randint(1, 10, (50,)).float(), + torch.randint(10, 20, (50,)).float(), + torch.randint(1, 100, (50,)).float()) # TODO: remove bool from Trainer.profiler param in v1.3.0, update profiler_connector.py @@ -68,6 +111,7 @@ def test_tbd_remove_in_v1_2_0(): (False, PassThroughProfiler), ]) def test_trainer_profiler_remove_in_v1_3_0(profiler, expected): + # remove bool from Trainer.profiler param in v1.3.0, update profiler_connector.py with pytest.deprecated_call(match='will be removed in v1.3'): trainer = Trainer(profiler=profiler) assert isinstance(trainer.profiler, expected) @@ -91,47 +135,3 @@ def test_trainer_cli_profiler_remove_in_v1_3_0(cli_args, expected_parsed_arg, ex assert getattr(args, "profiler") == expected_parsed_arg trainer = Trainer.from_argparse_args(args) assert isinstance(trainer.profiler, expected_profiler) - - -def _soft_unimport_module(str_module): - # once the module is imported e.g with parsing with pytest it lives in memory - if str_module in sys.modules: - del sys.modules[str_module] - - -class ModelVer0_6(EvalModelTemplate): - - # todo: this shall not be needed while evaluate asks for dataloader explicitly - def val_dataloader(self): - return self.dataloader(train=False) - - def validation_step(self, batch, batch_idx, *args, **kwargs): - return {'val_loss': torch.tensor(0.6)} - - def validation_end(self, outputs): - return {'val_loss': torch.tensor(0.6)} - - def test_dataloader(self): - return self.dataloader(train=False) - - def test_end(self, outputs): - return {'test_loss': torch.tensor(0.6)} - - -class ModelVer0_7(EvalModelTemplate): - - # todo: this shall not be needed while evaluate asks for dataloader explicitly - def val_dataloader(self): - return self.dataloader(train=False) - - def validation_step(self, batch, batch_idx, *args, **kwargs): - return {'val_loss': torch.tensor(0.7)} - - def validation_end(self, outputs): - return {'val_loss': torch.tensor(0.7)} - - def test_dataloader(self): - return self.dataloader(train=False) - - def test_end(self, outputs): - return 
{'test_loss': torch.tensor(0.7)} diff --git a/tests/metrics/functional/test_classification.py b/tests/metrics/functional/test_classification.py index f7bd7d558f5b4..a6fbe9e849785 100644 --- a/tests/metrics/functional/test_classification.py +++ b/tests/metrics/functional/test_classification.py @@ -17,13 +17,13 @@ accuracy, precision, recall, - _binary_clf_curve, dice_score, auroc, multiclass_auroc, auc, iou, ) +from pytorch_lightning.metrics.functional.precision_recall_curve import _binary_clf_curve from pytorch_lightning.metrics.utils import to_onehot, get_num_classes, to_categorical @@ -222,7 +222,7 @@ def test_binary_clf_curve(sample_weight, pos_label, exp_shape): if sample_weight is not None: sample_weight = torch.ones_like(pred) * sample_weight - fps, tps, thresh = _binary_clf_curve(pred, target, sample_weight, pos_label) + fps, tps, thresh = _binary_clf_curve(preds=pred, target=target, sample_weights=sample_weight, pos_label=pos_label) assert isinstance(tps, torch.Tensor) assert isinstance(fps, torch.Tensor) diff --git a/tests/models/test_onnx.py b/tests/models/test_onnx.py index a3919a6a8a7dd..82727d37479b6 100644 --- a/tests/models/test_onnx.py +++ b/tests/models/test_onnx.py @@ -21,44 +21,44 @@ import tests.base.develop_pipelines as tpipes import tests.base.develop_utils as tutils from pytorch_lightning import Trainer -from tests.base import EvalModelTemplate +from tests.base import BoringModel, EvalModelTemplate def test_model_saves_with_input_sample(tmpdir): """Test that ONNX model saves with input sample and size is greater than 3 MB""" - model = EvalModelTemplate() + model = BoringModel() trainer = Trainer(max_epochs=1) trainer.fit(model) file_path = os.path.join(tmpdir, "model.onnx") - input_sample = torch.randn((1, 28 * 28)) + input_sample = torch.randn((1, 32)) model.to_onnx(file_path, input_sample) assert os.path.isfile(file_path) - assert os.path.getsize(file_path) > 3e+06 + assert os.path.getsize(file_path) > 4e2 @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine") def test_model_saves_on_gpu(tmpdir): """Test that model saves on gpu""" - model = EvalModelTemplate() + model = BoringModel() trainer = Trainer(gpus=1, max_epochs=1) trainer.fit(model) file_path = os.path.join(tmpdir, "model.onnx") - input_sample = torch.randn((1, 28 * 28)) + input_sample = torch.randn((1, 32)) model.to_onnx(file_path, input_sample) assert os.path.isfile(file_path) - assert os.path.getsize(file_path) > 3e+06 + assert os.path.getsize(file_path) > 4e2 def test_model_saves_with_example_output(tmpdir): """Test that ONNX model saves when provided with example output""" - model = EvalModelTemplate() + model = BoringModel() trainer = Trainer(max_epochs=1) trainer.fit(model) file_path = os.path.join(tmpdir, "model.onnx") - input_sample = torch.randn((1, 28 * 28)) + input_sample = torch.randn((1, 32)) model.eval() example_outputs = model.forward(input_sample) model.to_onnx(file_path, input_sample, example_outputs=example_outputs) @@ -67,11 +67,13 @@ def test_model_saves_with_example_output(tmpdir): def test_model_saves_with_example_input_array(tmpdir): """Test that ONNX model saves with_example_input_array and size is greater than 3 MB""" - model = EvalModelTemplate() + model = BoringModel() + model.example_input_array = torch.randn(5, 32) + file_path = os.path.join(tmpdir, "model.onnx") model.to_onnx(file_path) assert os.path.exists(file_path) is True - assert os.path.getsize(file_path) > 3e+06 + assert os.path.getsize(file_path) > 4e2 
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @@ -100,7 +102,9 @@ def test_model_saves_on_multi_gpu(tmpdir): def test_verbose_param(tmpdir, capsys): """Test that output is present when verbose parameter is set""" - model = EvalModelTemplate() + model = BoringModel() + model.example_input_array = torch.randn(5, 32) + file_path = os.path.join(tmpdir, "model.onnx") model.to_onnx(file_path, verbose=True) captured = capsys.readouterr() @@ -108,8 +112,8 @@ def test_verbose_param(tmpdir, capsys): def test_error_if_no_input(tmpdir): - """Test that an exception is thrown when there is no input tensor""" - model = EvalModelTemplate() + """Test that an error is thrown when there is no input tensor""" + model = BoringModel() model.example_input_array = None file_path = os.path.join(tmpdir, "model.onnx") with pytest.raises(ValueError, match=r'Could not export to ONNX since neither `input_sample` nor' @@ -117,21 +121,12 @@ def test_error_if_no_input(tmpdir): model.to_onnx(file_path) -def test_error_if_input_sample_is_not_tensor(tmpdir): - """Test that an exception is thrown when there is no input tensor""" - model = EvalModelTemplate() - model.example_input_array = None - file_path = os.path.join(tmpdir, "model.onnx") - input_sample = np.random.randn(1, 28 * 28) - with pytest.raises(ValueError, match=f'Received `input_sample` of type {type(input_sample)}. Expected type is ' - f'`Tensor`'): - model.to_onnx(file_path, input_sample) - - def test_if_inference_output_is_valid(tmpdir): """Test that the output inferred from ONNX model is same as from PyTorch""" - model = EvalModelTemplate() - trainer = Trainer(max_epochs=5) + model = BoringModel() + model.example_input_array = torch.randn(5, 32) + + trainer = Trainer(max_epochs=2) trainer.fit(model) model.eval() diff --git a/tests/models/test_torchscript.py b/tests/models/test_torchscript.py index bf2c34b8bfef5..3c43b201f52e4 100644 --- a/tests/models/test_torchscript.py +++ b/tests/models/test_torchscript.py @@ -16,43 +16,72 @@ import pytest import torch -from tests.base import EvalModelTemplate +from tests.base import BoringModel from tests.base.datamodules import TrialMNISTDataModule from tests.base.models import ParityModuleRNN, BasicGAN @pytest.mark.parametrize("modelclass", [ - EvalModelTemplate, + BoringModel, ParityModuleRNN, BasicGAN, ]) def test_torchscript_input_output(modelclass): """ Test that scripted LightningModule forward works. """ model = modelclass() + + if isinstance(model, BoringModel): + model.example_input_array = torch.randn(5, 32) + script = model.to_torchscript() assert isinstance(script, torch.jit.ScriptModule) + model.eval() - model_output = model(model.example_input_array) + with torch.no_grad(): + model_output = model(model.example_input_array) + script_output = script(model.example_input_array) assert torch.allclose(script_output, model_output) @pytest.mark.parametrize("modelclass", [ - EvalModelTemplate, + BoringModel, ParityModuleRNN, BasicGAN, ]) -def test_torchscript_input_output_trace(modelclass): - """ Test that traced LightningModule forward works. 
""" +def test_torchscript_example_input_output_trace(modelclass): + """ Test that traced LightningModule forward works with example_input_array """ model = modelclass() + + if isinstance(model, BoringModel): + model.example_input_array = torch.randn(5, 32) + script = model.to_torchscript(method='trace') assert isinstance(script, torch.jit.ScriptModule) + model.eval() - model_output = model(model.example_input_array) + with torch.no_grad(): + model_output = model(model.example_input_array) + script_output = script(model.example_input_array) assert torch.allclose(script_output, model_output) +def test_torchscript_input_output_trace(): + """ Test that traced LightningModule forward works with example_inputs """ + model = BoringModel() + example_inputs = torch.randn(1, 32) + script = model.to_torchscript(example_inputs=example_inputs, method='trace') + assert isinstance(script, torch.jit.ScriptModule) + + model.eval() + with torch.no_grad(): + model_output = model(example_inputs) + + script_output = script(example_inputs) + assert torch.allclose(script_output, model_output) + + @pytest.mark.parametrize("device", [ torch.device("cpu"), torch.device("cuda", 0) @@ -60,7 +89,9 @@ def test_torchscript_input_output_trace(modelclass): @pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU machine") def test_torchscript_device(device): """ Test that scripted module is on the correct device. """ - model = EvalModelTemplate().to(device) + model = BoringModel().to(device) + model.example_input_array = torch.randn(5, 32) + script = model.to_torchscript() assert next(script.parameters()).device == device script_output = script(model.example_input_array.to(device)) @@ -69,7 +100,7 @@ def test_torchscript_device(device): def test_torchscript_retain_training_state(): """ Test that torchscript export does not alter the training mode of original model. """ - model = EvalModelTemplate() + model = BoringModel() model.train(True) script = model.to_torchscript() assert model.training @@ -81,7 +112,7 @@ def test_torchscript_retain_training_state(): @pytest.mark.parametrize("modelclass", [ - EvalModelTemplate, + BoringModel, ParityModuleRNN, BasicGAN, ]) @@ -100,7 +131,7 @@ def test_torchscript_properties(modelclass): @pytest.mark.parametrize("modelclass", [ - EvalModelTemplate, + BoringModel, ParityModuleRNN, BasicGAN, ]) @@ -109,9 +140,27 @@ def test_torchscript_properties(modelclass): reason="torch.save/load has bug loading script modules on torch <= 1.4", ) def test_torchscript_save_load(tmpdir, modelclass): - """ Test that scripted LightningModules is correctly saved and can be loaded. """ + """ Test that scripted LightningModule is correctly saved and can be loaded. 
""" model = modelclass() output_file = str(tmpdir / "model.pt") script = model.to_torchscript(file_path=output_file) loaded_script = torch.jit.load(output_file) assert torch.allclose(next(script.parameters()), next(loaded_script.parameters())) + + +def test_torchcript_invalid_method(tmpdir): + """Test that an error is thrown with invalid torchscript method""" + model = BoringModel() + model.train(True) + + with pytest.raises(ValueError, match="only supports 'script' or 'trace'"): + model.to_torchscript(method='temp') + + +def test_torchscript_with_no_input(tmpdir): + """Test that an error is thrown when there is no input tensor""" + model = BoringModel() + model.example_input_array = None + + with pytest.raises(ValueError, match='requires either `example_inputs` or `model.example_input_array`'): + model.to_torchscript(method='trace') diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index e838dc60d81b3..37ab774bc8342 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -244,39 +244,6 @@ def test_distributed_backend_set_when_using_tpu(tmpdir, tpu_cores): assert Trainer(tpu_cores=tpu_cores).distributed_backend == "tpu" -@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) -@pytest.mark.skipif(not TPU_AVAILABLE, reason="test requires TPU machine") -@pl_multi_process_test -def test_result_obj_on_tpu(tmpdir): - seed_everything(1234) - - batches = 5 - epochs = 2 - - model = EvalModelTemplate() - model.training_step = model.training_step_result_obj - model.training_step_end = None - model.training_epoch_end = None - model.validation_step = model.validation_step_result_obj - model.validation_step_end = None - model.validation_epoch_end = None - model.test_step = model.test_step_result_obj - model.test_step_end = None - model.test_epoch_end = None - - trainer_options = dict( - default_root_dir=tmpdir, - max_epochs=epochs, - callbacks=[EarlyStopping()], - log_every_n_steps=2, - limit_train_batches=batches, - weights_summary=None, - tpu_cores=8 - ) - - tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False) - - @pytest.mark.skipif(not TPU_AVAILABLE, reason="test requires TPU machine") @pl_multi_process_test def test_broadcast_on_tpu(): diff --git a/tests/trainer/optimization/test_manual_optimization.py b/tests/trainer/optimization/test_manual_optimization.py index 9e369a874acd0..33d14e852b285 100644 --- a/tests/trainer/optimization/test_manual_optimization.py +++ b/tests/trainer/optimization/test_manual_optimization.py @@ -855,7 +855,7 @@ def automatic_optimization(self) -> bool: ) trainer.fit(model) - expected_calls = [call() for s in range(2)] + expected_calls = [call(closure=ANY) for s in range(2)] step_mock.assert_has_calls(expected_calls) @@ -933,9 +933,9 @@ def automatic_optimization(self) -> bool: ) trainer.fit(model) - expected_calls = [call(optim='sgd') for s in range(4)] + expected_calls = [call(closure=ANY, optim='sgd') for s in range(4)] mock_sgd_step.assert_has_calls(expected_calls) - expected_calls = [call() for s in range(2)] + expected_calls = [call(closure=ANY) for s in range(2)] mock_adam_step.assert_has_calls(expected_calls) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 9b29d6ec2b1dd..9e5ceccf9b646 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -11,12 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import glob import math import os import pickle import sys -import types from argparse import Namespace from copy import deepcopy from pathlib import Path @@ -34,6 +32,7 @@ from pytorch_lightning.loggers import TensorBoardLogger from pytorch_lightning.profiler.profilers import AdvancedProfiler, PassThroughProfiler, SimpleProfiler from pytorch_lightning.trainer.logging import TrainerLoggingMixin +from pytorch_lightning.trainer.states import TrainerState from pytorch_lightning.utilities import NATIVE_AMP_AVAILABLE from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -61,6 +60,7 @@ def test_no_val_module(monkeypatch, tmpdir, tmpdir_server, url_ckpt): result = trainer.fit(model) # training complete assert result == 1, "amp + ddp model failed to complete" + assert trainer.state == TrainerState.FINISHED # save model new_weights_path = os.path.join(tmpdir, "save_test.ckpt") @@ -107,6 +107,7 @@ def test_no_val_end_module(monkeypatch, tmpdir, tmpdir_server, url_ckpt): # traning complete assert result == 1, "amp + ddp model failed to complete" + assert trainer.state == TrainerState.FINISHED # save model new_weights_path = os.path.join(tmpdir, "save_test.ckpt") @@ -151,6 +152,7 @@ def test_strict_model_load(monkeypatch, tmpdir, tmpdir_server, url_ckpt): # traning complete assert result == 1 + assert trainer.state == TrainerState.FINISHED # save model new_weights_path = os.path.join(tmpdir, "save_test.ckpt") @@ -468,6 +470,7 @@ def test_model_checkpoint_only_weights(tmpdir): result = trainer.fit(model) # training complete assert result == 1, "training failed to complete" + assert trainer.state == TrainerState.FINISHED checkpoint_path = list(trainer.checkpoint_callback.best_k_models.keys())[0] @@ -507,35 +510,23 @@ def test_resume_from_checkpoint_epoch_restored(monkeypatch, tmpdir, tmpdir_serve # set $TORCH_HOME, which determines torch hub's cache path, to tmpdir monkeypatch.setenv("TORCH_HOME", tmpdir) - hparams = EvalModelTemplate.get_default_hparams() - - def _new_model(): - # Create a model that tracks epochs and batches seen - model = EvalModelTemplate(**hparams) - model.num_epochs_seen = 0 - model.num_batches_seen = 0 - model.num_on_load_checkpoint_called = 0 + class TestModel(BoringModel): + # Model that tracks epochs and batches seen + num_epochs_seen = 0 + num_batches_seen = 0 + num_on_load_checkpoint_called = 0 - def increment_epoch(self): + def on_epoch_end(self): self.num_epochs_seen += 1 - def increment_batch(self, batch, batch_idx, dataloader_idx): + def on_train_batch_start(self, *_): self.num_batches_seen += 1 - def increment_on_load_checkpoint(self, _): + def on_load_checkpoint(self, _): self.num_on_load_checkpoint_called += 1 - # Bind methods to keep track of epoch numbers, batch numbers it has seen - # as well as number of times it has called on_load_checkpoint() - model.on_epoch_end = types.MethodType(increment_epoch, model) - model.on_train_batch_start = types.MethodType(increment_batch, model) - model.on_load_checkpoint = types.MethodType(increment_on_load_checkpoint, model) - return model - - model = _new_model() - - trainer_options = dict( - progress_bar_refresh_rate=0, + model = TestModel() + trainer = Trainer( max_epochs=2, limit_train_batches=0.65, limit_val_batches=1, @@ -543,144 +534,125 @@ def increment_on_load_checkpoint(self, _): default_root_dir=tmpdir, val_check_interval=1.0, enable_pl_optimizer=enable_pl_optimizer, + progress_bar_refresh_rate=0, + logger=False, + 
weights_summary=None, ) - - trainer = Trainer(**trainer_options) - # fit model trainer.fit(model) - training_batches = trainer.num_training_batches - assert model.num_epochs_seen == 2 - assert model.num_batches_seen == training_batches * 2 + assert model.num_batches_seen == trainer.num_training_batches * 2 assert model.num_on_load_checkpoint_called == 0 # Other checkpoints can be uncommented if/when resuming mid-epoch is supported - checkpoints = sorted(glob.glob(os.path.join(trainer.checkpoint_callback.dirpath, "*.ckpt"))) + checkpoints = Path(trainer.checkpoint_callback.dirpath).glob("*.ckpt") if url_ckpt: # transform local paths into url checkpoints ip, port = tmpdir_server - checkpoints = [f"http://{ip}:{port}/" + os.path.basename(check) for check in checkpoints] + checkpoints = [f"http://{ip}:{port}/" + ckpt.name for ckpt in checkpoints] - for check in checkpoints: - next_model = _new_model() - state = pl_load(check) + for ckpt in checkpoints: + next_model = TestModel() + state = pl_load(ckpt) # Resume training - trainer_options["max_epochs"] = 2 - new_trainer = Trainer(**trainer_options, resume_from_checkpoint=check) + new_trainer = Trainer(resume_from_checkpoint=ckpt, max_epochs=2) new_trainer.fit(next_model) - assert state["global_step"] + next_model.num_batches_seen == training_batches * trainer_options["max_epochs"] + assert state["global_step"] + next_model.num_batches_seen == trainer.num_training_batches * trainer.max_epochs assert next_model.num_on_load_checkpoint_called == 1 -def _init_steps_model(): - """private method for initializing a model with 5% train epochs""" - model = EvalModelTemplate() - - # define train epoch to 5% of data - train_percent = 0.5 - # get number of samples in 1 epoch - num_train_samples = math.floor(len(model.train_dataloader()) * train_percent) - - trainer_options = dict( - limit_train_batches=train_percent, - ) - return model, trainer_options, num_train_samples - - def test_trainer_max_steps_and_epochs(tmpdir): """Verify model trains according to specified max steps""" - model, trainer_options, num_train_samples = _init_steps_model() + model = BoringModel() + num_train_samples = math.floor(len(model.train_dataloader()) * 0.5) # define less train steps than epochs - trainer_options.update( - default_root_dir=tmpdir, - max_epochs=3, - max_steps=num_train_samples + 10, - ) - - # fit model - trainer = Trainer(**trainer_options) + trainer_kwargs = { + 'limit_train_batches': 0.5, + 'default_root_dir': tmpdir, + 'max_epochs': 3, + 'max_steps': num_train_samples + 10, + 'logger': False, + 'weights_summary': None, + 'progress_bar_refresh_rate': 0, + } + trainer = Trainer(**trainer_kwargs) result = trainer.fit(model) - assert result == 1, "Training did not complete" - # check training stopped at max_steps + assert result == 1, "Training did not complete" + assert trainer.state == TrainerState.FINISHED assert trainer.global_step == trainer.max_steps, "Model did not stop at max_steps" # define less train epochs than steps - trainer_options.update( - max_epochs=2, - max_steps=trainer_options["max_epochs"] * 2 * num_train_samples, - ) - - # fit model - trainer = Trainer(**trainer_options) + trainer_kwargs['max_epochs'] = 2 + trainer_kwargs['max_steps'] = 3 * 2 * num_train_samples + trainer = Trainer(**trainer_kwargs) result = trainer.fit(model) - assert result == 1, "Training did not complete" - # check training stopped at max_epochs + assert result == 1, "Training did not complete" + assert trainer.state == TrainerState.FINISHED assert trainer.global_step == 
num_train_samples * trainer.max_epochs assert trainer.current_epoch == trainer.max_epochs - 1, "Model did not stop at max_epochs" def test_trainer_min_steps_and_epochs(tmpdir): """Verify model trains according to specified min steps""" - model, trainer_options, num_train_samples = _init_steps_model() - - # define callback for stopping the model and default epochs - trainer_options.update( - default_root_dir=tmpdir, - callbacks=[EarlyStopping(monitor="early_stop_on", min_delta=1.0)], - val_check_interval=2, - min_epochs=1, - max_epochs=7, - ) - - # define less min steps than 1 epoch - trainer_options["min_steps"] = math.floor(num_train_samples / 2) - - # fit model - trainer = Trainer(**trainer_options) + model = EvalModelTemplate() + num_train_samples = math.floor(len(model.train_dataloader()) * 0.5) + + trainer_kwargs = { + 'limit_train_batches': 0.5, + 'default_root_dir': tmpdir, + # define callback for stopping the model + 'callbacks': [EarlyStopping(monitor="early_stop_on", min_delta=1.0)], + 'val_check_interval': 2, + 'min_epochs': 1, + 'max_epochs': 7, + # define less min steps than 1 epoch + 'min_steps': num_train_samples // 2, + 'logger': False, + 'weights_summary': None, + 'progress_bar_refresh_rate': 0, + } + trainer = Trainer(**trainer_kwargs) result = trainer.fit(model) - assert result == 1, "Training did not complete" - # check model ran for at least min_epochs - assert ( - trainer.global_step >= num_train_samples and trainer.current_epoch > 0 - ), "Model did not train for at least min_epochs" + assert result == 1, "Training did not complete" + assert trainer.state == TrainerState.FINISHED + assert trainer.current_epoch > 0 + assert trainer.global_step >= num_train_samples, "Model did not train for at least min_epochs" # define less epochs than min_steps - trainer_options["min_steps"] = math.floor(num_train_samples * 1.5) - - # fit model - trainer = Trainer(**trainer_options) + trainer_kwargs["min_steps"] = math.floor(num_train_samples * 1.5) + trainer = Trainer(**trainer_kwargs) result = trainer.fit(model) - assert result == 1, "Training did not complete" - # check model ran for at least num_train_samples*1.5 - assert ( - trainer.global_step >= math.floor(num_train_samples * 1.5) and trainer.current_epoch > 0 - ), "Model did not train for at least min_steps" + assert result == 1, "Training did not complete" + assert trainer.state == TrainerState.FINISHED + assert trainer.current_epoch > 0 + assert trainer.global_step >= math.floor(num_train_samples * 1.5), "Model did not train for at least min_steps" def test_trainer_max_steps_accumulate_batches(tmpdir): """Verify model trains according to specified max steps with grad accumulated batches""" - model, trainer_options, num_train_samples = _init_steps_model() + model = BoringModel() + num_train_samples = math.floor(len(model.train_dataloader()) * 0.5) # define less train steps than epochs - trainer_options.update( + trainer = Trainer( + limit_train_batches=0.5, default_root_dir=tmpdir, - max_steps=(num_train_samples + 10), + max_steps=num_train_samples + 10, accumulate_grad_batches=10, + logger=False, + weights_summary=None, + progress_bar_refresh_rate=0, ) - - # fit model - trainer = Trainer(**trainer_options) result = trainer.fit(model) - assert result == 1, "Training did not complete" - # check training stopped at max_steps + assert result == 1, "Training did not complete" + assert trainer.state == TrainerState.FINISHED assert trainer.global_step == trainer.max_steps, "Model did not stop at max_steps" @@ -703,6 +675,7 @@ def 
test_benchmark_option(tmpdir): # verify training completed assert result == 1 + assert trainer.state == TrainerState.FINISHED # verify torch.backends.cudnn.benchmark is not turned off assert torch.backends.cudnn.benchmark @@ -788,6 +761,7 @@ def training_epoch_end(self, *args, **kwargs): # check that limit_train_batches=0 turns off training assert result == 1, "training failed to complete" + assert trainer.state == TrainerState.FINISHED assert trainer.current_epoch == 0 assert not model.training_step_invoked, "`training_step` should not run when `limit_train_batches=0`" assert not model.training_epoch_end_invoked, "`training_epoch_end` should not run when `limit_train_batches=0`" @@ -806,6 +780,7 @@ def training_epoch_end(self, *args, **kwargs): assert not torch.all(torch.eq(before_state_dict[key], after_state_dict[key])) assert result == 1, "training failed to complete" + assert trainer.state == TrainerState.FINISHED assert trainer.current_epoch == 0 assert model.training_step_invoked, "did not run `training_step` with `fast_dev_run=True`" assert model.training_epoch_end_invoked, "did not run `training_epoch_end` with `fast_dev_run=True`" @@ -844,6 +819,7 @@ def validation_epoch_end(self, *args, **kwargs): # check that limit_val_batches=0 turns off validation assert result == 1, "training failed to complete" + assert trainer.state == TrainerState.FINISHED assert trainer.current_epoch == 1 assert not model.validation_step_invoked, "`validation_step` should not run when `limit_val_batches=0`" assert not model.validation_epoch_end_invoked, "`validation_epoch_end` should not run when `limit_val_batches=0`" @@ -855,6 +831,7 @@ def validation_epoch_end(self, *args, **kwargs): result = trainer.fit(model) assert result == 1, "training failed to complete" + assert trainer.state == TrainerState.FINISHED assert trainer.current_epoch == 0 assert model.validation_step_invoked, "did not run `validation_step` with `fast_dev_run=True`" assert model.validation_epoch_end_invoked, "did not run `validation_epoch_end` with `fast_dev_run=True`" @@ -958,6 +935,7 @@ def test_gradient_clipping(tmpdir): """ Test gradient clipping """ + tutils.reset_seed() model = EvalModelTemplate() @@ -995,6 +973,7 @@ def test_gradient_clipping_fp16(tmpdir): """ Test gradient clipping with fp16 """ + tutils.reset_seed() model = EvalModelTemplate() @@ -1117,7 +1096,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): @pytest.mark.parametrize( "trainer_kwargs,expected", [ - pytest.param( + ( dict(accelerator=None, gpus=None), dict( use_dp=False, @@ -1129,7 +1108,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): num_processes=1, ), ), - pytest.param( + ( dict(accelerator="dp", gpus=None), dict( use_dp=False, @@ -1141,7 +1120,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): num_processes=1, ), ), - pytest.param( + ( dict(accelerator="dp", gpus=None), dict( use_dp=False, @@ -1153,7 +1132,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): num_processes=1, ), ), - pytest.param( + ( dict(accelerator="ddp", gpus=None), dict( use_dp=False, @@ -1165,7 +1144,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): num_processes=1, ), ), - pytest.param( + ( dict(accelerator="ddp", num_processes=2, gpus=None), dict( use_dp=False, @@ -1177,7 +1156,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): num_processes=2, ), ), - pytest.param( + ( dict(accelerator="ddp", num_nodes=2, gpus=None), dict( use_dp=False, @@ -1189,7 +1168,7 @@ 
def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): num_processes=1, ), ), - pytest.param( + ( dict(accelerator="ddp_cpu", num_processes=2, gpus=None), dict( use_dp=False, @@ -1201,7 +1180,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): num_processes=2, ), ), - pytest.param( + ( dict(accelerator="ddp2", gpus=None), dict( use_dp=False, @@ -1213,7 +1192,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): num_processes=1, ), ), - pytest.param( + ( dict(accelerator=None, gpus=1), dict( use_dp=False, @@ -1224,9 +1203,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): use_single_gpu=True, num_processes=1, ), - marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")], ), - pytest.param( + ( dict(accelerator="dp", gpus=1), dict( use_dp=True, @@ -1237,9 +1215,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): use_single_gpu=True, num_processes=1, ), - marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")], ), - pytest.param( + ( dict(accelerator="ddp", gpus=1), dict( use_dp=False, @@ -1250,9 +1227,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): use_single_gpu=True, num_processes=1, ), - marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")], ), - pytest.param( + ( dict(accelerator="ddp_cpu", num_processes=2, gpus=1), dict( use_dp=False, @@ -1263,9 +1239,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): use_single_gpu=False, num_processes=2, ), - marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")], ), - pytest.param( + ( dict(accelerator="ddp2", gpus=1), dict( use_dp=False, @@ -1276,9 +1251,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): use_single_gpu=False, num_processes=1, ), - marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")], ), - pytest.param( + ( dict(accelerator=None, gpus=2), dict( use_dp=False, @@ -1289,9 +1263,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): use_single_gpu=False, num_processes=2, ), - marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")], ), - pytest.param( + ( dict(accelerator="dp", gpus=2), dict( use_dp=True, @@ -1302,9 +1275,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): use_single_gpu=False, num_processes=1, ), - marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")], ), - pytest.param( + ( dict(accelerator="ddp", gpus=2), dict( use_dp=False, @@ -1315,9 +1287,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): use_single_gpu=False, num_processes=2, ), - marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")], ), - pytest.param( + ( dict(accelerator="ddp2", gpus=2), dict( use_dp=False, @@ -1328,21 +1299,17 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): use_single_gpu=False, num_processes=1, ), - marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")], ), ], ) -# Todo: mock nb Gpus so all these tests can run on any device -# todo: think about simplification, that the the expected will be just a list use_xxx which shall be true... 
-def test_trainer_config(trainer_kwargs, expected): +def test_trainer_config(trainer_kwargs, expected, monkeypatch): + if trainer_kwargs["gpus"] is not None: + monkeypatch.setattr(torch.cuda, "is_available", lambda: True) + monkeypatch.setattr(torch.cuda, "device_count", lambda: trainer_kwargs["gpus"]) trainer = Trainer(**trainer_kwargs) - assert trainer.use_dp is expected["use_dp"], 'for input: %s' % trainer_kwargs - assert trainer.use_ddp is expected["use_ddp"], 'for input: %s' % trainer_kwargs - assert trainer.use_ddp2 is expected["use_ddp2"], 'for input: %s' % trainer_kwargs - assert trainer.num_gpus == expected["num_gpus"], 'for input: %s' % trainer_kwargs - assert trainer.on_gpu is expected["on_gpu"], 'for input: %s' % trainer_kwargs - assert trainer.use_single_gpu is expected["use_single_gpu"], 'for input: %s' % trainer_kwargs - assert trainer.num_processes == expected["num_processes"], 'for input: %s' % trainer_kwargs + assert len(expected) == 7 + for k, v in expected.items(): + assert getattr(trainer, k) == v, f"Failed {k}: {v}" def test_trainer_subclassing(): @@ -1358,6 +1325,7 @@ def __init__(self, custom_arg, *args, custom_kwarg="test", **kwargs): trainer = TrainerSubclass(123, custom_kwarg="custom", fast_dev_run=True) result = trainer.fit(model) assert result == 1 + assert trainer.state == TrainerState.FINISHED assert trainer.custom_arg == 123 assert trainer.custom_kwarg == "custom" assert trainer.fast_dev_run @@ -1373,6 +1341,7 @@ def __init__(self, **kwargs): trainer = TrainerSubclass(custom_kwarg="custom", fast_dev_run=True) result = trainer.fit(model) assert result == 1 + assert trainer.state == TrainerState.FINISHED assert trainer.custom_kwarg == "custom" assert trainer.fast_dev_run