diff --git a/.mergify.yml b/.mergify.yml
index 44c48f2ddced5e..cb5ef3ec7519a8 100644
--- a/.mergify.yml
+++ b/.mergify.yml
@@ -12,59 +12,59 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-pull_request_rules:
-
- - name: Automatic merge on approval
- conditions:
- - base=master
- # number of review approvals
- - "#approved-reviews-by>=3"
- # no waiting or assigned review
- - "#review-requested=0"
- # no requested chnages from any reviewer
- - "#changes-requested-reviews-by=0"
- # this serves as ALL check has to pass as we have actually around 40 tests in total
- - "#status-success>=54"
- # this is just in case since we rely on GPU tests (note: redundand to the above)
- - status-success=continuous-integration/drone/pr
- - "status-success=ci/circleci: TPU-tests"
- # this is patter-like, unofrunatly serves as `any(...)` (note: redundand to the above)
- #- "status-success~=^ci/circleci:"
- # no conflict with master branch
- - -conflict
- # was not closed yet
- - -closed
- # filter-out GH draft PRs
- - -draft
- actions:
- delete_head_branch: {}
- merge:
- # https://doc.mergify.io/merge-action.html#strict-merge
- # (on head branch) $ git merge --no-ff base
- # (on head branch) # Wait for CI to go green
- # (on head branch) # Squash all commits
- # (on base branch) $ git merge --ff head
- strict: true
- method: squash
- comment:
- message: Great job! =)
-
- - name: warn on conflicts
- conditions:
- - conflict
- # filter-out GH draft PRs
- - -draft
- actions:
- comment:
- message: This pull request is now in conflict... :(
-
- - name: add core reviewer
- conditions:
- # filter-out GH draft PRs
- - -draft
- # number of review approvals
- - "#approved-reviews-by<3"
- actions:
- request_reviews:
- teams:
- - core-contributors
+#pull_request_rules:
+#
+# - name: Automatic merge on approval
+# conditions:
+# - base=master
+# # number of review approvals
+# - "#approved-reviews-by>=3"
+# # no waiting or assigned review
+# - "#review-requested=0"
+# # no requested changes from any reviewer
+# - "#changes-requested-reviews-by=0"
+# # this serves as an "ALL checks have to pass" requirement, as we have around 40 tests in total
+# - "#status-success>=54"
+# # this is just in case since we rely on GPU tests (note: redundant to the above)
+# - status-success=continuous-integration/drone/pr
+# - "status-success=ci/circleci: TPU-tests"
+# # this is pattern-like, unfortunately it serves as `any(...)` (note: redundant to the above)
+# #- "status-success~=^ci/circleci:"
+# # no conflict with master branch
+# - -conflict
+# # was not closed yet
+# - -closed
+# # filter-out GH draft PRs
+# - -draft
+# actions:
+# delete_head_branch: {}
+# merge:
+# # https://doc.mergify.io/merge-action.html#strict-merge
+# # (on head branch) $ git merge --no-ff base
+# # (on head branch) # Wait for CI to go green
+# # (on head branch) # Squash all commits
+# # (on base branch) $ git merge --ff head
+# strict: true
+# method: squash
+# comment:
+# message: Great job! =)
+#
+# - name: warn on conflicts
+# conditions:
+# - conflict
+# # filter-out GH draft PRs
+# - -draft
+# actions:
+# comment:
+# message: This pull request is now in conflict... :(
+#
+# - name: add core reviewer
+# conditions:
+# # filter-out GH draft PRs
+# - -draft
+# # number of review approvals
+# - "#approved-reviews-by<3"
+# actions:
+# request_reviews:
+# teams:
+# - core-contributors
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f078349ef3665d..051fe5fae09e5e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
-## [unreleased.Features] - YYYY-MM-DD
+## [unreleased.BugFix] - YYYY-MM-DD
### Added
@@ -22,28 +22,39 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
### Fixed
-
-## [unreleased.BugFix] - YYYY-MM-DD
+## [1.1.1] - 2020-12-15
### Added
+- Add a notebook example to reach a quick baseline of ~94% accuracy on CIFAR10 using ResNet in Lightning ([#4818](https://github.com/PyTorchLightning/pytorch-lightning/pull/4818))
### Changed
-
-### Deprecated
+- Simplify accelerator steps ([#5015](https://github.com/PyTorchLightning/pytorch-lightning/pull/5015))
+- Refactor load in checkpoint connector ([#4593](https://github.com/PyTorchLightning/pytorch-lightning/pull/4593))
+- Fixed the saved filename in `ModelCheckpoint` when it already exists ([#4861](https://github.com/PyTorchLightning/pytorch-lightning/pull/4861))
### Removed
+- Drop duplicate metrics ([#5014](https://github.com/PyTorchLightning/pytorch-lightning/pull/5014))
+- Remove beta arg from F1 class and functional ([#5076](https://github.com/PyTorchLightning/pytorch-lightning/pull/5076))
### Fixed
- Fixed trainer by default `None` in `DDPAccelerator` ([#4915](https://github.com/PyTorchLightning/pytorch-lightning/pull/4915))
-
-
-- Fixed `LightningOptimizer` exposes optimizer attributes ([#5095](https://github.com/PyTorchLightning/pytorch-lightning/pull/5095))
-
+- Fixed `LightningOptimizer` to expose optimizer attributes ([#5095](https://github.com/PyTorchLightning/pytorch-lightning/pull/5095))
+- Do not warn when the `name` key is used in the `lr_scheduler` dict ([#5057](https://github.com/PyTorchLightning/pytorch-lightning/pull/5057))
+- Check if optimizer supports closure ([#4981](https://github.com/PyTorchLightning/pytorch-lightning/pull/4981))
+- Extend `LightningOptimizer` to expose underlying Optimizer attributes + update doc ([#5095](https://github.com/PyTorchLightning/pytorch-lightning/pull/5095))
+- Add deprecated metric utility functions back to functional (
+ [#5067](https://github.com/PyTorchLightning/pytorch-lightning/pull/5067),
+ [#5068](https://github.com/PyTorchLightning/pytorch-lightning/pull/5068))
+- Allow any input in `to_onnx` and `to_torchscript` ([#4378](https://github.com/PyTorchLightning/pytorch-lightning/pull/4378))
+
+- Fixed `DDPHPCAccelerator` hangs in DDP construction by calling `init_device` ([#5157](https://github.com/PyTorchLightning/pytorch-lightning/pull/5157))
## [1.1.0] - 2020-12-09
diff --git a/benchmarks/test_parity.py b/benchmarks/test_parity.py
index 41bba9533e10d7..3508d5a3c28acc 100644
--- a/benchmarks/test_parity.py
+++ b/benchmarks/test_parity.py
@@ -4,8 +4,8 @@
import pytest
import torch
+from pytorch_lightning import seed_everything, Trainer
import tests.base.develop_utils as tutils
-from pytorch_lightning import Trainer, seed_everything
from tests.base.models import ParityModuleMNIST, ParityModuleRNN
diff --git a/benchmarks/test_sharded_parity.py b/benchmarks/test_sharded_parity.py
index 9fe49764421785..2e52613462621c 100644
--- a/benchmarks/test_sharded_parity.py
+++ b/benchmarks/test_sharded_parity.py
@@ -6,7 +6,7 @@
import pytest
import torch
-from pytorch_lightning import Trainer, seed_everything
+from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.plugins.ddp_plugin import DDPPlugin
from pytorch_lightning.plugins.sharded_plugin import DDPShardedPlugin
from pytorch_lightning.utilities import FAIRSCALE_AVAILABLE, NATIVE_AMP_AVAILABLE
diff --git a/dockers/base-xla/Dockerfile b/dockers/base-xla/Dockerfile
index 8eb093295c37bb..5dfeac8c9e86ea 100644
--- a/dockers/base-xla/Dockerfile
+++ b/dockers/base-xla/Dockerfile
@@ -97,6 +97,8 @@ RUN \
python -c "fname = 'requirements.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('torch')] ; open(fname, 'w').writelines(lines)" && \
# drop Horovod as it is not needed
python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \
+ # drop fairscale as it is not needed
+ python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)" && \
# drop TorchVision as it was installed with XLA
python -c "fname = 'requirements/examples.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('torchvision')] ; open(fname, 'w').writelines(lines)" && \
pip install --requirement ./requirements/devel.txt --upgrade-strategy only-if-needed && \
diff --git a/dockers/tpu-tests/Dockerfile b/dockers/tpu-tests/Dockerfile
index a514b1c3d35fed..464f7fd8f309eb 100644
--- a/dockers/tpu-tests/Dockerfile
+++ b/dockers/tpu-tests/Dockerfile
@@ -27,8 +27,10 @@ COPY ./ ./pytorch-lightning/
RUN \
# Install pytorch-lightning at the current PR, plus dependencies.
#pip install -r pytorch-lightning/requirements.txt --no-cache-dir && \
- # drop Horovod
+ # drop Horovod as it is not needed
python -c "fname = 'pytorch-lightning/requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \
+ # drop fairscale as it is not needed
+ python -c "fname = 'pytorch-lightning/requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)" && \
pip install -r pytorch-lightning/requirements/devel.txt --no-cache-dir --upgrade-strategy only-if-needed
#RUN python -c "import pytorch_lightning as pl; print(pl.__version__)"
diff --git a/docs/source/introduction_guide.rst b/docs/source/introduction_guide.rst
index d6d082e2ed779b..d4cf578e10bda2 100644
--- a/docs/source/introduction_guide.rst
+++ b/docs/source/introduction_guide.rst
@@ -601,8 +601,8 @@ In this method we do all the preparation we need to do once (instead of on every
def setup(self, stage):
# transform
transform=transforms.Compose([transforms.ToTensor()])
- MNIST(os.getcwd(), train=True, download=False, transform=transform)
- MNIST(os.getcwd(), train=False, download=False, transform=transform)
+ mnist_train = MNIST(os.getcwd(), train=True, download=False, transform=transform)
+ mnist_test = MNIST(os.getcwd(), train=False, download=False, transform=transform)
# train/val split
mnist_train, mnist_val = random_split(mnist_train, [55000, 5000])
diff --git a/docs/source/multi_gpu.rst b/docs/source/multi_gpu.rst
index def47810504d69..b3e0b905f27f43 100644
--- a/docs/source/multi_gpu.rst
+++ b/docs/source/multi_gpu.rst
@@ -663,7 +663,7 @@ It is highly recommended to use Sharded Training in multi-GPU environments where
A technical note: as batch size scales, storing activations for the backwards pass becomes the bottleneck in training. As a result, sharding optimizer state and gradients becomes less impactful.
Work within the future will bring optional sharding to activations and model parameters to reduce memory further, but come with a speed cost.
-To use Sharded Training, you need to first install FairScale using the command below or install all extras using ``pip install pytorch-lightning["extra"]``.
+To use Sharded Training, you need to first install FairScale using the command below.
.. code-block:: bash
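To make the reworded sentence above concrete, here is a minimal sketch of how sharded training is typically enabled once FairScale is installed. The `ddp_sharded` plugin alias and the 2-GPU setup are assumptions based on the 1.1-era API, not an excerpt from the edited page:

# Hedged sketch: enabling sharded training after installing FairScale.
# The 'ddp_sharded' alias and gpus=2 are illustrative assumptions.
from pytorch_lightning import Trainer

trainer = Trainer(
    gpus=2,
    accelerator='ddp',       # sharded training builds on DDP
    plugins='ddp_sharded',   # requires fairscale to be installed
)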
diff --git a/notebooks/04-transformers-text-classification.ipynb b/notebooks/04-transformers-text-classification.ipynb
index 037b24e4ddd9dc..d52af84a76d975 100644
--- a/notebooks/04-transformers-text-classification.ipynb
+++ b/notebooks/04-transformers-text-classification.ipynb
@@ -1,5 +1,12 @@
{
"cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
diff --git a/notebooks/05-trainer-flags-overview.ipynb b/notebooks/05-trainer-flags-overview.ipynb
index 6413e8239bb2e5..da044a9c9b5c6e 100644
--- a/notebooks/05-trainer-flags-overview.ipynb
+++ b/notebooks/05-trainer-flags-overview.ipynb
@@ -1,5 +1,12 @@
{
"cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
diff --git a/pyproject.toml b/pyproject.toml
index 760421a56ece8c..01e416aa51d8b7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,7 @@ exclude = "(.eggs|.git|.hg|.mypy_cache|.nox|.tox|.venv|.svn|_build|buck-out|buil
[tool.isort]
known_first_party = [
- "bencharmks",
+ "benchmarks",
"docs",
"pl_examples",
"pytorch_lightning",
@@ -52,3 +52,5 @@ skip_glob = [
]
profile = "black"
line_length = 120
+force_sort_within_sections = "True"
+order_by_type = "False"
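The two options added above drive the import reorderings seen throughout this diff. A small illustration under those settings, using modules taken from changes elsewhere in this PR:

# order_by_type = false: names inside a from-import are sorted alphabetically rather
# than grouped as constants/classes/functions, hence this diff's reordering:
from pytorch_lightning import seed_everything, Trainer  # was: Trainer, seed_everything

# force_sort_within_sections = true: plain imports and from-imports are interleaved
# alphabetically within a section instead of being kept in two separate groups:
import os
from pathlib import Path
import re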
diff --git a/pytorch_lightning/__init__.py b/pytorch_lightning/__init__.py
index 408d95a72dc470..222263ea2d3853 100644
--- a/pytorch_lightning/__init__.py
+++ b/pytorch_lightning/__init__.py
@@ -1,6 +1,6 @@
"""Root package info."""
-__version__ = '1.1.1rc0'
+__version__ = '1.1.1'
__author__ = 'William Falcon et al.'
__author_email__ = 'waf2107@columbia.edu'
__license__ = 'Apache-2.0'
diff --git a/pytorch_lightning/accelerators/ddp_cpu_hpc_accelerator.py b/pytorch_lightning/accelerators/ddp_cpu_hpc_accelerator.py
index a0545a4604aece..b9a71ed2717441 100644
--- a/pytorch_lightning/accelerators/ddp_cpu_hpc_accelerator.py
+++ b/pytorch_lightning/accelerators/ddp_cpu_hpc_accelerator.py
@@ -48,3 +48,6 @@ def model_to_device(self, model, process_idx):
def get_device_ids(self):
device_ids = None
return device_ids
+
+ def init_device(self, process_idx):
+ pass
diff --git a/pytorch_lightning/accelerators/ddp_hpc_accelerator.py b/pytorch_lightning/accelerators/ddp_hpc_accelerator.py
index ec4c087998614e..b257884e34aef5 100644
--- a/pytorch_lightning/accelerators/ddp_hpc_accelerator.py
+++ b/pytorch_lightning/accelerators/ddp_hpc_accelerator.py
@@ -126,6 +126,7 @@ def ddp_train(self, process_idx, model):
"""
# determine which process we are and world size
self.set_world_ranks(process_idx)
+ self.init_device(process_idx)
# toggle prog bar
if (self.trainer.node_rank != 0 or process_idx != 0) and self.trainer.progress_bar_callback is not None:
diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py
index 88f1881643c9aa..4125a924cb2c59 100644
--- a/pytorch_lightning/callbacks/early_stopping.py
+++ b/pytorch_lightning/callbacks/early_stopping.py
@@ -19,6 +19,7 @@
Monitor a metric and stop training when it stops improving.
"""
+import numbers
import os
import numpy as np
@@ -26,7 +27,8 @@
from pytorch_lightning import _logger as log
from pytorch_lightning.callbacks.base import Callback
-from pytorch_lightning.utilities import rank_zero_info, rank_zero_warn, TPU_AVAILABLE
+from pytorch_lightning.metrics.metric import Metric
+from pytorch_lightning.utilities import TPU_AVAILABLE, rank_zero_info, rank_zero_warn
class EarlyStopping(Callback):
@@ -201,8 +203,11 @@ def _run_early_stopping_check(self, trainer, pl_module):
# when in dev debugging
trainer.dev_debugger.track_early_stopping_history(self, current)
- if not isinstance(current, torch.Tensor):
- current = torch.tensor(current, device=pl_module.device)
+ if current is not None:
+ if isinstance(current, Metric):
+ current = current.compute()
+ elif isinstance(current, numbers.Number):
+ current = torch.tensor(current, device=pl_module.device, dtype=torch.float)
if trainer.use_tpu and TPU_AVAILABLE:
current = current.cpu()
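The branch above lets the monitored value be a `Metric` instance (resolved via `.compute()`) or a plain number, not only a tensor. A minimal sketch of the setup this affects; the `val_loss` key and the patience value are illustrative:

# Illustrative EarlyStopping wiring; the monitored key is whatever the module logs.
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=3, mode='min')
trainer = Trainer(callbacks=[early_stop])
# inside the LightningModule's validation_step:
#     self.log('val_loss', loss)   # a tensor, a float, or a Metric-backed value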
diff --git a/pytorch_lightning/callbacks/lr_monitor.py b/pytorch_lightning/callbacks/lr_monitor.py
index 081aec45067cf1..9799e0d3298d35 100755
--- a/pytorch_lightning/callbacks/lr_monitor.py
+++ b/pytorch_lightning/callbacks/lr_monitor.py
@@ -157,7 +157,7 @@ def _find_names(self, lr_schedulers) -> List[str]:
names = []
for scheduler in lr_schedulers:
sch = scheduler['scheduler']
- if 'name' in scheduler:
+ if scheduler['name'] is not None:
name = scheduler['name']
else:
opt_name = 'lr-' + sch.optimizer.__class__.__name__
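`scheduler['name']` can now be checked directly because `'name': None` is added to the default lr_scheduler config (see `trainer/optimizers.py` and the `configure_optimizers` docstring further down in this diff). A minimal sketch of a user-supplied name; the optimizer and scheduler choices are illustrative:

# Illustrative configure_optimizers with the new optional 'name' key, which
# LearningRateMonitor uses as the logging key instead of an auto-generated one.
import torch
from pytorch_lightning import LightningModule

class NamedSchedulerModel(LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(4, 1)

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=0.1)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
        return {
            'optimizer': optimizer,
            'lr_scheduler': {'scheduler': scheduler, 'name': 'my-lr'},
        }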
diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py
index 1354f7f5056b39..82df32ce3996c2 100644
--- a/pytorch_lightning/callbacks/model_checkpoint.py
+++ b/pytorch_lightning/callbacks/model_checkpoint.py
@@ -20,6 +20,7 @@
"""
+import numbers
import os
import re
from copy import deepcopy
@@ -32,8 +33,9 @@
from pytorch_lightning import _logger as log
from pytorch_lightning.callbacks.base import Callback
-from pytorch_lightning.utilities import rank_zero_info, rank_zero_only, rank_zero_warn
+from pytorch_lightning.metrics.metric import Metric
from pytorch_lightning.plugins.rpc_plugin import RPCPlugin
+from pytorch_lightning.utilities import rank_zero_info, rank_zero_only, rank_zero_warn
from pytorch_lightning.utilities.cloud_io import get_filesystem
from pytorch_lightning.utilities.exceptions import MisconfigurationException
@@ -240,17 +242,14 @@ def save_checkpoint(self, trainer, pl_module):
# what can be monitored
monitor_candidates = self._monitor_candidates(trainer)
- # ie: path/val_loss=0.5.ckpt
- filepath = self._get_metric_interpolated_filepath_name(monitor_candidates, epoch, global_step)
-
# callback supports multiple simultaneous modes
# here we call each mode sequentially
# Mode 1: save all checkpoints OR only the top k
if self.save_top_k:
- self._save_top_k_checkpoints(monitor_candidates, trainer, pl_module, filepath)
+ self._save_top_k_checkpoints(trainer, pl_module, monitor_candidates)
# Mode 2: save the last checkpoint
- self._save_last_checkpoint(trainer, pl_module, monitor_candidates, filepath)
+ self._save_last_checkpoint(trainer, pl_module, monitor_candidates)
def __validate_init_configuration(self):
if self.save_top_k is not None and self.save_top_k < -1:
@@ -444,6 +443,7 @@ def format_checkpoint_name(
)
if ver is not None:
filename = self.CHECKPOINT_JOIN_CHAR.join((filename, f"v{ver}"))
+
ckpt_name = f"{filename}{self.FILE_EXTENSION}"
return os.path.join(self.dirpath, ckpt_name) if self.dirpath else ckpt_name
@@ -515,13 +515,20 @@ def _validate_monitor_key(self, trainer):
)
raise MisconfigurationException(m)
- def _get_metric_interpolated_filepath_name(self, ckpt_name_metrics: Dict[str, Any], epoch: int, step: int):
+ def _get_metric_interpolated_filepath_name(
+ self,
+ ckpt_name_metrics: Dict[str, Any],
+ epoch: int,
+ step: int,
+ del_filepath: Optional[str] = None
+ ) -> str:
filepath = self.format_checkpoint_name(epoch, step, ckpt_name_metrics)
+
version_cnt = 0
- while self._fs.exists(filepath):
+ while self._fs.exists(filepath) and filepath != del_filepath:
filepath = self.format_checkpoint_name(epoch, step, ckpt_name_metrics, ver=version_cnt)
- # this epoch called before
version_cnt += 1
+
return filepath
def _monitor_candidates(self, trainer):
@@ -531,13 +538,11 @@ def _monitor_candidates(self, trainer):
ckpt_name_metrics.update({"step": trainer.global_step, "epoch": trainer.current_epoch})
return ckpt_name_metrics
- def _save_last_checkpoint(self, trainer, pl_module, ckpt_name_metrics, filepath):
+ def _save_last_checkpoint(self, trainer, pl_module, ckpt_name_metrics):
should_save_last = self.monitor is None or self.save_last
if not should_save_last:
return
- last_filepath = filepath
-
# when user ALSO asked for the 'last.ckpt' change the name
if self.save_last:
last_filepath = self._format_checkpoint_name(
@@ -548,6 +553,10 @@ def _save_last_checkpoint(self, trainer, pl_module, ckpt_name_metrics, filepath)
prefix=self.prefix
)
last_filepath = os.path.join(self.dirpath, f"{last_filepath}{self.FILE_EXTENSION}")
+ else:
+ last_filepath = self._get_metric_interpolated_filepath_name(
+ ckpt_name_metrics, trainer.current_epoch, trainer.global_step
+ )
accelerator_backend = trainer.accelerator_backend
@@ -568,16 +577,19 @@ def _save_last_checkpoint(self, trainer, pl_module, ckpt_name_metrics, filepath)
if self.monitor is None:
self.best_model_path = self.last_model_path
- def _save_top_k_checkpoints(self, metrics, trainer, pl_module, filepath):
+ def _save_top_k_checkpoints(self, trainer, pl_module, metrics):
current = metrics.get(self.monitor)
epoch = metrics.get("epoch")
step = metrics.get("step")
- if not isinstance(current, torch.Tensor) and current is not None:
- current = torch.tensor(current, device=pl_module.device)
+ if current is not None:
+ if isinstance(current, Metric):
+ current = current.compute()
+ elif isinstance(current, numbers.Number):
+ current = torch.tensor(current, device=pl_module.device, dtype=torch.float)
if self.check_monitor_top_k(current):
- self._update_best_and_save(filepath, current, epoch, step, trainer, pl_module)
+ self._update_best_and_save(current, epoch, step, trainer, pl_module, metrics)
elif self.verbose:
rank_zero_info(
f"Epoch {epoch:d}, step {step:d}: {self.monitor} was not in top {self.save_top_k}"
@@ -588,25 +600,26 @@ def _is_valid_monitor_key(self, metrics):
def _update_best_and_save(
self,
- filepath: str,
current: torch.Tensor,
epoch: int,
step: int,
trainer,
pl_module,
+ ckpt_name_metrics
):
k = len(self.best_k_models) + 1 if self.save_top_k == -1 else self.save_top_k
- del_list = []
+ del_filepath = None
if len(self.best_k_models) == k and k > 0:
- delpath = self.kth_best_model_path
- self.best_k_models.pop(self.kth_best_model_path)
- del_list.append(delpath)
+ del_filepath = self.kth_best_model_path
+ self.best_k_models.pop(del_filepath)
# do not save nan, replace with +/- inf
if torch.isnan(current):
current = torch.tensor(float('inf' if self.mode == "min" else '-inf'))
+ filepath = self._get_metric_interpolated_filepath_name(ckpt_name_metrics, epoch, step, del_filepath)
+
# save the current score
self.current_score = current
self.best_k_models[filepath] = current
@@ -630,9 +643,8 @@ def _update_best_and_save(
)
self._save_model(filepath, trainer, pl_module)
- for cur_path in del_list:
- if cur_path != filepath:
- self._del_model(cur_path)
+ if del_filepath is not None and filepath != del_filepath:
+ self._del_model(del_filepath)
def to_yaml(self, filepath: Optional[Union[str, Path]] = None):
"""
diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py
index 57979b73f2cb6d..f24a4ce8beb8ac 100644
--- a/pytorch_lightning/core/hooks.py
+++ b/pytorch_lightning/core/hooks.py
@@ -14,7 +14,7 @@
"""Various hooks to be used in the Lightning code."""
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Optional, Union
import torch
from pytorch_lightning.utilities import move_data_to_device, rank_zero_warn
@@ -501,7 +501,7 @@ def val_dataloader(self):
will have an argument ``dataloader_idx`` which matches the order here.
"""
- def transfer_batch_to_device(self, batch: Any, device: torch.device) -> Any:
+ def transfer_batch_to_device(self, batch: Any, device: Optional[torch.device] = None) -> Any:
"""
Override this hook if your :class:`~torch.utils.data.DataLoader` returns tensors
wrapped in a custom data structure.
@@ -549,6 +549,7 @@ def transfer_batch_to_device(self, batch, device)
- :func:`~pytorch_lightning.utilities.apply_func.move_data_to_device`
- :func:`~pytorch_lightning.utilities.apply_func.apply_to_collection`
"""
+ device = device or self.device
return move_data_to_device(batch, device)
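With `device` now optional and defaulting to `self.device`, overrides and internal callers (such as the `to_onnx`/`to_torchscript` changes below) can omit it. A minimal sketch of an override under that signature; `CustomBatch` is a hypothetical container:

# Hypothetical custom batch container, used only to illustrate the optional-device hook.
import torch
from pytorch_lightning import LightningModule
from pytorch_lightning.utilities import move_data_to_device

class CustomBatch:
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

class CustomTransferModel(LightningModule):
    def transfer_batch_to_device(self, batch, device=None):
        device = device or self.device  # mirrors the new fallback above
        if isinstance(batch, CustomBatch):
            batch.inputs = batch.inputs.to(device)
            batch.targets = batch.targets.to(device)
            return batch
        return move_data_to_device(batch, device)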
diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index ef05ce69c1828b..ab66435a2935db 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -22,6 +22,7 @@
import tempfile
from abc import ABC
from argparse import Namespace
+from pathlib import Path
from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union
import torch
@@ -278,6 +279,7 @@ def log(
sync_dist_group,
accelerator.sync_tensor,
self._current_dataloader_idx,
+ self.device,
)
def log_dict(
@@ -989,7 +991,7 @@ def configure_optimizers(
- List or Tuple - List of optimizers.
- Two lists - The first list has multiple optimizers, the second a list of LR schedulers (or lr_dict).
- Dictionary, with an 'optimizer' key, and (optionally) a 'lr_scheduler'
- key which value is a single LR scheduler or lr_dict.
+ key whose value is a single LR scheduler or lr_dict.
- Tuple of dictionaries as described, with an optional 'frequency' key.
- None - Fit will run without any optimizer.
@@ -1001,21 +1003,22 @@ def configure_optimizers(
In the former case, all optimizers will operate on the given batch in each optimization step.
In the latter, only one optimizer will operate on the given batch at every step.
- The lr_dict is a dictionary which contains scheduler and its associated configuration.
- It has five keys. The default configuration is shown below.
+ The lr_dict is a dictionary which contains the scheduler and its associated configuration.
+ The default configuration is shown below.
.. code-block:: python
{
- 'scheduler': lr_scheduler, # The LR schduler
+ 'scheduler': lr_scheduler, # The LR scheduler instance (required)
'interval': 'epoch', # The unit of the scheduler's step size
'frequency': 1, # The frequency of the scheduler
'reduce_on_plateau': False, # For ReduceLROnPlateau scheduler
'monitor': 'val_loss', # Metric for ReduceLROnPlateau to monitor
- 'strict': True # Whether to crash the training if `monitor` is not found
+ 'strict': True, # Whether to crash the training if `monitor` is not found
+ 'name': None, # Custom name for LearningRateMonitor to use
}
- If user only provides LR schedulers, then their configuration will set to default as shown above.
+ Only the ``scheduler`` key is required, the rest will be set to the defaults above.
Examples:
.. code-block:: python
@@ -1390,12 +1393,15 @@ def get_progress_bar_dict(self):
"""
# call .item() only once but store elements without graphs
running_train_loss = self.trainer.train_loop.running_loss.mean()
- avg_training_loss = (
- running_train_loss.cpu().item()
- if running_train_loss is not None
- else float("NaN")
- )
- tqdm_dict = {"loss": "{:.3g}".format(avg_training_loss)}
+ avg_training_loss = None
+ if running_train_loss is not None:
+ avg_training_loss = running_train_loss.cpu().item()
+ elif self.trainer.train_loop.automatic_optimization:
+ avg_training_loss = float('NaN')
+
+ tqdm_dict = {}
+ if avg_training_loss is not None:
+ tqdm_dict["loss"] = f"{avg_training_loss:.3g}"
if self.trainer.truncated_bptt_steps is not None:
tqdm_dict["split_idx"] = self.trainer.split_idx
@@ -1530,12 +1536,19 @@ def _set_hparams(self, hp: Union[dict, Namespace, str]) -> None:
else:
self._hparams = hp
- def to_onnx(self, file_path: str, input_sample: Optional[Tensor] = None, **kwargs):
- """Saves the model in ONNX format
+ @torch.no_grad()
+ def to_onnx(
+ self,
+ file_path: Union[str, Path],
+ input_sample: Optional[Any] = None,
+ **kwargs,
+ ):
+ """
+ Saves the model in ONNX format
Args:
- file_path: The path of the file the model should be saved to.
- input_sample: A sample of an input tensor for tracing.
+ file_path: The path of the file the onnx model should be saved to.
+ input_sample: An input for tracing. Default: None (Use self.example_input_array)
**kwargs: Will be passed to torch.onnx.export function.
Example:
@@ -1554,31 +1567,32 @@ def to_onnx(self, file_path: str, input_sample: Optional[Tensor] = None, **kwarg
... os.path.isfile(tmpfile.name)
True
"""
+ mode = self.training
- if isinstance(input_sample, Tensor):
- input_data = input_sample
- elif self.example_input_array is not None:
- input_data = self.example_input_array
- else:
- if input_sample is not None:
+ if input_sample is None:
+ if self.example_input_array is None:
raise ValueError(
- f"Received `input_sample` of type {type(input_sample)}. Expected type is `Tensor`"
+ "Could not export to ONNX since neither `input_sample` nor"
+ " `model.example_input_array` attribute is set."
)
- raise ValueError(
- "Could not export to ONNX since neither `input_sample` nor"
- " `model.example_input_array` attribute is set."
- )
- input_data = input_data.to(self.device)
+ input_sample = self.example_input_array
+
+ input_sample = self.transfer_batch_to_device(input_sample)
+
if "example_outputs" not in kwargs:
self.eval()
- with torch.no_grad():
- kwargs["example_outputs"] = self(input_data)
+ kwargs["example_outputs"] = self(input_sample)
- torch.onnx.export(self, input_data, file_path, **kwargs)
+ torch.onnx.export(self, input_sample, file_path, **kwargs)
+ self.train(mode)
+ @torch.no_grad()
def to_torchscript(
- self, file_path: Optional[str] = None, method: Optional[str] = 'script',
- example_inputs: Optional[Union[torch.Tensor, Tuple[torch.Tensor]]] = None, **kwargs
+ self,
+ file_path: Optional[Union[str, Path]] = None,
+ method: Optional[str] = 'script',
+ example_inputs: Optional[Any] = None,
+ **kwargs,
) -> Union[ScriptModule, Dict[str, ScriptModule]]:
"""
By default compiles the whole model to a :class:`~torch.jit.ScriptModule`.
@@ -1590,7 +1604,7 @@ def to_torchscript(
Args:
file_path: Path where to save the torchscript. Default: None (no file saved).
method: Whether to use TorchScript's script or trace method. Default: 'script'
- example_inputs: Tensor to be used to do tracing when method is set to 'trace'.
+ example_inputs: An input to be used to do tracing when method is set to 'trace'.
Default: None (Use self.example_input_array)
**kwargs: Additional arguments that will be passed to the :func:`torch.jit.script` or
:func:`torch.jit.trace` function.
@@ -1624,21 +1638,27 @@ def to_torchscript(
This LightningModule as a torchscript, regardless of whether file_path is
defined or not.
"""
-
mode = self.training
- with torch.no_grad():
- if method == 'script':
- torchscript_module = torch.jit.script(self.eval(), **kwargs)
- elif method == 'trace':
- # if no example inputs are provided, try to see if model has example_input_array set
- if example_inputs is None:
- example_inputs = self.example_input_array
- # automatically send example inputs to the right device and use trace
- example_inputs = self.transfer_batch_to_device(example_inputs, device=self.device)
- torchscript_module = torch.jit.trace(func=self.eval(), example_inputs=example_inputs, **kwargs)
- else:
- raise ValueError(f"The 'method' parameter only supports 'script' or 'trace', but value given was:"
- f"{method}")
+
+ if method == 'script':
+ torchscript_module = torch.jit.script(self.eval(), **kwargs)
+ elif method == 'trace':
+ # if no example inputs are provided, try to see if model has example_input_array set
+ if example_inputs is None:
+ if self.example_input_array is None:
+ raise ValueError(
+ 'Choosing method=`trace` requires either `example_inputs`'
+ ' or `model.example_input_array` to be defined'
+ )
+ example_inputs = self.example_input_array
+
+ # automatically send example inputs to the right device and use trace
+ example_inputs = self.transfer_batch_to_device(example_inputs)
+ torchscript_module = torch.jit.trace(func=self.eval(), example_inputs=example_inputs, **kwargs)
+ else:
+ raise ValueError("The 'method' parameter only supports 'script' or 'trace',"
+ f" but value given was: {method}")
+
self.train(mode)
if file_path is not None:
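With the reworked exporters above, both `to_onnx` and `to_torchscript` accept arbitrary inputs (routed through `transfer_batch_to_device`) and fall back to `example_input_array` when no sample is given. A small usage sketch; the model, file names, and input shape are illustrative:

# Illustrative export calls; TinyModel, the file names and the input shape are examples.
import torch
from pytorch_lightning import LightningModule

class TinyModel(LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(4, 1)
        self.example_input_array = torch.randn(1, 4)

    def forward(self, x):
        return self.layer(x)

model = TinyModel()
model.to_onnx('model.onnx')                        # falls back to example_input_array
model.to_torchscript('model.pt', method='trace')   # traces with example_input_array
scripted = model.to_torchscript()                  # default: scripting, no file written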
diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py
index 142fe9048cb0ea..b6112a68b4e9b8 100644
--- a/pytorch_lightning/core/step_result.py
+++ b/pytorch_lightning/core/step_result.py
@@ -15,15 +15,15 @@
"""[Train, Eval]Result for easier logging, checkpointing, early stopping, epoch-wise reduction."""
import numbers
+import os
from copy import copy
-from typing import Optional, Dict, Union, Sequence, Callable, MutableMapping, Any, List, Tuple, Iterable
+from typing import Any, Callable, Dict, Iterable, List, MutableMapping, Optional, Sequence, Tuple, Union
import torch
from torch import Tensor
-import os
-from pytorch_lightning.utilities.distributed import sync_ddp_if_available
from pytorch_lightning.metrics import Metric
+from pytorch_lightning.utilities.distributed import sync_ddp_if_available
class Result(Dict):
@@ -128,6 +128,7 @@ def log(
sync_dist_group: Optional[Any] = None,
sync_fn: Callable = None,
dataloader_idx: Optional[int] = None,
+ device: torch.device = None,
):
# no metrics should be logged with graphs
if not enable_graph and isinstance(value, torch.Tensor):
@@ -138,7 +139,10 @@ def log(
if sync_dist and isinstance(value, (torch.Tensor, numbers.Number)):
is_dist_initialized = torch.distributed.is_available() and torch.distributed.is_initialized()
# TODO: Find a way to make the reduction only once, so we don't need to clone.
- value = value.clone() if is_dist_initialized else value
+ if is_dist_initialized and isinstance(value, torch.Tensor):
+ value = value.clone()
+ else:
+ value = torch.tensor(value, device=device, dtype=torch.float)
value = sync_fn(value, group=sync_dist_group, reduce_op=sync_dist_op)
if 'meta' not in self:
@@ -367,7 +371,10 @@ def get_forked_metrics(self, add_dataloader_idx=False):
dl_key = self._add_dataloader_idx(k, options["dataloader_idx"], add_dataloader_idx)
if options['forked']:
- result[dl_key] = self[k]
+ if isinstance(self[k], Metric):
+ result[dl_key] = self[k].compute().detach()
+ else:
+ result[dl_key] = self[k]
return result
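The sync branch above now wraps plain numbers in a tensor on the caller's device before reduction, and forked `Metric` entries are computed when collected. From user code this path is reached via `self.log(..., sync_dist=True)`; a minimal sketch, with `val_loss` as an example key:

# Illustrative logging that exercises the patched sync/reduction path above.
import torch
from pytorch_lightning import LightningModule

class SyncLoggingModel(LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(4, 1)

    def validation_step(self, batch, batch_idx):
        loss = self.layer(batch).mean()
        # with sync_dist=True, a plain number would now be tensor-ified on self.device
        self.log('val_loss', loss, sync_dist=True)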
diff --git a/pytorch_lightning/metrics/classification/f_beta.py b/pytorch_lightning/metrics/classification/f_beta.py
index d6147b00463b35..fadfd000ebbe1e 100755
--- a/pytorch_lightning/metrics/classification/f_beta.py
+++ b/pytorch_lightning/metrics/classification/f_beta.py
@@ -52,11 +52,11 @@ class FBeta(Metric):
Threshold value for binary or multi-label logits. default: 0.5
average:
- * `'micro'` computes metric globally
- * `'macro'` computes metric for each class and uniformly averages them
- * `'weighted'` computes metric for each class and does a weighted-average,
- where each class is weighted by their support (accounts for class imbalance)
- * `None` computes and returns the metric per class
+ - ``'micro'`` computes metric globally
+ - ``'macro'`` computes metric for each class and uniformly averages them
+ - ``'weighted'`` computes metric for each class and does a weighted-average,
+ where each class is weighted by their support (accounts for class imbalance)
+ - ``'none'`` computes and returns the metric per class
multilabel: If predictions are from multilabel classification.
compute_on_step:
@@ -185,11 +185,11 @@ class F1(FBeta):
Threshold value for binary or multi-label logits. default: 0.5
average:
- * `'micro'` computes metric globally
- * `'macro'` computes metric for each class and uniformly averages them
- * `'weighted'` computes metric for each class and does a weighted-average,
- where each class is weighted by their support (accounts for class imbalance)
- * `None` computes and returns the metric per class
+ - ``'micro'`` computes metric globally
+ - ``'macro'`` computes metric for each class and uniformly averages them
+ - ``'weighted'`` computes metric for each class and does a weighted-average,
+ where each class is weighted by their support (accounts for class imbalance)
+ - ``'none'`` computes and returns the metric per class
multilabel: If predictions are from multilabel classification.
compute_on_step:
@@ -212,7 +212,6 @@ class F1(FBeta):
def __init__(
self,
num_classes: int = 1,
- beta: float = 1.0,
threshold: float = 0.5,
average: str = "micro",
multilabel: bool = False,
diff --git a/pytorch_lightning/metrics/functional/f_beta.py b/pytorch_lightning/metrics/functional/f_beta.py
index 3f0a7a04493257..2b0ba194d56f02 100755
--- a/pytorch_lightning/metrics/functional/f_beta.py
+++ b/pytorch_lightning/metrics/functional/f_beta.py
@@ -83,11 +83,11 @@ def fbeta(
Threshold value for binary or multi-label logits. default: 0.5
average:
- * `'micro'` computes metric globally
- * `'macro'` computes metric for each class and uniformly averages them
- * `'weighted'` computes metric for each class and does a weighted-average,
- where each class is weighted by their support (accounts for class imbalance)
- * `None` computes and returns the metric per class
+ - ``'micro'`` computes metric globally
+ - ``'macro'`` computes metric for each class and uniformly averages them
+ - ``'weighted'`` computes metric for each class and does a weighted-average,
+ where each class is weighted by their support (accounts for class imbalance)
+ - ``'none'`` computes and returns the metric per class
multilabel: If predictions are from multilabel classification.
@@ -110,7 +110,6 @@ def f1(
preds: torch.Tensor,
target: torch.Tensor,
num_classes: int,
- beta: float = 1.0,
threshold: float = 0.5,
average: str = "micro",
multilabel: bool = False
@@ -136,11 +135,11 @@ def f1(
Threshold value for binary or multi-label logits. default: 0.5
average:
- * `'micro'` computes metric globally
- * `'macro'` computes metric for each class and uniformly averages them
- * `'weighted'` computes metric for each class and does a weighted-average,
- where each class is weighted by their support (accounts for class imbalance)
- * `None` computes and returns the metric per class
+ - ``'micro'`` computes metric globally
+ - ``'macro'`` computes metric for each class and uniformly averages them
+ - ``'weighted'`` computes metric for each class and does a weighted-average,
+ where each class is weighted by their support (accounts for class imbalance)
+ - ``'none'`` computes and returns the metric per class
multilabel: If predictions are from multilabel classification.
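With the unused `beta` argument dropped from `f1`, the functional calls read as below; the tensors are toy values and the `average` choices follow the docstring above:

# Toy example of the functional metrics after removing `beta` from `f1`.
import torch
from pytorch_lightning.metrics.functional.f_beta import f1, fbeta

preds = torch.tensor([0, 1, 2, 2])
target = torch.tensor([0, 1, 1, 2])

f1_macro = f1(preds, target, num_classes=3, average='macro')
f2_micro = fbeta(preds, target, num_classes=3, beta=2.0, average='micro')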
diff --git a/pytorch_lightning/setup_tools.py b/pytorch_lightning/setup_tools.py
index 3842bbe50cfc5d..29ac3b814b3c2e 100644
--- a/pytorch_lightning/setup_tools.py
+++ b/pytorch_lightning/setup_tools.py
@@ -14,12 +14,12 @@
# limitations under the License.
import os
import re
-import warnings
from typing import Iterable, List
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
+import warnings
-from pytorch_lightning import PROJECT_ROOT, __homepage__, __version__
+from pytorch_lightning import __homepage__, __version__, PROJECT_ROOT
_PATH_BADGES = os.path.join('.', 'docs', 'source', '_images', 'badges')
# badge to download
diff --git a/pytorch_lightning/trainer/connectors/checkpoint_connector.py b/pytorch_lightning/trainer/connectors/checkpoint_connector.py
index 2311cc767de2d4..429bddd88b77e9 100644
--- a/pytorch_lightning/trainer/connectors/checkpoint_connector.py
+++ b/pytorch_lightning/trainer/connectors/checkpoint_connector.py
@@ -13,14 +13,16 @@
# limitations under the License.
import os
+from pathlib import Path
import re
+from typing import Optional, Union
import torch
import pytorch_lightning
from pytorch_lightning import _logger as log
from pytorch_lightning.core.lightning import LightningModule
-from pytorch_lightning.utilities import APEX_AVAILABLE, AMPType, OMEGACONF_AVAILABLE, rank_zero_warn
+from pytorch_lightning.utilities import APEX_AVAILABLE, AMPType, OMEGACONF_AVAILABLE, rank_zero_info, rank_zero_warn
from pytorch_lightning.utilities.cloud_io import atomic_save, get_filesystem
from pytorch_lightning.utilities.cloud_io import load as pl_load
from pytorch_lightning.utilities.upgrade_checkpoint import KEYS_MAPPING as DEPRECATED_CHECKPOINT_KEYS
@@ -52,16 +54,17 @@ def restore_weights(self, model: LightningModule):
if self.trainer.on_gpu:
torch.cuda.empty_cache()
- # if script called from hpc resubmit, load weights
- did_restore_hpc_weights = self.restore_hpc_weights_if_needed(model)
+ # 1. Attempt to restore states from HPC checkpoint
+ dir_path_hpc = str(self.trainer.weights_save_path)
+ max_suffix = self.max_ckpt_in_folder(dir_path_hpc, "hpc_ckpt_")
+ if max_suffix is not None:
+ checkpoint_path = f'{dir_path_hpc}/hpc_ckpt_{max_suffix}.ckpt'
+ self.hpc_load(checkpoint_path, self.trainer.on_gpu)
+ rank_zero_info(f'restored hpc model from: {checkpoint_path}')
- # clear cache after restore
- if self.trainer.on_gpu:
- torch.cuda.empty_cache()
-
- if not did_restore_hpc_weights:
- if self.trainer.resume_from_checkpoint is not None:
- self.restore(self.trainer.resume_from_checkpoint, on_gpu=self.trainer.on_gpu)
+ # 2. Attempt to restore states from `resume_from_checkpoint` file
+ elif self.trainer.resume_from_checkpoint is not None:
+ self.restore(self.trainer.resume_from_checkpoint, on_gpu=self.trainer.on_gpu)
# wait for all to catch up
self.trainer.accelerator_backend.barrier('TrainerIOMixin.restore_weights')
@@ -72,24 +75,14 @@ def restore_weights(self, model: LightningModule):
def restore(self, checkpoint_path: str, on_gpu: bool):
"""
- Load model/training states from the checkpoint file through file-read and state-restore.
- Also restores all training state like:
- - epoch
- - callbacks
- - schedulers
- - optimizer
- In detail, check return value description of `dump_checkpoint`
+ Load model/training states from a 'PyTorch-Lightning checkpoint' file through file-read and state-restore.
+ All restored states are listed in return value description of `dump_checkpoint`.
"""
- # if on_gpu:
- # checkpoint = torch.load(checkpoint_path)
- # else:
- # load on CPU first
- # read a checkpoint dictionary object from the checkpoint file at `checkpoint_path`
+ # read a checkpoint dictionary object from the 'PyTorch-Lightning checkpoint' file at `checkpoint_path`
checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage)
- # restore states from the checkpoint dictionary object
- # load model state
+ # acquire the model
model = self.trainer.get_model()
# restore model and datamodule state
@@ -106,14 +99,14 @@ def restore_model_state(self, model: LightningModule, checkpoint) -> None:
Restore model states from a 'PyTorch-Lightning checkpoint' dictionary object
"""
- # give the datamodule a chance to load something
+ # restore datamodule states
if self.trainer.datamodule is not None:
self.trainer.datamodule.on_load_checkpoint(checkpoint)
- # give model a chance to restore something
+ # hook: give user access to checkpoint if needed.
model.on_load_checkpoint(checkpoint)
- # restore the state_dict on the model
+ # restore model state_dict
model.load_state_dict(checkpoint['state_dict'])
def restore_training_state(self, checkpoint):
@@ -187,23 +180,6 @@ def restore_training_state(self, checkpoint):
for scheduler, lrs_state in zip(self.trainer.lr_schedulers, lr_schedulers):
scheduler['scheduler'].load_state_dict(lrs_state)
- def restore_hpc_weights_if_needed(self, model: LightningModule):
- """If there is a set of hpc weights, use as signal to restore model."""
- did_restore = False
-
- # look for hpc weights
- folderpath = str(self.trainer.weights_save_path)
- fs = get_filesystem(folderpath)
- if fs.exists(folderpath):
- files = [os.path.basename(f['name']) for f in fs.listdir(folderpath)]
- hpc_weight_paths = [x for x in files if 'hpc_ckpt' in x]
-
- # if hpc weights exist restore model
- if len(hpc_weight_paths) > 0:
- self.hpc_load(folderpath, self.trainer.on_gpu)
- did_restore = True
- return did_restore
-
# ----------------------------------
# PRIVATE OPS
# ----------------------------------
@@ -216,7 +192,8 @@ def hpc_save(self, folderpath: str, logger):
# save logger to make sure we get all the metrics
logger.save()
- ckpt_number = self.max_ckpt_in_folder(folderpath) + 1
+ max_suffix = self.max_ckpt_in_folder(folderpath)
+ ckpt_number = (max_suffix if max_suffix is not None else 0) + 1
fs.makedirs(folderpath, exist_ok=True)
filepath = os.path.join(folderpath, f'hpc_ckpt_{ckpt_number}.ckpt')
@@ -333,36 +310,52 @@ def dump_checkpoint(self, weights_only: bool = False) -> dict:
return checkpoint
- def hpc_load(self, folderpath, on_gpu):
- filepath = '{}/hpc_ckpt_{}.ckpt'.format(folderpath, self.max_ckpt_in_folder(folderpath))
+ def hpc_load(self, checkpoint_path: str, on_gpu: bool):
+ """
+ Load model/training states from a 'PyTorch-Lightning checkpoint' file for hpc.
+ All restored states are listed in return value description of `dump_checkpoint`.
+ """
- # load on CPU first
- checkpoint = pl_load(filepath, map_location=lambda storage, loc: storage)
+ # read a checkpoint dictionary object from the 'PyTorch-Lightning checkpoint' file at `checkpoint_path`
+ checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage)
- # load model state
+ # acquire the model
model = self.trainer.get_model()
- # restore states from 'PyTorch-Lightning checkpoint' dictionary object
+ # restore model and datamodule state
self.restore_model_state(model, checkpoint)
if self.trainer.root_gpu is not None:
model.cuda(self.trainer.root_gpu)
- # load training state (affects trainer only)
+ # restore training state
self.restore_training_state(checkpoint)
- # call model hook
+ # call hpc specific hook
model.on_hpc_load(checkpoint)
- log.info(f'restored hpc model from: {filepath}')
+ def max_ckpt_in_folder(self, dir_path: Union[str, Path], name_key: str = 'ckpt_') -> Optional[int]:
+ """List files in `dir_path` whose names contain `name_key` and return the maximum suffix number.
+
+ Args:
+ dir_path: path of a directory which may contain files whose names include `name_key`
+
+ Returns:
+ None if there is no matching file, otherwise the maximum suffix number
+ """
+
+ # check directory existence
+ fs = get_filesystem(dir_path)
+ if not fs.exists(dir_path):
+ return None
- def max_ckpt_in_folder(self, path, name_key='ckpt_'):
- fs = get_filesystem(path)
- files = [os.path.basename(f["name"]) for f in fs.listdir(path)]
+ # check corresponding file existence
+ files = [os.path.basename(f["name"]) for f in fs.listdir(dir_path)]
files = [x for x in files if name_key in x]
if len(files) == 0:
- return 0
+ return None
+ # extract suffix number
ckpt_vs = []
for name in files:
name = name.split(name_key)[-1]
@@ -371,6 +364,13 @@ def max_ckpt_in_folder(self, path, name_key='ckpt_'):
return max(ckpt_vs)
+ def get_max_ckpt_path_from_folder(self, folder_path: Union[str, Path]) -> str:
+ """Get path of maximum-epoch checkpoint in the folder."""
+
+ max_suffix = self.max_ckpt_in_folder(folder_path)
+ ckpt_number = max_suffix if max_suffix is not None else 0
+ return f'{folder_path}/hpc_ckpt_{ckpt_number}.ckpt'
+
def save_checkpoint(self, filepath, weights_only: bool = False):
"""Save model/training states as a checkpoint file through state-dump and file-write.
diff --git a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py
index 28025859814cc2..6d206f3dd929ed 100644
--- a/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py
+++ b/pytorch_lightning/trainer/connectors/logger_connector/epoch_result_store.py
@@ -91,11 +91,13 @@ def check_dataloader_idx(self, result: Result) -> bool:
random_key = list(result.keys())[-1]
return result["meta"][random_key]["dataloader_idx"] is not None
- def get_latest_from_func_name(self, latest_result, func_name: str, *args, **kwargs) -> Dict:
+ def get_latest_from_func_name(self, latest_result_opt, func_name: str, *args, **kwargs) -> Dict:
results = {}
- add_dataloader_idx = self.check_dataloader_idx(latest_result)
- func = getattr(latest_result, func_name)
- results.update(func(*args, add_dataloader_idx=add_dataloader_idx, **kwargs))
+ for opt_idx in latest_result_opt:
+ latest_result = latest_result_opt[opt_idx]
+ add_dataloader_idx = self.check_dataloader_idx(latest_result)
+ func = getattr(latest_result, func_name)
+ results.update(func(*args, add_dataloader_idx=add_dataloader_idx, **kwargs))
return results
def run_latest_batch_metrics_with_func_name(self, func_name, *args, **kwargs) -> List[Dict]:
@@ -156,6 +158,7 @@ def append(self, result, dataloader_idx: Optional[int] = None, extra_info: Optio
assert isinstance(result, Result)
if dataloader_idx is None:
dataloader_idx = 0
+
if extra_info is None:
extra_info = {}
@@ -166,6 +169,7 @@ def append(self, result, dataloader_idx: Optional[int] = None, extra_info: Optio
if dataloader_idx not in self._internals:
self._internals[dataloader_idx] = {}
self._internals_reduced[dataloader_idx] = defaultdict(dict)
+ self._latest_ref[dataloader_idx] = {}
# extract infos
opt_idx = extra_info["opt_idx"]
@@ -173,7 +177,7 @@ def append(self, result, dataloader_idx: Optional[int] = None, extra_info: Optio
self._append_to_structure(self._internals[dataloader_idx], opt_idx, batch_idx, result)
- self._latest_ref[dataloader_idx] = result
+ self._latest_ref[dataloader_idx][opt_idx] = result
# [dataloader_idx] is a list
else:
@@ -181,7 +185,11 @@ def append(self, result, dataloader_idx: Optional[int] = None, extra_info: Optio
self._internals.setdefault(dataloader_idx, [])
self._internals[dataloader_idx].append(result)
- self._latest_ref[dataloader_idx] = result
+ if dataloader_idx not in self._latest_ref:
+ self._latest_ref[dataloader_idx] = {}
+ self._latest_ref[dataloader_idx][0] = {}
+
+ self._latest_ref[dataloader_idx][0] = result
def auto_reduce_results_on_epoch_end(self) -> None:
"""
@@ -206,13 +214,9 @@ def auto_reduce_results_on_epoch_end(self) -> None:
# TODO: How to start training in middle of epoch
opt_outputs = epoch_metrics[opt_idx]
- num_batch_idx = len(self._internals[dl_idx][num_opt_idx]) - 1
- assert num_batch_idx >= 0
- batch_indexes = self._internals[dl_idx][num_opt_idx].keys()
-
# reduce across time first
time_reduced_outputs = []
- for batch_idx in batch_indexes:
+ for batch_idx in opt_outputs.keys():
tbptt_outs = opt_outputs[batch_idx]
tbptt_outs = tbptt_outs[0].__class__.reduce_across_time(tbptt_outs)
if len(tbptt_outs) > 1:
diff --git a/pytorch_lightning/trainer/optimizers.py b/pytorch_lightning/trainer/optimizers.py
index 6f3ba80bd0734f..479d4017202611 100644
--- a/pytorch_lightning/trainer/optimizers.py
+++ b/pytorch_lightning/trainer/optimizers.py
@@ -94,6 +94,7 @@ def configure_schedulers(self, schedulers: list, monitor: Optional[str] = None):
lr_schedulers = []
default_config = {
'scheduler': None,
+ 'name': None, # no custom name
'interval': 'epoch', # after epoch is over
'frequency': 1, # every epoch/batch
'reduce_on_plateau': False, # most often not ReduceLROnPlateau scheduler
diff --git a/pytorch_lightning/trainer/supporters.py b/pytorch_lightning/trainer/supporters.py
index 57747be0d51fb5..b2ba92846b241a 100644
--- a/pytorch_lightning/trainer/supporters.py
+++ b/pytorch_lightning/trainer/supporters.py
@@ -50,7 +50,7 @@ def __init__(self, window_length: int):
def reset(self) -> None:
"""Empty the accumulator."""
- self = TensorRunningAccum(self.window_length)
+ self.__init__(self.window_length)
def last(self):
"""Get the last added element."""
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 35da90625adefe..5a837956bc4ce2 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -133,7 +133,7 @@ def __init__(
distributed_backend: Optional[str] = None,
automatic_optimization: Optional[bool] = None,
move_metrics_to_cpu: bool = False,
- enable_pl_optimizer: bool = True,
+ enable_pl_optimizer: bool = False,
):
r"""
Customize every aspect of training via flags
diff --git a/pytorch_lightning/utilities/distributed.py b/pytorch_lightning/utilities/distributed.py
index 9724f05247c009..c315c6633b6fb3 100644
--- a/pytorch_lightning/utilities/distributed.py
+++ b/pytorch_lightning/utilities/distributed.py
@@ -15,14 +15,14 @@
import os
import warnings
from functools import wraps
+from typing import Any, Optional, Union
import torch
+
from pytorch_lightning import _logger as log
-from typing import Union, Optional, Any
if torch.distributed.is_available():
- from torch.distributed import ReduceOp
- from torch.distributed import group
+ from torch.distributed import ReduceOp, group
else:
class ReduceOp:
SUM = None
@@ -145,15 +145,14 @@ def sync_ddp(
if group is None:
group = torch.distributed.group.WORLD
- if reduce_op is None:
- reduce_op = torch.distributed.ReduceOp.SUM
- elif isinstance(reduce_op, str) and reduce_op in ("avg", "mean"):
- reduce_op = torch.distributed.ReduceOp.SUM
+ op = reduce_op if isinstance(reduce_op, ReduceOp) else ReduceOp.SUM
+
+ if isinstance(reduce_op, str) and reduce_op.lower() in ("avg", "mean"):
divide_by_world_size = True
# sync all processes before reduction
torch.distributed.barrier(group=group)
- torch.distributed.all_reduce(result, op=reduce_op, group=group, async_op=False)
+ torch.distributed.all_reduce(result, op=op, group=group, async_op=False)
if divide_by_world_size:
result = result / torch.distributed.get_world_size(group)
diff --git a/tests/__init__.py b/tests/__init__.py
index 981d685430da99..1bb81c466e6eb7 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,3 +1,16 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
import os
import numpy as np
diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py
index d8847d592e1de6..6f6b5f858ff175 100644
--- a/tests/base/deterministic_model.py
+++ b/tests/base/deterministic_model.py
@@ -15,7 +15,6 @@
from torch import nn
from torch.utils.data import Dataset, DataLoader
-from pytorch_lightning.core.step_result import TrainResult, EvalResult
from pytorch_lightning.core.lightning import LightningModule
@@ -111,235 +110,6 @@ def training_epoch_end_scalar(self, outputs):
assert batch_out.grad_fn is None
assert isinstance(batch_out, torch.Tensor)
- def training_step_no_default_callbacks_for_train_loop(self, batch, batch_idx):
- """
- Early stop and checkpoint only on these values
- """
- acc = self.step(batch, batch_idx)
- result = TrainResult(minimize=acc)
- assert 'early_step_on' not in result
- assert 'checkpoint_on' in result
- return result
-
- def training_step_no_callbacks_result_obj(self, batch, batch_idx):
- """
- Early stop and checkpoint only on these values
- """
- acc = self.step(batch, batch_idx)
- result = TrainResult(minimize=acc, checkpoint_on=False)
- assert 'early_step_on' not in result
- assert 'checkpoint_on' not in result
- return result
-
- def training_step_result_log_epoch_and_step_for_callbacks(self, batch, batch_idx):
- """
- Early stop and checkpoint only on these values
- """
- acc = self.step(batch, batch_idx)
-
- self.assert_backward = False
- losses = [20, 19, 18, 10, 15, 14, 9, 11, 11, 20]
- idx = self.current_epoch
- loss = acc + losses[idx]
- result = TrainResult(minimize=loss, early_stop_on=loss, checkpoint_on=loss)
- return result
-
- def training_step_result_log_step_only(self, batch, batch_idx):
- acc = self.step(batch, batch_idx)
- result = TrainResult(minimize=acc)
-
- # step only metrics
- result.log(f'step_log_and_pbar_acc1_b{batch_idx}', torch.tensor(11).type_as(acc), prog_bar=True)
- result.log(f'step_log_acc2_b{batch_idx}', torch.tensor(12).type_as(acc))
- result.log(f'step_pbar_acc3_b{batch_idx}', torch.tensor(13).type_as(acc), logger=False, prog_bar=True)
-
- self.training_step_called = True
- return result
-
- def training_step_result_log_epoch_only(self, batch, batch_idx):
- acc = self.step(batch, batch_idx)
- result = TrainResult(minimize=acc)
-
- result.log(f'epoch_log_and_pbar_acc1_e{self.current_epoch}', torch.tensor(14).type_as(acc),
- on_epoch=True, prog_bar=True, on_step=False)
- result.log(f'epoch_log_acc2_e{self.current_epoch}', torch.tensor(15).type_as(acc),
- on_epoch=True, on_step=False)
- result.log(f'epoch_pbar_acc3_e{self.current_epoch}', torch.tensor(16).type_as(acc),
- on_epoch=True, logger=False, prog_bar=True, on_step=False)
-
- self.training_step_called = True
- return result
-
- def training_step_result_log_epoch_and_step(self, batch, batch_idx):
- acc = self.step(batch, batch_idx)
- result = TrainResult(minimize=acc)
-
- val_1 = (5 + batch_idx) * (self.current_epoch + 1)
- val_2 = (6 + batch_idx) * (self.current_epoch + 1)
- val_3 = (7 + batch_idx) * (self.current_epoch + 1)
- result.log('step_epoch_log_and_pbar_acc1', torch.tensor(val_1).type_as(acc),
- on_epoch=True, prog_bar=True)
- result.log('step_epoch_log_acc2', torch.tensor(val_2).type_as(acc),
- on_epoch=True)
- result.log('step_epoch_pbar_acc3', torch.tensor(val_3).type_as(acc),
- on_epoch=True, logger=False, prog_bar=True)
-
- self.training_step_called = True
- return result
-
- def training_epoch_end_return_for_log_epoch_and_step(self, result):
- """
- There should be an array of scalars without graphs that are all 171 (4 of them)
- """
- self.training_epoch_end_called = True
-
- if self.use_dp or self.use_ddp2:
- pass
- else:
- # only saw 4 batches
- assert isinstance(result, TrainResult)
-
- result.step_epoch_log_acc2 = result.step_epoch_log_acc2_step.prod()
- result.step_epoch_pbar_acc3 = result.step_epoch_pbar_acc3_step.prod()
- result.step_epoch_log_and_pbar_acc1 = result.step_epoch_log_and_pbar_acc1_step.prod()
- result.minimize = result.minimize.mean()
- result.checkpoint_on = result.checkpoint_on.mean()
-
- result.step_epoch_log_and_pbar_acc1_step = result.step_epoch_log_and_pbar_acc1_step.prod()
- result.step_epoch_log_and_pbar_acc1_epoch = result.step_epoch_log_and_pbar_acc1_epoch.prod()
- result.step_epoch_log_acc2_step = result.step_epoch_log_acc2_step.prod()
- result.step_epoch_log_acc2_epoch = result.step_epoch_log_acc2_epoch.prod()
- result.step_epoch_pbar_acc3_step = result.step_epoch_pbar_acc3_step.prod()
- result.step_epoch_pbar_acc3_epoch = result.step_epoch_pbar_acc3_epoch.prod()
- result.log('epoch_end_log_acc', torch.tensor(1212).type_as(result.step_epoch_log_acc2_epoch),
- logger=True, on_epoch=True)
- result.log('epoch_end_pbar_acc', torch.tensor(1213).type_as(result.step_epoch_log_acc2_epoch),
- logger=False, prog_bar=True, on_epoch=True)
- result.log('epoch_end_log_pbar_acc', torch.tensor(1214).type_as(result.step_epoch_log_acc2_epoch),
- logger=True, prog_bar=True, on_epoch=True)
- return result
-
- # --------------------------
- # EvalResults
- # --------------------------
- def validation_step_result_callbacks(self, batch, batch_idx):
- acc = self.step(batch, batch_idx)
-
- self.assert_backward = False
- losses = [20, 19, 20, 21, 22, 23]
- idx = self.current_epoch
- loss = acc + losses[idx]
- result = EvalResult(early_stop_on=loss, checkpoint_on=loss)
-
- self.validation_step_called = True
- return result
-
- def validation_step_result_no_callbacks(self, batch, batch_idx):
- acc = self.step(batch, batch_idx)
-
- self.assert_backward = False
- losses = [20, 19, 20, 21, 22, 23, 50, 50, 50, 50, 50, 50]
- idx = self.current_epoch
- loss = acc + losses[idx]
-
- result = EvalResult(checkpoint_on=loss)
-
- self.validation_step_called = True
- return result
-
- def validation_step_result_only_epoch_metrics(self, batch, batch_idx):
- """
- Only track epoch level metrics
- """
- acc = self.step(batch, batch_idx)
- result = EvalResult(checkpoint_on=acc, early_stop_on=acc)
-
- # step only metrics
- result.log('no_val_no_pbar', torch.tensor(11 + batch_idx).type_as(acc), prog_bar=False, logger=False)
- result.log('val_step_log_acc', torch.tensor(11 + batch_idx).type_as(acc), prog_bar=False, logger=True)
- result.log('val_step_log_pbar_acc', torch.tensor(12 + batch_idx).type_as(acc), prog_bar=True, logger=True)
- result.log('val_step_pbar_acc', torch.tensor(13 + batch_idx).type_as(acc), prog_bar=True, logger=False)
-
- self.validation_step_called = True
- return result
-
- def validation_step_result_only_step_metrics(self, batch, batch_idx):
- """
- Only track epoch level metrics
- """
- acc = self.step(batch, batch_idx)
- result = EvalResult(checkpoint_on=acc, early_stop_on=acc)
-
- # step only metrics
- result.log('no_val_no_pbar', torch.tensor(11 + batch_idx).type_as(acc),
- prog_bar=False, logger=False, on_epoch=False, on_step=True)
- result.log('val_step_log_acc', torch.tensor(11 + batch_idx).type_as(acc),
- prog_bar=False, logger=True, on_epoch=False, on_step=True)
- result.log('val_step_log_pbar_acc', torch.tensor(12 + batch_idx).type_as(acc),
- prog_bar=True, logger=True, on_epoch=False, on_step=True)
- result.log('val_step_pbar_acc', torch.tensor(13 + batch_idx).type_as(acc),
- prog_bar=True, logger=False, on_epoch=False, on_step=True)
- result.log('val_step_batch_idx', torch.tensor(batch_idx).type_as(acc),
- prog_bar=True, logger=True, on_epoch=False, on_step=True)
-
- self.validation_step_called = True
- return result
-
- def validation_step_result_epoch_step_metrics(self, batch, batch_idx):
- """
- Only track epoch level metrics
- """
- acc = self.step(batch, batch_idx)
- result = EvalResult(checkpoint_on=acc, early_stop_on=acc)
-
- # step only metrics
- result.log('no_val_no_pbar', torch.tensor(11 + batch_idx).type_as(acc),
- prog_bar=False, logger=False, on_epoch=True, on_step=True)
- result.log('val_step_log_acc', torch.tensor(11 + batch_idx).type_as(acc),
- prog_bar=False, logger=True, on_epoch=True, on_step=True)
- result.log('val_step_log_pbar_acc', torch.tensor(12 + batch_idx).type_as(acc),
- prog_bar=True, logger=True, on_epoch=True, on_step=True)
- result.log('val_step_pbar_acc', torch.tensor(13 + batch_idx).type_as(acc),
- prog_bar=True, logger=False, on_epoch=True, on_step=True)
- result.log('val_step_batch_idx', torch.tensor(batch_idx).type_as(acc),
- prog_bar=True, logger=True, on_epoch=True, on_step=True)
-
- self.validation_step_called = True
- return result
-
- def validation_step_for_epoch_end_result(self, batch, batch_idx):
- """
- EvalResult flows to epoch end (without step_end)
- """
- acc = self.step(batch, batch_idx)
- result = EvalResult(checkpoint_on=acc, early_stop_on=acc)
-
- # step only metrics
- result.log('val_step_metric', torch.tensor(batch_idx).type_as(acc),
- prog_bar=True, logger=True, on_epoch=True, on_step=False)
- result.log('batch_idx', torch.tensor(batch_idx).type_as(acc),
- prog_bar=True, logger=True, on_epoch=True, on_step=False)
-
- self.validation_step_called = True
- return result
-
- def validation_epoch_end_result(self, result):
- self.validation_epoch_end_called = True
-
- if self.trainer.running_sanity_check:
- assert len(result.batch_idx) == 2
- else:
- assert len(result.batch_idx) == self.trainer.limit_val_batches
-
- expected_val = result.val_step_metric.sum() / len(result.batch_idx)
- result.val_step_metric = result.val_step_metric.mean()
- result.batch_idx = result.batch_idx.mean()
- assert result.val_step_metric == expected_val
-
- result.log('val_epoch_end_metric', torch.tensor(189).type_as(result.val_step_metric), prog_bar=True)
-
- return result
-
# --------------------------
# dictionary returns
# --------------------------
diff --git a/tests/base/develop_pipelines.py b/tests/base/develop_pipelines.py
index 18bb0c4d72715b..24535dc67da8e8 100644
--- a/tests/base/develop_pipelines.py
+++ b/tests/base/develop_pipelines.py
@@ -86,9 +86,11 @@ def run_model_test(trainer_options, model, on_gpu: bool = True, version=None, wi
trainer.optimizers, trainer.lr_schedulers, trainer.optimizer_frequencies = \
trainer.init_optimizers(pretrained_model)
- # test HPC loading / saving
+ # test HPC saving
trainer.checkpoint_connector.hpc_save(save_dir, logger)
- trainer.checkpoint_connector.hpc_load(save_dir, on_gpu=on_gpu)
+ # test HPC loading
+ checkpoint_path = trainer.checkpoint_connector.get_max_ckpt_path_from_folder(save_dir)
+ trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=on_gpu)
def run_prediction(dataloader, trained_model, dp=False, min_acc=0.50):
diff --git a/tests/base/model_test_steps.py b/tests/base/model_test_steps.py
index 0010dcdf14a197..440ec4c4c35b47 100644
--- a/tests/base/model_test_steps.py
+++ b/tests/base/model_test_steps.py
@@ -59,38 +59,6 @@ def test_step(self, batch, batch_idx, *args, **kwargs):
'test_dic': {'test_loss_a': loss_test}})
return output
- def test_step_result_obj(self, batch, batch_idx, *args, **kwargs):
- """
- Default, baseline test_step
- :param batch:
- :return:
- """
- x, y = batch
- x = x.view(x.size(0), -1)
- y_hat = self(x)
-
- loss_test = self.loss(y, y_hat)
-
- # acc
- labels_hat = torch.argmax(y_hat, dim=1)
- test_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
- test_acc = torch.tensor(test_acc)
-
- test_acc = test_acc.type_as(x)
-
- result = EvalResult()
- # alternate possible outputs to test
- if batch_idx % 1 == 0:
- result.log_dict({'test_loss': loss_test, 'test_acc': test_acc})
- return result
- if batch_idx % 2 == 0:
- return test_acc
-
- if batch_idx % 3 == 0:
- result.log_dict({'test_loss': loss_test, 'test_acc': test_acc})
- result.test_dic = {'test_loss_a': loss_test}
- return result
-
def test_step__multiple_dataloaders(self, batch, batch_idx, dataloader_idx, **kwargs):
"""
Default, baseline test_step
diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py
index caec6db9aaa10a..0590f5b7b5cccf 100644
--- a/tests/base/model_train_steps.py
+++ b/tests/base/model_train_steps.py
@@ -53,25 +53,6 @@ def training_step(self, batch, batch_idx, optimizer_idx=None):
)
return output
- def training_step_result_obj(self, batch, batch_idx, optimizer_idx=None):
- # forward pass
- x, y = batch
- x = x.view(x.size(0), -1)
- y_hat = self(x)
-
- # calculate loss
- loss_val = self.loss(y, y_hat)
- log_val = loss_val
-
- # alternate between tensors and scalars for "log" and "progress_bar"
- if batch_idx % 2 == 0:
- log_val = log_val.item()
-
- result = TrainResult(loss_val)
- result.log('some_val', log_val * log_val, prog_bar=True, logger=False)
- result.log('train_some_val', log_val * log_val)
- return result
-
def training_step__inf_loss(self, batch, batch_idx, optimizer_idx=None):
output = self.training_step(batch, batch_idx, optimizer_idx)
if batch_idx == self.test_step_inf_loss:
@@ -81,19 +62,6 @@ def training_step__inf_loss(self, batch, batch_idx, optimizer_idx=None):
output /= 0
return output
- def training_step_full_loop_result_obj_dp(self, batch, batch_idx, optimizer_idx=None):
- """
- Full loop flow train step (result obj + dp)
- """
- x, y = batch
- x = x.view(x.size(0), -1)
- y_hat = self(x.to(self.device))
- loss_val = y_hat.sum()
- result = TrainResult(minimize=loss_val)
- result.log('train_step_metric', loss_val + 1)
- self.training_step_called = True
- return result
-
def training_step_result_obj_dp(self, batch, batch_idx, optimizer_idx=None):
# forward pass
x, y = batch
@@ -136,23 +104,6 @@ def training_epoch_end_full_loop_result_obj_dp(self, result):
return result
- def eval_step_full_loop_result_obj_dp(self, batch, batch_idx, optimizer_idx=None):
- """
- Full loop flow train step (result obj + dp)
- """
- x, y = batch
- x = x.view(x.size(0), -1)
- y_hat = self(x.to(self.device))
- loss_val = y_hat.sum()
- result = EvalResult(checkpoint_on=loss_val, early_stop_on=loss_val)
-
- eval_name = 'validation' if not self.trainer.testing else 'test'
- result.log(f'{eval_name}_step_metric', loss_val + 1, on_step=True)
-
- setattr(self, f'{eval_name}_step_called', True)
-
- return result
-
def eval_step_end_full_loop_result_obj_dp(self, result):
"""
Full loop flow train step (result obj + dp)
@@ -198,20 +149,3 @@ def eval_epoch_end_full_loop_result_obj_dp(self, result):
setattr(result, f'{eval_name}_step_metric', reduced)
return result
-
- def training_step__using_metrics(self, batch, batch_idx, optimizer_idx=None):
- """Lightning calls this inside the training loop"""
- # forward pass
- x, y = batch
- x = x.view(x.size(0), -1)
- y_hat = self(x)
-
- # calculate loss
- loss_val = self.loss(y, y_hat)
-
- # call metric
- val = self.metric(x, y)
-
- result = TrainResult(minimize=loss_val)
- result.log('metric_val', val)
- return result
diff --git a/tests/base/model_valid_steps.py b/tests/base/model_valid_steps.py
index e23e62dccdaba6..a008a6cecf1102 100644
--- a/tests/base/model_valid_steps.py
+++ b/tests/base/model_valid_steps.py
@@ -71,25 +71,6 @@ def validation_step_no_monitor(self, batch, batch_idx, *args, **kwargs):
})
return output
- def validation_step_result_obj(self, batch, batch_idx, *args, **kwargs):
- x, y = batch
- x = x.view(x.size(0), -1)
- y_hat = self(x)
-
- loss_val = self.loss(y, y_hat)
-
- # acc
- labels_hat = torch.argmax(y_hat, dim=1)
- val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
- val_acc = torch.tensor(val_acc).type_as(x)
-
- result = EvalResult(checkpoint_on=loss_val, early_stop_on=loss_val)
- result.log_dict({
- 'val_loss': loss_val,
- 'val_acc': val_acc,
- })
- return result
-
def validation_step_result_obj_dp(self, batch, batch_idx, *args, **kwargs):
x, y = batch
x = x.view(x.size(0), -1)
diff --git a/tests/callbacks/test_callbacks.py b/tests/callbacks/test_callbacks.py
index c00c712bb3b13c..070bb4e9f6989b 100644
--- a/tests/callbacks/test_callbacks.py
+++ b/tests/callbacks/test_callbacks.py
@@ -33,6 +33,8 @@ def test_trainer_callback_system(torch_save):
limit_train_batches=3,
limit_test_batches=2,
progress_bar_refresh_rate=0,
+ # todo: enabled because we internally wrap the model for the optimizer step; this should be fixed
+ enable_pl_optimizer=True
)
# no call yet
diff --git a/tests/callbacks/test_lr_monitor.py b/tests/callbacks/test_lr_monitor.py
index a6783435ed3e27..d29f254df67d0d 100644
--- a/tests/callbacks/test_lr_monitor.py
+++ b/tests/callbacks/test_lr_monitor.py
@@ -13,11 +13,11 @@
# limitations under the License.
import pytest
+import tests.base.develop_utils as tutils
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from tests.base import EvalModelTemplate
-import tests.base.develop_utils as tutils
+from tests.base import BoringModel, EvalModelTemplate
def test_lr_monitor_single_lr(tmpdir):
@@ -43,7 +43,7 @@ def test_lr_monitor_single_lr(tmpdir):
'Momentum should not be logged by default'
assert len(lr_monitor.lrs) == len(trainer.lr_schedulers), \
'Number of learning rates logged does not match number of lr schedulers'
- assert all([k in ['lr-Adam'] for k in lr_monitor.lrs.keys()]), \
+ assert lr_monitor.lr_sch_names == list(lr_monitor.lrs.keys()) == ['lr-Adam'], \
'Names of learning rates not set correctly'
@@ -134,7 +134,7 @@ def test_lr_monitor_multi_lrs(tmpdir, logging_interval):
assert lr_monitor.lrs, 'No learning rates logged'
assert len(lr_monitor.lrs) == len(trainer.lr_schedulers), \
'Number of learning rates logged does not match number of lr schedulers'
- assert all([k in ['lr-Adam', 'lr-Adam-1'] for k in lr_monitor.lrs.keys()]), \
+ assert lr_monitor.lr_sch_names == ['lr-Adam', 'lr-Adam-1'], \
'Names of learning rates not set correctly'
if logging_interval == 'step':
@@ -167,5 +167,27 @@ def test_lr_monitor_param_groups(tmpdir):
assert lr_monitor.lrs, 'No learning rates logged'
assert len(lr_monitor.lrs) == 2 * len(trainer.lr_schedulers), \
'Number of learning rates logged does not match number of param groups'
- assert all([k in ['lr-Adam/pg1', 'lr-Adam/pg2'] for k in lr_monitor.lrs.keys()]), \
+ assert lr_monitor.lr_sch_names == ['lr-Adam']
+ assert list(lr_monitor.lrs.keys()) == ['lr-Adam/pg1', 'lr-Adam/pg2'], \
'Names of learning rates not set correctly'
+
+
+def test_lr_monitor_custom_name(tmpdir):
+ class TestModel(BoringModel):
+ def configure_optimizers(self):
+ optimizer, [scheduler] = super().configure_optimizers()
+ lr_scheduler = {'scheduler': scheduler, 'name': 'my_logging_name'}
+ return optimizer, [lr_scheduler]
+
+ lr_monitor = LearningRateMonitor()
+ trainer = Trainer(
+ default_root_dir=tmpdir,
+ max_epochs=2,
+ limit_val_batches=0.1,
+ limit_train_batches=0.5,
+ callbacks=[lr_monitor],
+ progress_bar_refresh_rate=0,
+ weights_summary=None,
+ )
+ trainer.fit(TestModel())
+ assert lr_monitor.lr_sch_names == list(lr_monitor.lrs.keys()) == ['my_logging_name']
diff --git a/tests/checkpointing/test_model_checkpoint.py b/tests/checkpointing/test_model_checkpoint.py
index 31154eac1bf0d6..106c34030051e7 100644
--- a/tests/checkpointing/test_model_checkpoint.py
+++ b/tests/checkpointing/test_model_checkpoint.py
@@ -12,15 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
-import os.path as osp
import pickle
import platform
import re
from argparse import Namespace
-from distutils.version import LooseVersion
from pathlib import Path
from unittest import mock
-from unittest.mock import MagicMock, Mock
+from unittest.mock import Mock
import cloudpickle
import pytest
@@ -641,20 +639,17 @@ def validation_epoch_end(self, outputs):
@pytest.mark.parametrize("enable_pl_optimizer", [False, True])
def test_checkpoint_repeated_strategy(enable_pl_optimizer, tmpdir):
"""
- This test validates that the checkpoint can be called when provided to callacks list
+ This test validates that the checkpoint can be called when provided to callbacks list
"""
-
checkpoint_callback = ModelCheckpoint(monitor='val_loss', dirpath=tmpdir, filename="{epoch:02d}")
class ExtendedBoringModel(BoringModel):
-
def validation_step(self, batch, batch_idx):
output = self.layer(batch)
loss = self.loss(batch, output)
return {"val_loss": loss}
model = ExtendedBoringModel()
- model.validation_step_end = None
model.validation_epoch_end = None
trainer = Trainer(
max_epochs=1,
@@ -663,92 +658,30 @@ def validation_step(self, batch, batch_idx):
limit_test_batches=2,
callbacks=[checkpoint_callback],
enable_pl_optimizer=enable_pl_optimizer,
+ weights_summary=None,
+ progress_bar_refresh_rate=0,
)
-
trainer.fit(model)
assert os.listdir(tmpdir) == ['epoch=00.ckpt']
- def get_last_checkpoint():
- ckpts = os.listdir(tmpdir)
- ckpts_map = {int(x.split("=")[1].split('.')[0]): osp.join(tmpdir, x) for x in ckpts if "epoch" in x}
- num_ckpts = len(ckpts_map) - 1
- return ckpts_map[num_ckpts]
-
- for idx in range(1, 5):
+ for idx in range(4):
# load from checkpoint
- chk = get_last_checkpoint()
- model = BoringModel.load_from_checkpoint(chk)
- trainer = pl.Trainer(
- max_epochs=1,
- limit_train_batches=2,
- limit_val_batches=2,
- limit_test_batches=2,
- resume_from_checkpoint=chk,
- enable_pl_optimizer=enable_pl_optimizer)
- trainer.fit(model)
- trainer.test(model)
-
- assert str(os.listdir(tmpdir)) == "['epoch=00.ckpt']"
-
-
-@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@pytest.mark.parametrize("enable_pl_optimizer", [False, True])
-def test_checkpoint_repeated_strategy_tmpdir(enable_pl_optimizer, tmpdir):
- """
- This test validates that the checkpoint can be called when provided to callacks list
- """
-
- checkpoint_callback = ModelCheckpoint(monitor='val_loss', filepath=os.path.join(tmpdir, "{epoch:02d}"))
-
- class ExtendedBoringModel(BoringModel):
-
- def validation_step(self, batch, batch_idx):
- output = self.layer(batch)
- loss = self.loss(batch, output)
- return {"val_loss": loss}
-
- model = ExtendedBoringModel()
- model.validation_step_end = None
- model.validation_epoch_end = None
- trainer = Trainer(
- default_root_dir=tmpdir,
- max_epochs=1,
- limit_train_batches=2,
- limit_val_batches=2,
- limit_test_batches=2,
- callbacks=[checkpoint_callback],
- enable_pl_optimizer=enable_pl_optimizer,
- )
-
- trainer.fit(model)
- assert sorted(os.listdir(tmpdir)) == sorted(['epoch=00.ckpt', 'lightning_logs'])
- path_to_lightning_logs = osp.join(tmpdir, 'lightning_logs')
- assert sorted(os.listdir(path_to_lightning_logs)) == sorted(['version_0'])
-
- def get_last_checkpoint():
- ckpts = os.listdir(tmpdir)
- ckpts_map = {int(x.split("=")[1].split('.')[0]): osp.join(tmpdir, x) for x in ckpts if "epoch" in x}
- num_ckpts = len(ckpts_map) - 1
- return ckpts_map[num_ckpts]
-
- for idx in range(1, 5):
-
- # load from checkpoint
- chk = get_last_checkpoint()
- model = LogInTwoMethods.load_from_checkpoint(chk)
+ model = LogInTwoMethods.load_from_checkpoint(checkpoint_callback.best_model_path)
trainer = pl.Trainer(
default_root_dir=tmpdir,
max_epochs=1,
limit_train_batches=2,
limit_val_batches=2,
limit_test_batches=2,
- resume_from_checkpoint=chk,
- enable_pl_optimizer=enable_pl_optimizer)
-
+ resume_from_checkpoint=checkpoint_callback.best_model_path,
+ enable_pl_optimizer=enable_pl_optimizer,
+ weights_summary=None,
+ progress_bar_refresh_rate=0,
+ )
trainer.fit(model)
- trainer.test(model)
- assert sorted(os.listdir(tmpdir)) == sorted(['epoch=00.ckpt', 'lightning_logs'])
- assert sorted(os.listdir(path_to_lightning_logs)) == sorted([f'version_{i}' for i in range(idx + 1)])
+ trainer.test(model, verbose=False)
+ assert set(os.listdir(tmpdir)) == {'epoch=00.ckpt', 'lightning_logs'}
+ assert set(os.listdir(tmpdir.join("lightning_logs"))) == {f'version_{i}' for i in range(4)}
@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
@@ -760,21 +693,22 @@ def test_checkpoint_repeated_strategy_extended(enable_pl_optimizer, tmpdir):
"""
class ExtendedBoringModel(BoringModel):
-
def validation_step(self, batch, batch_idx):
output = self.layer(batch)
loss = self.loss(batch, output)
return {"val_loss": loss}
+ def validation_epoch_end(self, *_):
+ ...
+
def assert_trainer_init(trainer):
assert not trainer.checkpoint_connector.has_trained
assert trainer.global_step == 0
assert trainer.current_epoch == 0
def get_last_checkpoint(ckpt_dir):
- ckpts = os.listdir(ckpt_dir)
- ckpts.sort()
- return osp.join(ckpt_dir, ckpts[-1])
+ last = ckpt_dir.listdir(sort=True)[-1]
+ return str(last)
def assert_checkpoint_content(ckpt_dir):
chk = pl_load(get_last_checkpoint(ckpt_dir))
@@ -782,23 +716,15 @@ def assert_checkpoint_content(ckpt_dir):
assert chk["global_step"] == 4
def assert_checkpoint_log_dir(idx):
- lightning_logs_path = osp.join(tmpdir, 'lightning_logs')
- assert sorted(os.listdir(lightning_logs_path)) == [f'version_{i}' for i in range(idx + 1)]
- assert len(os.listdir(ckpt_dir)) == epochs
-
- def get_model():
- model = ExtendedBoringModel()
- model.validation_step_end = None
- model.validation_epoch_end = None
- return model
+ lightning_logs = tmpdir / 'lightning_logs'
+ actual = [d.basename for d in lightning_logs.listdir(sort=True)]
+ assert actual == [f'version_{i}' for i in range(idx + 1)]
+ assert len(ckpt_dir.listdir()) == epochs
- ckpt_dir = osp.join(tmpdir, 'checkpoints')
+ ckpt_dir = tmpdir / 'checkpoints'
checkpoint_cb = ModelCheckpoint(dirpath=ckpt_dir, save_top_k=-1)
epochs = 2
limit_train_batches = 2
-
- model = get_model()
-
trainer_config = dict(
default_root_dir=tmpdir,
max_epochs=epochs,
@@ -806,40 +732,32 @@ def get_model():
limit_val_batches=3,
limit_test_batches=4,
enable_pl_optimizer=enable_pl_optimizer,
- )
-
- trainer = pl.Trainer(
- **trainer_config,
callbacks=[checkpoint_cb],
)
+ trainer = pl.Trainer(**trainer_config)
assert_trainer_init(trainer)
+ model = ExtendedBoringModel()
trainer.fit(model)
assert trainer.checkpoint_connector.has_trained
assert trainer.global_step == epochs * limit_train_batches
assert trainer.current_epoch == epochs - 1
assert_checkpoint_log_dir(0)
+ assert_checkpoint_content(ckpt_dir)
trainer.test(model)
assert trainer.current_epoch == epochs - 1
- assert_checkpoint_content(ckpt_dir)
-
for idx in range(1, 5):
chk = get_last_checkpoint(ckpt_dir)
assert_checkpoint_content(ckpt_dir)
- checkpoint_cb = ModelCheckpoint(dirpath=ckpt_dir, save_top_k=-1)
- model = get_model()
-
# load from checkpoint
- trainer = pl.Trainer(
- **trainer_config,
- resume_from_checkpoint=chk,
- callbacks=[checkpoint_cb],
- )
+ trainer_config["callbacks"] = [ModelCheckpoint(dirpath=ckpt_dir, save_top_k=-1)]
+ trainer = pl.Trainer(**trainer_config, resume_from_checkpoint=chk)
assert_trainer_init(trainer)
+ model = ExtendedBoringModel()
trainer.test(model)
assert not trainer.checkpoint_connector.has_trained
assert trainer.global_step == epochs * limit_train_batches
@@ -1020,3 +938,42 @@ def __init__(self, hparams):
else:
# make sure it's not AttributeDict
assert type(ckpt[model.CHECKPOINT_HYPER_PARAMS_KEY]) == hparams_type
+
+
+@pytest.mark.parametrize('max_epochs', [3, 4])
+@pytest.mark.parametrize(
+ 'save_top_k, expected',
+ [
+ (1, ['curr_epoch.ckpt']),
+ (2, ['curr_epoch.ckpt', 'curr_epoch-v0.ckpt']),
+ ]
+)
+def test_model_checkpoint_file_already_exists(tmpdir, max_epochs, save_top_k, expected):
+ """
+ Test that a version suffix is added to the filename if a file with that name already exists in dirpath.
+ """
+ model_checkpoint = ModelCheckpoint(
+ dirpath=tmpdir,
+ filename='curr_epoch',
+ save_top_k=save_top_k,
+ monitor='epoch',
+ mode='max',
+ )
+ trainer = Trainer(
+ default_root_dir=tmpdir,
+ callbacks=[model_checkpoint],
+ max_epochs=max_epochs,
+ limit_train_batches=2,
+ limit_val_batches=2,
+ logger=None,
+ weights_summary=None,
+ progress_bar_refresh_rate=0,
+ )
+
+ model = BoringModel()
+ trainer.fit(model)
+ ckpt_files = os.listdir(tmpdir)
+ assert set(ckpt_files) == set(expected)
+
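+ # the checkpoint's stored 'epoch' appears to be offset by one from the trainer's zero-based epoch, hence the `- 1`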
+ epochs_in_ckpt_files = [pl_load(os.path.join(tmpdir, f))['epoch'] - 1 for f in ckpt_files]
+ assert sorted(epochs_in_ckpt_files) == list(range(max_epochs - save_top_k, max_epochs))
diff --git a/tests/collect_env_details.py b/tests/collect_env_details.py
index 1d443795d28767..2b8c4b3fafeed3 100644
--- a/tests/collect_env_details.py
+++ b/tests/collect_env_details.py
@@ -1,3 +1,16 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
"""Diagnose your system and show basic information
This serves mainly to get detailed info for better bug reporting.
diff --git a/tests/conftest.py b/tests/conftest.py
index ad4b7169456a89..c6a14a99b24789 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,7 +1,21 @@
-import sys
-import threading
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from functools import partial, wraps
from http.server import SimpleHTTPRequestHandler
+import sys
+import threading
import pytest
import torch.multiprocessing as mp
diff --git a/tests/core/test_results.py b/tests/core/test_results.py
index f4486ce6ae4194..797004b7f21ffa 100644
--- a/tests/core/test_results.py
+++ b/tests/core/test_results.py
@@ -18,7 +18,7 @@
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
-from pytorch_lightning import Trainer, seed_everything
+from pytorch_lightning import Trainer
from pytorch_lightning.core.step_result import Result, TrainResult, EvalResult
import tests.base.develop_utils as tutils
diff --git a/tests/deprecated_api/__init__.py b/tests/deprecated_api/__init__.py
new file mode 100644
index 00000000000000..99e21d1ed6b229
--- /dev/null
+++ b/tests/deprecated_api/__init__.py
@@ -0,0 +1,21 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Test deprecated functionality which will be removed in vX.Y.Z"""
+import sys
+
+
+def _soft_unimport_module(str_module):
+ # once a module has been imported (e.g. during pytest collection) it stays cached in sys.modules
+ if str_module in sys.modules:
+ del sys.modules[str_module]
diff --git a/tests/deprecated_api/test_remove_1-2.py b/tests/deprecated_api/test_remove_1-2.py
new file mode 100644
index 00000000000000..331208d56df103
--- /dev/null
+++ b/tests/deprecated_api/test_remove_1-2.py
@@ -0,0 +1,45 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Test deprecated functionality which will be removed in vX.Y.Z"""
+
+import pytest
+import torch
+
+from pytorch_lightning.callbacks import ModelCheckpoint
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
+
+
+def test_tbd_remove_in_v1_2_0():
+ with pytest.deprecated_call(match='will be removed in v1.2'):
+ ModelCheckpoint(filepath='..')
+
+ with pytest.deprecated_call(match='will be removed in v1.2'):
+ ModelCheckpoint('..')
+
+ with pytest.raises(MisconfigurationException, match='inputs which are not feasible'):
+ ModelCheckpoint(filepath='..', dirpath='.')
+
+
+def test_tbd_remove_in_v1_2_0_metrics():
+ from pytorch_lightning.metrics.classification import Fbeta
+ from pytorch_lightning.metrics.functional.classification import f1_score, fbeta_score
+
+ with pytest.deprecated_call(match='will be removed in v1.2'):
+ Fbeta(2)
+
+ with pytest.deprecated_call(match='will be removed in v1.2'):
+ fbeta_score(torch.tensor([0, 1, 2, 3]), torch.tensor([0, 1, 2, 1]), 0.2)
+
+ with pytest.deprecated_call(match='will be removed in v1.2'):
+ f1_score(torch.tensor([0, 1, 0, 1]), torch.tensor([0, 1, 0, 0]))
diff --git a/tests/test_deprecated.py b/tests/deprecated_api/test_remove_1-3.py
similarity index 60%
rename from tests/test_deprecated.py
rename to tests/deprecated_api/test_remove_1-3.py
index 59c6728009b6f0..7ec69796b1e46e 100644
--- a/tests/test_deprecated.py
+++ b/tests/deprecated_api/test_remove_1-3.py
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test deprecated functionality which will be removed in vX.Y.Z"""
-import sys
from argparse import ArgumentParser
from unittest import mock
@@ -21,10 +20,8 @@
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
-from pytorch_lightning.metrics.functional.classification import auc
from pytorch_lightning.profiler.profilers import PassThroughProfiler, SimpleProfiler
from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from tests.base import EvalModelTemplate
def test_tbd_remove_in_v1_3_0(tmpdir):
@@ -52,27 +49,27 @@ def __init__(self, hparams):
def test_tbd_remove_in_v1_3_0_metrics():
+ from pytorch_lightning.metrics.functional.classification import to_onehot
with pytest.deprecated_call(match='will be removed in v1.3'):
- from pytorch_lightning.metrics.functional.classification import to_onehot
to_onehot(torch.tensor([1, 2, 3]))
+ from pytorch_lightning.metrics.functional.classification import to_categorical
with pytest.deprecated_call(match='will be removed in v1.3'):
- from pytorch_lightning.metrics.functional.classification import to_categorical
to_categorical(torch.tensor([[0.2, 0.5], [0.9, 0.1]]))
+ from pytorch_lightning.metrics.functional.classification import get_num_classes
with pytest.deprecated_call(match='will be removed in v1.3'):
- from pytorch_lightning.metrics.functional.classification import get_num_classes
get_num_classes(pred=torch.tensor([0, 1]), target=torch.tensor([1, 1]))
x_binary = torch.tensor([0, 1, 2, 3])
y_binary = torch.tensor([0, 1, 2, 3])
+ from pytorch_lightning.metrics.functional.classification import roc
with pytest.deprecated_call(match='will be removed in v1.3'):
- from pytorch_lightning.metrics.functional.classification import roc
roc(pred=x_binary, target=y_binary)
+ from pytorch_lightning.metrics.functional.classification import _roc
with pytest.deprecated_call(match='will be removed in v1.3'):
- from pytorch_lightning.metrics.functional.classification import _roc
_roc(pred=x_binary, target=y_binary)
x_multy = torch.tensor([[0.85, 0.05, 0.05, 0.05],
@@ -81,64 +78,40 @@ def test_tbd_remove_in_v1_3_0_metrics():
[0.05, 0.05, 0.05, 0.85]])
y_multy = torch.tensor([0, 1, 3, 2])
+ from pytorch_lightning.metrics.functional.classification import multiclass_roc
with pytest.deprecated_call(match='will be removed in v1.3'):
- from pytorch_lightning.metrics.functional.classification import multiclass_roc
multiclass_roc(pred=x_multy, target=y_multy)
+ from pytorch_lightning.metrics.functional.classification import average_precision
with pytest.deprecated_call(match='will be removed in v1.3'):
- from pytorch_lightning.metrics.functional.classification import average_precision
average_precision(pred=x_binary, target=y_binary)
+ from pytorch_lightning.metrics.functional.classification import precision_recall_curve
with pytest.deprecated_call(match='will be removed in v1.3'):
- from pytorch_lightning.metrics.functional.classification import precision_recall_curve
precision_recall_curve(pred=x_binary, target=y_binary)
+ from pytorch_lightning.metrics.functional.classification import multiclass_precision_recall_curve
with pytest.deprecated_call(match='will be removed in v1.3'):
- from pytorch_lightning.metrics.functional.classification import multiclass_precision_recall_curve
multiclass_precision_recall_curve(pred=x_multy, target=y_multy)
+ from pytorch_lightning.metrics.functional.reduction import reduce
with pytest.deprecated_call(match='will be removed in v1.3'):
- from pytorch_lightning.metrics.functional.reduction import reduce
reduce(torch.tensor([0, 1, 1, 0]), 'sum')
+ from pytorch_lightning.metrics.functional.reduction import class_reduce
with pytest.deprecated_call(match='will be removed in v1.3'):
- from pytorch_lightning.metrics.functional.reduction import class_reduce
class_reduce(torch.randint(1, 10, (50,)).float(),
torch.randint(10, 20, (50,)).float(),
torch.randint(1, 100, (50,)).float())
-def test_tbd_remove_in_v1_2_0():
- with pytest.deprecated_call(match='will be removed in v1.2'):
- checkpoint_cb = ModelCheckpoint(filepath='.')
-
- with pytest.deprecated_call(match='will be removed in v1.2'):
- checkpoint_cb = ModelCheckpoint('.')
-
- with pytest.raises(MisconfigurationException, match='inputs which are not feasible'):
- checkpoint_cb = ModelCheckpoint(filepath='.', dirpath='.')
-
-
-def test_tbd_remove_in_v1_2_0_metrics():
- from pytorch_lightning.metrics.classification import Fbeta
- from pytorch_lightning.metrics.functional.classification import f1_score, fbeta_score
-
- with pytest.deprecated_call(match='will be removed in v1.2'):
- Fbeta(2)
-
- with pytest.deprecated_call(match='will be removed in v1.2'):
- fbeta_score(torch.tensor([0, 1, 2, 3]), torch.tensor([0, 1, 2, 1]), 0.2)
-
- with pytest.deprecated_call(match='will be removed in v1.2'):
- f1_score(torch.tensor([0, 1, 0, 1]), torch.tensor([0, 1, 0, 0]))
-
-
# TODO: remove bool from Trainer.profiler param in v1.3.0, update profiler_connector.py
@pytest.mark.parametrize(['profiler', 'expected'], [
(True, SimpleProfiler),
(False, PassThroughProfiler),
])
def test_trainer_profiler_remove_in_v1_3_0(profiler, expected):
+ # remove bool from Trainer.profiler param in v1.3.0, update profiler_connector.py
with pytest.deprecated_call(match='will be removed in v1.3'):
trainer = Trainer(profiler=profiler)
assert isinstance(trainer.profiler, expected)
@@ -162,47 +135,3 @@ def test_trainer_cli_profiler_remove_in_v1_3_0(cli_args, expected_parsed_arg, ex
assert getattr(args, "profiler") == expected_parsed_arg
trainer = Trainer.from_argparse_args(args)
assert isinstance(trainer.profiler, expected_profiler)
-
-
-def _soft_unimport_module(str_module):
- # once the module is imported e.g with parsing with pytest it lives in memory
- if str_module in sys.modules:
- del sys.modules[str_module]
-
-
-class ModelVer0_6(EvalModelTemplate):
-
- # todo: this shall not be needed while evaluate asks for dataloader explicitly
- def val_dataloader(self):
- return self.dataloader(train=False)
-
- def validation_step(self, batch, batch_idx, *args, **kwargs):
- return {'val_loss': torch.tensor(0.6)}
-
- def validation_end(self, outputs):
- return {'val_loss': torch.tensor(0.6)}
-
- def test_dataloader(self):
- return self.dataloader(train=False)
-
- def test_end(self, outputs):
- return {'test_loss': torch.tensor(0.6)}
-
-
-class ModelVer0_7(EvalModelTemplate):
-
- # todo: this shall not be needed while evaluate asks for dataloader explicitly
- def val_dataloader(self):
- return self.dataloader(train=False)
-
- def validation_step(self, batch, batch_idx, *args, **kwargs):
- return {'val_loss': torch.tensor(0.7)}
-
- def validation_end(self, outputs):
- return {'val_loss': torch.tensor(0.7)}
-
- def test_dataloader(self):
- return self.dataloader(train=False)
-
- def test_end(self, outputs):
- return {'test_loss': torch.tensor(0.7)}
diff --git a/tests/metrics/regression/test_ssim.py b/tests/metrics/regression/test_ssim.py
index f581188e89fce5..8bb304850e3f22 100644
--- a/tests/metrics/regression/test_ssim.py
+++ b/tests/metrics/regression/test_ssim.py
@@ -53,9 +53,7 @@ def _sk_metric(preds, target, data_range, multichannel):
class TestSSIM(MetricTester):
atol = 6e-5
- # TODO: for some reason this test hangs with ddp=True
- # @pytest.mark.parametrize("ddp", [True, False])
- @pytest.mark.parametrize("ddp", [False])
+ @pytest.mark.parametrize("ddp", [True, False])
@pytest.mark.parametrize("dist_sync_on_step", [True, False])
def test_ssim(self, preds, target, multichannel, ddp, dist_sync_on_step):
self.run_class_metric_test(
diff --git a/tests/metrics/utils.py b/tests/metrics/utils.py
index c607a466b20683..4bd6608ce3fcf4 100644
--- a/tests/metrics/utils.py
+++ b/tests/metrics/utils.py
@@ -11,6 +11,11 @@
from pytorch_lightning.metrics import Metric
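+# use the "spawn" start method for the worker pool at import time;
+# set_start_method raises RuntimeError if a start method was already set, in which case we keep it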
+try:
+ set_start_method("spawn")
+except RuntimeError:
+ pass
+
NUM_PROCESSES = 2
NUM_BATCHES = 10
BATCH_SIZE = 32
@@ -165,10 +170,7 @@ def setup_class(self):
"""Setup the metric class. This will spawn the pool of workers that are
used for metric testing and setup_ddp
"""
- try:
- set_start_method("spawn")
- except RuntimeError:
- pass
+
self.poolSize = NUM_PROCESSES
self.pool = Pool(processes=self.poolSize)
self.pool.starmap(setup_ddp, [(rank, self.poolSize) for rank in range(self.poolSize)])
diff --git a/tests/models/data/horovod/train_default_model.py b/tests/models/data/horovod/train_default_model.py
index f10753491d447c..f41bf59bb4f4c2 100644
--- a/tests/models/data/horovod/train_default_model.py
+++ b/tests/models/data/horovod/train_default_model.py
@@ -74,9 +74,11 @@ def run_test_from_config(trainer_options):
for dataloader in test_loaders:
run_prediction(dataloader, pretrained_model)
- # test HPC loading / saving
+ # test HPC saving
trainer.checkpoint_connector.hpc_save(ckpt_path, trainer.logger)
- trainer.checkpoint_connector.hpc_load(ckpt_path, on_gpu=args.on_gpu)
+ # test HPC loading
+ checkpoint_path = trainer.checkpoint_connector.get_max_ckpt_path_from_folder(ckpt_path)
+ trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=args.on_gpu)
if args.on_gpu:
trainer = Trainer(gpus=1, accelerator='horovod', max_epochs=1)
diff --git a/tests/models/test_onnx.py b/tests/models/test_onnx.py
index a3919a6a8a7ddd..82727d37479b68 100644
--- a/tests/models/test_onnx.py
+++ b/tests/models/test_onnx.py
@@ -21,44 +21,44 @@
import tests.base.develop_pipelines as tpipes
import tests.base.develop_utils as tutils
from pytorch_lightning import Trainer
-from tests.base import EvalModelTemplate
+from tests.base import BoringModel, EvalModelTemplate
def test_model_saves_with_input_sample(tmpdir):
"""Test that ONNX model saves with input sample and size is greater than 3 MB"""
- model = EvalModelTemplate()
+ model = BoringModel()
trainer = Trainer(max_epochs=1)
trainer.fit(model)
file_path = os.path.join(tmpdir, "model.onnx")
- input_sample = torch.randn((1, 28 * 28))
+ input_sample = torch.randn((1, 32))
model.to_onnx(file_path, input_sample)
assert os.path.isfile(file_path)
- assert os.path.getsize(file_path) > 3e+06
+ assert os.path.getsize(file_path) > 4e2
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_model_saves_on_gpu(tmpdir):
"""Test that model saves on gpu"""
- model = EvalModelTemplate()
+ model = BoringModel()
trainer = Trainer(gpus=1, max_epochs=1)
trainer.fit(model)
file_path = os.path.join(tmpdir, "model.onnx")
- input_sample = torch.randn((1, 28 * 28))
+ input_sample = torch.randn((1, 32))
model.to_onnx(file_path, input_sample)
assert os.path.isfile(file_path)
- assert os.path.getsize(file_path) > 3e+06
+ assert os.path.getsize(file_path) > 4e2
def test_model_saves_with_example_output(tmpdir):
"""Test that ONNX model saves when provided with example output"""
- model = EvalModelTemplate()
+ model = BoringModel()
trainer = Trainer(max_epochs=1)
trainer.fit(model)
file_path = os.path.join(tmpdir, "model.onnx")
- input_sample = torch.randn((1, 28 * 28))
+ input_sample = torch.randn((1, 32))
model.eval()
example_outputs = model.forward(input_sample)
model.to_onnx(file_path, input_sample, example_outputs=example_outputs)
@@ -67,11 +67,13 @@ def test_model_saves_with_example_output(tmpdir):
def test_model_saves_with_example_input_array(tmpdir):
"""Test that ONNX model saves with_example_input_array and size is greater than 3 MB"""
- model = EvalModelTemplate()
+ model = BoringModel()
+ model.example_input_array = torch.randn(5, 32)
+
file_path = os.path.join(tmpdir, "model.onnx")
model.to_onnx(file_path)
assert os.path.exists(file_path) is True
- assert os.path.getsize(file_path) > 3e+06
+ assert os.path.getsize(file_path) > 4e2
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@@ -100,7 +102,9 @@ def test_model_saves_on_multi_gpu(tmpdir):
def test_verbose_param(tmpdir, capsys):
"""Test that output is present when verbose parameter is set"""
- model = EvalModelTemplate()
+ model = BoringModel()
+ model.example_input_array = torch.randn(5, 32)
+
file_path = os.path.join(tmpdir, "model.onnx")
model.to_onnx(file_path, verbose=True)
captured = capsys.readouterr()
@@ -108,8 +112,8 @@ def test_verbose_param(tmpdir, capsys):
def test_error_if_no_input(tmpdir):
- """Test that an exception is thrown when there is no input tensor"""
- model = EvalModelTemplate()
+ """Test that an error is thrown when there is no input tensor"""
+ model = BoringModel()
model.example_input_array = None
file_path = os.path.join(tmpdir, "model.onnx")
with pytest.raises(ValueError, match=r'Could not export to ONNX since neither `input_sample` nor'
@@ -117,21 +121,12 @@ def test_error_if_no_input(tmpdir):
model.to_onnx(file_path)
-def test_error_if_input_sample_is_not_tensor(tmpdir):
- """Test that an exception is thrown when there is no input tensor"""
- model = EvalModelTemplate()
- model.example_input_array = None
- file_path = os.path.join(tmpdir, "model.onnx")
- input_sample = np.random.randn(1, 28 * 28)
- with pytest.raises(ValueError, match=f'Received `input_sample` of type {type(input_sample)}. Expected type is '
- f'`Tensor`'):
- model.to_onnx(file_path, input_sample)
-
-
def test_if_inference_output_is_valid(tmpdir):
"""Test that the output inferred from ONNX model is same as from PyTorch"""
- model = EvalModelTemplate()
- trainer = Trainer(max_epochs=5)
+ model = BoringModel()
+ model.example_input_array = torch.randn(5, 32)
+
+ trainer = Trainer(max_epochs=2)
trainer.fit(model)
model.eval()
diff --git a/tests/models/test_torchscript.py b/tests/models/test_torchscript.py
index bf2c34b8bfef5b..3c43b201f52e4c 100644
--- a/tests/models/test_torchscript.py
+++ b/tests/models/test_torchscript.py
@@ -16,43 +16,72 @@
import pytest
import torch
-from tests.base import EvalModelTemplate
+from tests.base import BoringModel
from tests.base.datamodules import TrialMNISTDataModule
from tests.base.models import ParityModuleRNN, BasicGAN
@pytest.mark.parametrize("modelclass", [
- EvalModelTemplate,
+ BoringModel,
ParityModuleRNN,
BasicGAN,
])
def test_torchscript_input_output(modelclass):
""" Test that scripted LightningModule forward works. """
model = modelclass()
+
+ if isinstance(model, BoringModel):
+ model.example_input_array = torch.randn(5, 32)
+
script = model.to_torchscript()
assert isinstance(script, torch.jit.ScriptModule)
+
model.eval()
- model_output = model(model.example_input_array)
+ with torch.no_grad():
+ model_output = model(model.example_input_array)
+
script_output = script(model.example_input_array)
assert torch.allclose(script_output, model_output)
@pytest.mark.parametrize("modelclass", [
- EvalModelTemplate,
+ BoringModel,
ParityModuleRNN,
BasicGAN,
])
-def test_torchscript_input_output_trace(modelclass):
- """ Test that traced LightningModule forward works. """
+def test_torchscript_example_input_output_trace(modelclass):
+ """ Test that traced LightningModule forward works with example_input_array """
model = modelclass()
+
+ if isinstance(model, BoringModel):
+ model.example_input_array = torch.randn(5, 32)
+
script = model.to_torchscript(method='trace')
assert isinstance(script, torch.jit.ScriptModule)
+
model.eval()
- model_output = model(model.example_input_array)
+ with torch.no_grad():
+ model_output = model(model.example_input_array)
+
script_output = script(model.example_input_array)
assert torch.allclose(script_output, model_output)
+def test_torchscript_input_output_trace():
+ """ Test that traced LightningModule forward works with example_inputs """
+ model = BoringModel()
+ example_inputs = torch.randn(1, 32)
+ script = model.to_torchscript(example_inputs=example_inputs, method='trace')
+ assert isinstance(script, torch.jit.ScriptModule)
+
+ model.eval()
+ with torch.no_grad():
+ model_output = model(example_inputs)
+
+ script_output = script(example_inputs)
+ assert torch.allclose(script_output, model_output)
+
+
@pytest.mark.parametrize("device", [
torch.device("cpu"),
torch.device("cuda", 0)
@@ -60,7 +89,9 @@ def test_torchscript_input_output_trace(modelclass):
@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU machine")
def test_torchscript_device(device):
""" Test that scripted module is on the correct device. """
- model = EvalModelTemplate().to(device)
+ model = BoringModel().to(device)
+ model.example_input_array = torch.randn(5, 32)
+
script = model.to_torchscript()
assert next(script.parameters()).device == device
script_output = script(model.example_input_array.to(device))
@@ -69,7 +100,7 @@ def test_torchscript_device(device):
def test_torchscript_retain_training_state():
""" Test that torchscript export does not alter the training mode of original model. """
- model = EvalModelTemplate()
+ model = BoringModel()
model.train(True)
script = model.to_torchscript()
assert model.training
@@ -81,7 +112,7 @@ def test_torchscript_retain_training_state():
@pytest.mark.parametrize("modelclass", [
- EvalModelTemplate,
+ BoringModel,
ParityModuleRNN,
BasicGAN,
])
@@ -100,7 +131,7 @@ def test_torchscript_properties(modelclass):
@pytest.mark.parametrize("modelclass", [
- EvalModelTemplate,
+ BoringModel,
ParityModuleRNN,
BasicGAN,
])
@@ -109,9 +140,27 @@ def test_torchscript_properties(modelclass):
reason="torch.save/load has bug loading script modules on torch <= 1.4",
)
def test_torchscript_save_load(tmpdir, modelclass):
- """ Test that scripted LightningModules is correctly saved and can be loaded. """
+ """ Test that scripted LightningModule is correctly saved and can be loaded. """
model = modelclass()
output_file = str(tmpdir / "model.pt")
script = model.to_torchscript(file_path=output_file)
loaded_script = torch.jit.load(output_file)
assert torch.allclose(next(script.parameters()), next(loaded_script.parameters()))
+
+
+def test_torchcript_invalid_method(tmpdir):
+ """Test that an error is thrown with invalid torchscript method"""
+ model = BoringModel()
+ model.train(True)
+
+ with pytest.raises(ValueError, match="only supports 'script' or 'trace'"):
+ model.to_torchscript(method='temp')
+
+
+def test_torchscript_with_no_input(tmpdir):
+ """Test that an error is thrown when there is no input tensor"""
+ model = BoringModel()
+ model.example_input_array = None
+
+ with pytest.raises(ValueError, match='requires either `example_inputs` or `model.example_input_array`'):
+ model.to_torchscript(method='trace')
diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py
index e838dc60d81b31..37ab774bc83421 100644
--- a/tests/models/test_tpu.py
+++ b/tests/models/test_tpu.py
@@ -244,39 +244,6 @@ def test_distributed_backend_set_when_using_tpu(tmpdir, tpu_cores):
assert Trainer(tpu_cores=tpu_cores).distributed_backend == "tpu"
-@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@pytest.mark.skipif(not TPU_AVAILABLE, reason="test requires TPU machine")
-@pl_multi_process_test
-def test_result_obj_on_tpu(tmpdir):
- seed_everything(1234)
-
- batches = 5
- epochs = 2
-
- model = EvalModelTemplate()
- model.training_step = model.training_step_result_obj
- model.training_step_end = None
- model.training_epoch_end = None
- model.validation_step = model.validation_step_result_obj
- model.validation_step_end = None
- model.validation_epoch_end = None
- model.test_step = model.test_step_result_obj
- model.test_step_end = None
- model.test_epoch_end = None
-
- trainer_options = dict(
- default_root_dir=tmpdir,
- max_epochs=epochs,
- callbacks=[EarlyStopping()],
- log_every_n_steps=2,
- limit_train_batches=batches,
- weights_summary=None,
- tpu_cores=8
- )
-
- tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
-
-
@pytest.mark.skipif(not TPU_AVAILABLE, reason="test requires TPU machine")
@pl_multi_process_test
def test_broadcast_on_tpu():
diff --git a/tests/special_tests.sh b/tests/special_tests.sh
index f7cb5819517839..950e3776bbc7fa 100644
--- a/tests/special_tests.sh
+++ b/tests/special_tests.sh
@@ -19,4 +19,4 @@ python ${DEFAULTS} tests/plugins/test_rpc_plugin.py::test_rpc_function_calls_ddp
python ${DEFAULTS} tests/plugins/test_ddp_sequential_plugin.py::test_ddp_sequential_plugin_ddp_rpc_manual
python ${DEFAULTS} tests/plugins/test_ddp_sequential_plugin.py::test_ddp_sequential_plugin_ddp_rpc_manual_amp
python ${DEFAULTS} tests/plugins/test_ddp_sequential_plugin.py::test_ddp_sequential_plugin_ddp_rpc_automatic
-# python ${DEFAULTS} tests/plugins/test_ddp_sequential_plugin.py::test_ddp_sequential_plugin_ddp_rpc_with_wrong_balance
+python ${DEFAULTS} tests/trainer/logging_tests/test_train_loop_logging_1_0.py::test_logging_sync_dist_true_ddp
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index 3bce379c1115c2..91a8631a732870 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -1,6 +1,20 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import os
-import time
from pathlib import Path
+import time
import numpy as np
import pytest
diff --git a/tests/trainer/logging_tests/test_train_loop_logging_1_0.py b/tests/trainer/logging_tests/test_train_loop_logging_1_0.py
index 0c27d8909d760f..51b9c2ac69496d 100644
--- a/tests/trainer/logging_tests/test_train_loop_logging_1_0.py
+++ b/tests/trainer/logging_tests/test_train_loop_logging_1_0.py
@@ -18,6 +18,7 @@
import collections
import itertools
import os
+import platform
from unittest import mock
import numpy as np
@@ -26,8 +27,8 @@
from torch.utils.data import Dataset
import pytorch_lightning as pl
-from pytorch_lightning import Trainer, callbacks
-from pytorch_lightning.callbacks import ModelCheckpoint
+from pytorch_lightning import callbacks, Trainer
+from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.core.lightning import LightningModule
from tests.base.boring_model import BoringModel, RandomDictDataset, RandomDictStringDataset
from tests.base.deterministic_model import DeterministicModel
@@ -685,6 +686,7 @@ class TestModel(BoringModel):
def training_step(self, batch, batch_idx):
acc = self.step(batch[0])
self.log('foo', torch.tensor(fake_result), on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='sum')
+ self.log('foo_2', 2, on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='sum')
return acc
def validation_step(self, batch, batch_idx):
@@ -704,9 +706,46 @@ def validation_step(self, batch, batch_idx):
trainer.fit(model)
assert trainer.logged_metrics['foo'] == fake_result
+ assert trainer.logged_metrics['foo_2'] == 2
assert trainer.logged_metrics['bar'] == fake_result
+@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+@pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1',
+ reason="test should be run outside of pytest")
+def test_logging_sync_dist_true_ddp(tmpdir):
+ """
+ Tests to ensure that the sync_dist flag works with DDP
+ """
+ class TestLoggingSyncDistModel(BoringModel):
+ def training_step(self, batch, batch_idx):
+ acc = self.step(batch[0])
+ self.log('foo', 1, on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='SUM')
+ return acc
+
+ def validation_step(self, batch, batch_idx):
+ self.training_step_called = True
+ output = self.layer(batch)
+ loss = self.loss(batch, output)
+ self.log('bar', 2, on_step=False, on_epoch=True, sync_dist=True, sync_dist_op='AVG')
+ return {"x": loss}
+
+ model = TestLoggingSyncDistModel()
+ trainer = Trainer(
+ default_root_dir=tmpdir,
+ limit_train_batches=1,
+ limit_val_batches=1,
+ max_epochs=2,
+ weights_summary=None,
+ accelerator="ddp",
+ gpus=2,
+ )
+ trainer.fit(model)
+
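+ # 'foo' is reduced with SUM across the 2 DDP processes (1 + 1), 'bar' with AVG (stays 2)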
+ assert trainer.logged_metrics['foo'] == 2
+ assert trainer.logged_metrics['bar'] == 2
+
+
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_logging_sync_dist_true_gpu(tmpdir):
"""
@@ -771,3 +810,48 @@ def on_train_epoch_end(self, *_):
trainer.fit(model)
assert model.epoch_end_called
assert model.on_train_epoch_end_called
+
+
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires GPU machine")
+def test_metric_are_properly_reduced(tmpdir):
+ class TestingModel(BoringModel):
+ def __init__(self, *args, **kwargs):
+ super().__init__()
+ self.train_acc = pl.metrics.Accuracy()
+ self.val_acc = pl.metrics.Accuracy()
+
+ def training_step(self, batch, batch_idx):
+ self.train_acc(torch.rand(1, 3, device=self.device), torch.randint(0, 2, (1,), device=self.device))
+ self.log('train_acc', self.train_acc, on_step=True, on_epoch=True)
+ return super().training_step(batch, batch_idx)
+
+ def validation_step(self, batch, batch_idx):
+ preds = torch.tensor(0, device=self.device)
+ targets = torch.tensor(1, device=self.device)
+ if batch_idx < 8:
+ targets = preds
+ self.val_acc(preds, targets)
+ self.log('val_acc', self.val_acc, on_step=True, on_epoch=True)
+ return super().validation_step(batch, batch_idx)
+
+ early_stop = EarlyStopping(monitor='val_acc', mode='max')
+
+ checkpoint = ModelCheckpoint(
+ monitor='val_acc',
+ save_last=True,
+ save_top_k=2,
+ mode='max',
+ )
+
+ model = TestingModel()
+ trainer = Trainer(
+ default_root_dir=tmpdir,
+ gpus=1,
+ max_epochs=2,
+ limit_train_batches=5,
+ limit_val_batches=32,
+ callbacks=[early_stop, checkpoint])
+ trainer.fit(model)
+
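+ # predictions match the targets only for the first 8 of the 32 validation batches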
+ assert trainer.callback_metrics["val_acc"] == 8 / 32.
+ assert "train_acc" in trainer.callback_metrics
diff --git a/tests/trainer/optimization/test_multiple_optimizers.py b/tests/trainer/optimization/test_multiple_optimizers.py
new file mode 100644
index 00000000000000..78b6f8f7ff84a3
--- /dev/null
+++ b/tests/trainer/optimization/test_multiple_optimizers.py
@@ -0,0 +1,63 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Tests to ensure that the behaviours related to multiple optimizers work as expected
+"""
+import torch
+
+import pytorch_lightning as pl
+from tests.base.boring_model import BoringModel
+
+
+def test_unbalanced_logging_with_multiple_optimizers(tmpdir):
+ """
+ This test ensures that reduction works in unbalanced logging settings
+ """
+ class TestModel(BoringModel):
+
+ loss_1 = []
+ loss_2 = []
+
+ def training_step(self, batch, batch_idx, optimizer_idx):
+ output = self.layer(batch)
+ loss = self.loss(batch, output)
+ if optimizer_idx == 0 and self.trainer.global_step > 10:
+ self.log("loss_1", loss, on_epoch=True, prog_bar=True)
+ self.loss_1.append(loss.detach().clone())
+ elif optimizer_idx == 1:
+ self.log("loss_2", loss, on_epoch=True, prog_bar=True)
+ self.loss_2.append(loss.detach().clone())
+ return {"loss": loss}
+
+ def configure_optimizers(self):
+ optimizer = torch.optim.SGD(self.layer.parameters(), lr=0.001)
+ optimizer2 = torch.optim.SGD(self.layer.parameters(), lr=0.001)
+ return [optimizer, optimizer2]
+
+ model = TestModel()
+ model.training_epoch_end = None
+
+ # Initialize a trainer
+ trainer = pl.Trainer(
+ default_root_dir=tmpdir,
+ max_epochs=1,
+ )
+
+ trainer.fit(model)
+
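+ # the *_step entries in callback_metrics should match the last value logged for each optimizer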
+ assert torch.equal(trainer.callback_metrics["loss_2_step"], model.loss_2[-1])
+ assert torch.equal(trainer.callback_metrics["loss_1_step"], model.loss_1[-1])
+ # test that the epoch losses are properly reduced to the mean of all logged values
+ assert torch.abs(trainer.callback_metrics["loss_2_epoch"] - torch.FloatTensor(model.loss_2).mean()) < 1e-6
+ assert torch.abs(trainer.callback_metrics["loss_1_epoch"] - torch.FloatTensor(model.loss_1).mean()) < 1e-6
diff --git a/tests/trainer/test_optimizers.py b/tests/trainer/test_optimizers.py
index 2e76192836740c..52e085b2b7b8cb 100644
--- a/tests/trainer/test_optimizers.py
+++ b/tests/trainer/test_optimizers.py
@@ -15,7 +15,6 @@
import torch
from pytorch_lightning import Callback, Trainer
-from pytorch_lightning.core.optimizer import LightningOptimizer
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.base import EvalModelTemplate
from tests.base.boring_model import BoringModel
@@ -177,6 +176,7 @@ def test_reducelronplateau_scheduling(tmpdir):
frequency=1,
reduce_on_plateau=True,
strict=True,
+ name=None,
), 'lr scheduler was not correctly converted to dict'
@@ -215,7 +215,13 @@ def test_optimizer_return_options(enable_pl_optimizer):
assert len(freq) == 0
assert optim[0] == opt_a
assert lr_sched[0] == dict(
- scheduler=scheduler_a, interval='epoch', frequency=1, reduce_on_plateau=False, monitor=None, strict=True
+ scheduler=scheduler_a,
+ interval='epoch',
+ frequency=1,
+ reduce_on_plateau=False,
+ monitor=None,
+ strict=True,
+ name=None,
)
# opt tuple of 1 list
@@ -225,7 +231,13 @@ def test_optimizer_return_options(enable_pl_optimizer):
assert len(freq) == 0
assert optim[0] == opt_a
assert lr_sched[0] == dict(
- scheduler=scheduler_a, interval='epoch', frequency=1, reduce_on_plateau=False, monitor=None, strict=True
+ scheduler=scheduler_a,
+ interval='epoch',
+ frequency=1,
+ reduce_on_plateau=False,
+ monitor=None,
+ strict=True,
+ name=None,
)
# opt single dictionary
@@ -235,7 +247,13 @@ def test_optimizer_return_options(enable_pl_optimizer):
assert len(freq) == 0
assert optim[0] == opt_a
assert lr_sched[0] == dict(
- scheduler=scheduler_a, interval='epoch', frequency=1, reduce_on_plateau=False, monitor=None, strict=True
+ scheduler=scheduler_a,
+ interval='epoch',
+ frequency=1,
+ reduce_on_plateau=False,
+ monitor=None,
+ strict=True,
+ name=None,
)
# opt multiple dictionaries with frequencies
@@ -247,7 +265,13 @@ def test_optimizer_return_options(enable_pl_optimizer):
assert len(optim) == len(lr_sched) == len(freq) == 2
assert optim[0] == opt_a
assert lr_sched[0] == dict(
- scheduler=scheduler_a, interval='epoch', frequency=1, reduce_on_plateau=False, monitor=None, strict=True
+ scheduler=scheduler_a,
+ interval='epoch',
+ frequency=1,
+ reduce_on_plateau=False,
+ monitor=None,
+ strict=True,
+ name=None,
)
assert freq == [1, 5]
diff --git a/tests/trainer/test_supporters.py b/tests/trainer/test_supporters.py
new file mode 100644
index 00000000000000..b8a0e066cdef89
--- /dev/null
+++ b/tests/trainer/test_supporters.py
@@ -0,0 +1,38 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pytest
+import torch
+
+from pytorch_lightning.trainer.supporters import TensorRunningAccum
+
+
+def test_tensor_running_accum_reset():
+ """ Test that reset sets all attributes back to their initialization state """
+
+ window_length = 10
+
+ accum = TensorRunningAccum(window_length=window_length)
+ assert accum.last() is None
+ assert accum.mean() is None
+
+ accum.append(torch.tensor(1.5))
+ assert accum.last() == torch.tensor(1.5)
+ assert accum.mean() == torch.tensor(1.5)
+
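+ # reset should restore the freshly-initialized state while keeping the configured window length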
+ accum.reset()
+ assert accum.window_length == window_length
+ assert accum.memory is None
+ assert accum.current_idx == 0
+ assert accum.last_idx is None
+ assert not accum.rotated
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 9b29d6ec2b1dd6..9e5ceccf9b646b 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -11,12 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-import glob
import math
import os
import pickle
import sys
-import types
from argparse import Namespace
from copy import deepcopy
from pathlib import Path
@@ -34,6 +32,7 @@
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.profiler.profilers import AdvancedProfiler, PassThroughProfiler, SimpleProfiler
from pytorch_lightning.trainer.logging import TrainerLoggingMixin
+from pytorch_lightning.trainer.states import TrainerState
from pytorch_lightning.utilities import NATIVE_AMP_AVAILABLE
from pytorch_lightning.utilities.cloud_io import load as pl_load
from pytorch_lightning.utilities.exceptions import MisconfigurationException
@@ -61,6 +60,7 @@ def test_no_val_module(monkeypatch, tmpdir, tmpdir_server, url_ckpt):
result = trainer.fit(model)
# training complete
assert result == 1, "amp + ddp model failed to complete"
+ assert trainer.state == TrainerState.FINISHED
# save model
new_weights_path = os.path.join(tmpdir, "save_test.ckpt")
@@ -107,6 +107,7 @@ def test_no_val_end_module(monkeypatch, tmpdir, tmpdir_server, url_ckpt):
# training complete
assert result == 1, "amp + ddp model failed to complete"
+ assert trainer.state == TrainerState.FINISHED
# save model
new_weights_path = os.path.join(tmpdir, "save_test.ckpt")
@@ -151,6 +152,7 @@ def test_strict_model_load(monkeypatch, tmpdir, tmpdir_server, url_ckpt):
# training complete
assert result == 1
+ assert trainer.state == TrainerState.FINISHED
# save model
new_weights_path = os.path.join(tmpdir, "save_test.ckpt")
@@ -468,6 +470,7 @@ def test_model_checkpoint_only_weights(tmpdir):
result = trainer.fit(model)
# training complete
assert result == 1, "training failed to complete"
+ assert trainer.state == TrainerState.FINISHED
checkpoint_path = list(trainer.checkpoint_callback.best_k_models.keys())[0]
@@ -507,35 +510,23 @@ def test_resume_from_checkpoint_epoch_restored(monkeypatch, tmpdir, tmpdir_serve
# set $TORCH_HOME, which determines torch hub's cache path, to tmpdir
monkeypatch.setenv("TORCH_HOME", tmpdir)
- hparams = EvalModelTemplate.get_default_hparams()
-
- def _new_model():
- # Create a model that tracks epochs and batches seen
- model = EvalModelTemplate(**hparams)
- model.num_epochs_seen = 0
- model.num_batches_seen = 0
- model.num_on_load_checkpoint_called = 0
+ class TestModel(BoringModel):
+ # Model that tracks epochs and batches seen
+ num_epochs_seen = 0
+ num_batches_seen = 0
+ num_on_load_checkpoint_called = 0
- def increment_epoch(self):
+ def on_epoch_end(self):
self.num_epochs_seen += 1
- def increment_batch(self, batch, batch_idx, dataloader_idx):
+ def on_train_batch_start(self, *_):
self.num_batches_seen += 1
- def increment_on_load_checkpoint(self, _):
+ def on_load_checkpoint(self, _):
self.num_on_load_checkpoint_called += 1
- # Bind methods to keep track of epoch numbers, batch numbers it has seen
- # as well as number of times it has called on_load_checkpoint()
- model.on_epoch_end = types.MethodType(increment_epoch, model)
- model.on_train_batch_start = types.MethodType(increment_batch, model)
- model.on_load_checkpoint = types.MethodType(increment_on_load_checkpoint, model)
- return model
-
- model = _new_model()
-
- trainer_options = dict(
- progress_bar_refresh_rate=0,
+ model = TestModel()
+ trainer = Trainer(
max_epochs=2,
limit_train_batches=0.65,
limit_val_batches=1,
@@ -543,144 +534,125 @@ def increment_on_load_checkpoint(self, _):
default_root_dir=tmpdir,
val_check_interval=1.0,
enable_pl_optimizer=enable_pl_optimizer,
+ progress_bar_refresh_rate=0,
+ logger=False,
+ weights_summary=None,
)
-
- trainer = Trainer(**trainer_options)
- # fit model
trainer.fit(model)
- training_batches = trainer.num_training_batches
-
assert model.num_epochs_seen == 2
- assert model.num_batches_seen == training_batches * 2
+ assert model.num_batches_seen == trainer.num_training_batches * 2
assert model.num_on_load_checkpoint_called == 0
# Other checkpoints can be uncommented if/when resuming mid-epoch is supported
- checkpoints = sorted(glob.glob(os.path.join(trainer.checkpoint_callback.dirpath, "*.ckpt")))
+ checkpoints = Path(trainer.checkpoint_callback.dirpath).glob("*.ckpt")
if url_ckpt:
# transform local paths into url checkpoints
ip, port = tmpdir_server
- checkpoints = [f"http://{ip}:{port}/" + os.path.basename(check) for check in checkpoints]
+ checkpoints = [f"http://{ip}:{port}/" + ckpt.name for ckpt in checkpoints]
- for check in checkpoints:
- next_model = _new_model()
- state = pl_load(check)
+ for ckpt in checkpoints:
+ next_model = TestModel()
+ state = pl_load(ckpt)
# Resume training
- trainer_options["max_epochs"] = 2
- new_trainer = Trainer(**trainer_options, resume_from_checkpoint=check)
+ new_trainer = Trainer(resume_from_checkpoint=ckpt, max_epochs=2)
new_trainer.fit(next_model)
- assert state["global_step"] + next_model.num_batches_seen == training_batches * trainer_options["max_epochs"]
+ assert state["global_step"] + next_model.num_batches_seen == trainer.num_training_batches * trainer.max_epochs
assert next_model.num_on_load_checkpoint_called == 1
-def _init_steps_model():
- """private method for initializing a model with 5% train epochs"""
- model = EvalModelTemplate()
-
- # define train epoch to 5% of data
- train_percent = 0.5
- # get number of samples in 1 epoch
- num_train_samples = math.floor(len(model.train_dataloader()) * train_percent)
-
- trainer_options = dict(
- limit_train_batches=train_percent,
- )
- return model, trainer_options, num_train_samples
-
-
def test_trainer_max_steps_and_epochs(tmpdir):
"""Verify model trains according to specified max steps"""
- model, trainer_options, num_train_samples = _init_steps_model()
+ model = BoringModel()
+ num_train_samples = math.floor(len(model.train_dataloader()) * 0.5)
# define less train steps than epochs
- trainer_options.update(
- default_root_dir=tmpdir,
- max_epochs=3,
- max_steps=num_train_samples + 10,
- )
-
- # fit model
- trainer = Trainer(**trainer_options)
+ trainer_kwargs = {
+ 'limit_train_batches': 0.5,
+ 'default_root_dir': tmpdir,
+ 'max_epochs': 3,
+ 'max_steps': num_train_samples + 10,
+ 'logger': False,
+ 'weights_summary': None,
+ 'progress_bar_refresh_rate': 0,
+ }
+ trainer = Trainer(**trainer_kwargs)
result = trainer.fit(model)
- assert result == 1, "Training did not complete"
- # check training stopped at max_steps
+ assert result == 1, "Training did not complete"
+ assert trainer.state == TrainerState.FINISHED
assert trainer.global_step == trainer.max_steps, "Model did not stop at max_steps"
# define less train epochs than steps
- trainer_options.update(
- max_epochs=2,
- max_steps=trainer_options["max_epochs"] * 2 * num_train_samples,
- )
-
- # fit model
- trainer = Trainer(**trainer_options)
+ trainer_kwargs['max_epochs'] = 2
+ trainer_kwargs['max_steps'] = 3 * 2 * num_train_samples
+ trainer = Trainer(**trainer_kwargs)
result = trainer.fit(model)
- assert result == 1, "Training did not complete"
- # check training stopped at max_epochs
+ assert result == 1, "Training did not complete"
+ assert trainer.state == TrainerState.FINISHED
assert trainer.global_step == num_train_samples * trainer.max_epochs
assert trainer.current_epoch == trainer.max_epochs - 1, "Model did not stop at max_epochs"
def test_trainer_min_steps_and_epochs(tmpdir):
"""Verify model trains according to specified min steps"""
- model, trainer_options, num_train_samples = _init_steps_model()
-
- # define callback for stopping the model and default epochs
- trainer_options.update(
- default_root_dir=tmpdir,
- callbacks=[EarlyStopping(monitor="early_stop_on", min_delta=1.0)],
- val_check_interval=2,
- min_epochs=1,
- max_epochs=7,
- )
-
- # define less min steps than 1 epoch
- trainer_options["min_steps"] = math.floor(num_train_samples / 2)
-
- # fit model
- trainer = Trainer(**trainer_options)
+ model = EvalModelTemplate()
+ num_train_samples = math.floor(len(model.train_dataloader()) * 0.5)
+
+ trainer_kwargs = {
+ 'limit_train_batches': 0.5,
+ 'default_root_dir': tmpdir,
+ # define callback for stopping the model
+ 'callbacks': [EarlyStopping(monitor="early_stop_on", min_delta=1.0)],
+ 'val_check_interval': 2,
+ 'min_epochs': 1,
+ 'max_epochs': 7,
+ # define min_steps as fewer steps than one epoch
+ 'min_steps': num_train_samples // 2,
+ 'logger': False,
+ 'weights_summary': None,
+ 'progress_bar_refresh_rate': 0,
+ }
+ trainer = Trainer(**trainer_kwargs)
result = trainer.fit(model)
- assert result == 1, "Training did not complete"
- # check model ran for at least min_epochs
- assert (
- trainer.global_step >= num_train_samples and trainer.current_epoch > 0
- ), "Model did not train for at least min_epochs"
+ assert result == 1, "Training did not complete"
+ assert trainer.state == TrainerState.FINISHED
+ assert trainer.current_epoch > 0
+ assert trainer.global_step >= num_train_samples, "Model did not train for at least min_epochs"
# define less epochs than min_steps
- trainer_options["min_steps"] = math.floor(num_train_samples * 1.5)
-
- # fit model
- trainer = Trainer(**trainer_options)
+ trainer_kwargs["min_steps"] = math.floor(num_train_samples * 1.5)
+ trainer = Trainer(**trainer_kwargs)
result = trainer.fit(model)
- assert result == 1, "Training did not complete"
- # check model ran for at least num_train_samples*1.5
- assert (
- trainer.global_step >= math.floor(num_train_samples * 1.5) and trainer.current_epoch > 0
- ), "Model did not train for at least min_steps"
+ assert result == 1, "Training did not complete"
+ assert trainer.state == TrainerState.FINISHED
+ assert trainer.current_epoch > 0
+ assert trainer.global_step >= math.floor(num_train_samples * 1.5), "Model did not train for at least min_steps"
def test_trainer_max_steps_accumulate_batches(tmpdir):
"""Verify model trains according to specified max steps with grad accumulated batches"""
- model, trainer_options, num_train_samples = _init_steps_model()
+ model = BoringModel()
+ num_train_samples = math.floor(len(model.train_dataloader()) * 0.5)
# define less train steps than epochs
- trainer_options.update(
+ trainer = Trainer(
+ limit_train_batches=0.5,
default_root_dir=tmpdir,
- max_steps=(num_train_samples + 10),
+ max_steps=num_train_samples + 10,
accumulate_grad_batches=10,
+ logger=False,
+ weights_summary=None,
+ progress_bar_refresh_rate=0,
)
-
- # fit model
- trainer = Trainer(**trainer_options)
result = trainer.fit(model)
- assert result == 1, "Training did not complete"
- # check training stopped at max_steps
+ assert result == 1, "Training did not complete"
+ assert trainer.state == TrainerState.FINISHED
assert trainer.global_step == trainer.max_steps, "Model did not stop at max_steps"
@@ -703,6 +675,7 @@ def test_benchmark_option(tmpdir):
# verify training completed
assert result == 1
+ assert trainer.state == TrainerState.FINISHED
# verify torch.backends.cudnn.benchmark is not turned off
assert torch.backends.cudnn.benchmark
@@ -788,6 +761,7 @@ def training_epoch_end(self, *args, **kwargs):
# check that limit_train_batches=0 turns off training
assert result == 1, "training failed to complete"
+ assert trainer.state == TrainerState.FINISHED
assert trainer.current_epoch == 0
assert not model.training_step_invoked, "`training_step` should not run when `limit_train_batches=0`"
assert not model.training_epoch_end_invoked, "`training_epoch_end` should not run when `limit_train_batches=0`"
@@ -806,6 +780,7 @@ def training_epoch_end(self, *args, **kwargs):
assert not torch.all(torch.eq(before_state_dict[key], after_state_dict[key]))
assert result == 1, "training failed to complete"
+ assert trainer.state == TrainerState.FINISHED
assert trainer.current_epoch == 0
assert model.training_step_invoked, "did not run `training_step` with `fast_dev_run=True`"
assert model.training_epoch_end_invoked, "did not run `training_epoch_end` with `fast_dev_run=True`"
@@ -844,6 +819,7 @@ def validation_epoch_end(self, *args, **kwargs):
# check that limit_val_batches=0 turns off validation
assert result == 1, "training failed to complete"
+ assert trainer.state == TrainerState.FINISHED
assert trainer.current_epoch == 1
assert not model.validation_step_invoked, "`validation_step` should not run when `limit_val_batches=0`"
assert not model.validation_epoch_end_invoked, "`validation_epoch_end` should not run when `limit_val_batches=0`"
@@ -855,6 +831,7 @@ def validation_epoch_end(self, *args, **kwargs):
result = trainer.fit(model)
assert result == 1, "training failed to complete"
+ assert trainer.state == TrainerState.FINISHED
assert trainer.current_epoch == 0
assert model.validation_step_invoked, "did not run `validation_step` with `fast_dev_run=True`"
assert model.validation_epoch_end_invoked, "did not run `validation_epoch_end` with `fast_dev_run=True`"
@@ -958,6 +935,7 @@ def test_gradient_clipping(tmpdir):
"""
Test gradient clipping
"""
+ tutils.reset_seed()
model = EvalModelTemplate()
@@ -995,6 +973,7 @@ def test_gradient_clipping_fp16(tmpdir):
"""
Test gradient clipping with fp16
"""
+ tutils.reset_seed()
model = EvalModelTemplate()
@@ -1117,7 +1096,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
@pytest.mark.parametrize(
"trainer_kwargs,expected",
[
- pytest.param(
+ (
dict(accelerator=None, gpus=None),
dict(
use_dp=False,
@@ -1129,7 +1108,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
num_processes=1,
),
),
- pytest.param(
+ (
dict(accelerator="dp", gpus=None),
dict(
use_dp=False,
@@ -1141,7 +1120,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
num_processes=1,
),
),
- pytest.param(
+ (
dict(accelerator="dp", gpus=None),
dict(
use_dp=False,
@@ -1153,7 +1132,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
num_processes=1,
),
),
- pytest.param(
+ (
dict(accelerator="ddp", gpus=None),
dict(
use_dp=False,
@@ -1165,7 +1144,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
num_processes=1,
),
),
- pytest.param(
+ (
dict(accelerator="ddp", num_processes=2, gpus=None),
dict(
use_dp=False,
@@ -1177,7 +1156,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
num_processes=2,
),
),
- pytest.param(
+ (
dict(accelerator="ddp", num_nodes=2, gpus=None),
dict(
use_dp=False,
@@ -1189,7 +1168,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
num_processes=1,
),
),
- pytest.param(
+ (
dict(accelerator="ddp_cpu", num_processes=2, gpus=None),
dict(
use_dp=False,
@@ -1201,7 +1180,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
num_processes=2,
),
),
- pytest.param(
+ (
dict(accelerator="ddp2", gpus=None),
dict(
use_dp=False,
@@ -1213,7 +1192,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
num_processes=1,
),
),
- pytest.param(
+ (
dict(accelerator=None, gpus=1),
dict(
use_dp=False,
@@ -1224,9 +1203,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
use_single_gpu=True,
num_processes=1,
),
- marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
),
- pytest.param(
+ (
dict(accelerator="dp", gpus=1),
dict(
use_dp=True,
@@ -1237,9 +1215,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
use_single_gpu=True,
num_processes=1,
),
- marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
),
- pytest.param(
+ (
dict(accelerator="ddp", gpus=1),
dict(
use_dp=False,
@@ -1250,9 +1227,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
use_single_gpu=True,
num_processes=1,
),
- marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
),
- pytest.param(
+ (
dict(accelerator="ddp_cpu", num_processes=2, gpus=1),
dict(
use_dp=False,
@@ -1263,9 +1239,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
use_single_gpu=False,
num_processes=2,
),
- marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
),
- pytest.param(
+ (
dict(accelerator="ddp2", gpus=1),
dict(
use_dp=False,
@@ -1276,9 +1251,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
use_single_gpu=False,
num_processes=1,
),
- marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
),
- pytest.param(
+ (
dict(accelerator=None, gpus=2),
dict(
use_dp=False,
@@ -1289,9 +1263,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
use_single_gpu=False,
num_processes=2,
),
- marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")],
),
- pytest.param(
+ (
dict(accelerator="dp", gpus=2),
dict(
use_dp=True,
@@ -1302,9 +1275,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
use_single_gpu=False,
num_processes=1,
),
- marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")],
),
- pytest.param(
+ (
dict(accelerator="ddp", gpus=2),
dict(
use_dp=False,
@@ -1315,9 +1287,8 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
use_single_gpu=False,
num_processes=2,
),
- marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")],
),
- pytest.param(
+ (
dict(accelerator="ddp2", gpus=2),
dict(
use_dp=False,
@@ -1328,21 +1299,17 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
use_single_gpu=False,
num_processes=1,
),
- marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")],
),
],
)
-# Todo: mock nb Gpus so all these tests can run on any device
-# todo: think about simplification, that the the expected will be just a list use_xxx which shall be true...
-def test_trainer_config(trainer_kwargs, expected):
+def test_trainer_config(trainer_kwargs, expected, monkeypatch):
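+ # mock the GPU availability reported by torch so every parametrization can run on CPU-only machines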
+ if trainer_kwargs["gpus"] is not None:
+ monkeypatch.setattr(torch.cuda, "is_available", lambda: True)
+ monkeypatch.setattr(torch.cuda, "device_count", lambda: trainer_kwargs["gpus"])
trainer = Trainer(**trainer_kwargs)
- assert trainer.use_dp is expected["use_dp"], 'for input: %s' % trainer_kwargs
- assert trainer.use_ddp is expected["use_ddp"], 'for input: %s' % trainer_kwargs
- assert trainer.use_ddp2 is expected["use_ddp2"], 'for input: %s' % trainer_kwargs
- assert trainer.num_gpus == expected["num_gpus"], 'for input: %s' % trainer_kwargs
- assert trainer.on_gpu is expected["on_gpu"], 'for input: %s' % trainer_kwargs
- assert trainer.use_single_gpu is expected["use_single_gpu"], 'for input: %s' % trainer_kwargs
- assert trainer.num_processes == expected["num_processes"], 'for input: %s' % trainer_kwargs
+ assert len(expected) == 7
+ for k, v in expected.items():
+ assert getattr(trainer, k) == v, f"Failed {k}: {v}"
def test_trainer_subclassing():
@@ -1358,6 +1325,7 @@ def __init__(self, custom_arg, *args, custom_kwarg="test", **kwargs):
trainer = TrainerSubclass(123, custom_kwarg="custom", fast_dev_run=True)
result = trainer.fit(model)
assert result == 1
+ assert trainer.state == TrainerState.FINISHED
assert trainer.custom_arg == 123
assert trainer.custom_kwarg == "custom"
assert trainer.fast_dev_run
@@ -1373,6 +1341,7 @@ def __init__(self, **kwargs):
trainer = TrainerSubclass(custom_kwarg="custom", fast_dev_run=True)
result = trainer.fit(model)
assert result == 1
+ assert trainer.state == TrainerState.FINISHED
assert trainer.custom_kwarg == "custom"
assert trainer.fast_dev_run