From d577f461a49e8138bb01417727e3032ff01d4c42 Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Wed, 10 Nov 2021 21:05:48 +0530 Subject: [PATCH 01/18] Remove deprecated `utilities.distributed.rank_zero_{warn,deprecation}` (#10451) --- CHANGELOG.md | 3 +++ pytorch_lightning/callbacks/lr_monitor.py | 3 +-- pytorch_lightning/utilities/distributed.py | 20 -------------------- tests/deprecated_api/test_remove_1-6.py | 8 -------- 4 files changed, 4 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 495c9e2398df0..0082201aa1cf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -110,6 +110,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). * ([#10448](https://github.com/PyTorchLightning/pytorch-lightning/pull/10448)) +- Removed deprecated `utilities.distributed.rank_zero_{warn/deprecation}` ([#10451](https://github.com/PyTorchLightning/pytorch-lightning/pull/10451)) + + ### Fixed - Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/PyTorchLightning/pytorch-lightning/issues/9702)) diff --git a/pytorch_lightning/callbacks/lr_monitor.py b/pytorch_lightning/callbacks/lr_monitor.py index c9875cae83e62..d72f42d8f8616 100644 --- a/pytorch_lightning/callbacks/lr_monitor.py +++ b/pytorch_lightning/callbacks/lr_monitor.py @@ -27,8 +27,7 @@ import pytorch_lightning as pl from pytorch_lightning.callbacks.base import Callback -from pytorch_lightning.utilities import rank_zero_warn -from pytorch_lightning.utilities.distributed import rank_zero_deprecation +from pytorch_lightning.utilities import rank_zero_deprecation, rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException diff --git a/pytorch_lightning/utilities/distributed.py b/pytorch_lightning/utilities/distributed.py index b99f5213d02d8..1740518923c0f 100644 --- a/pytorch_lightning/utilities/distributed.py +++ b/pytorch_lightning/utilities/distributed.py @@ -66,26 +66,6 @@ def _get_rank() -> int: rank_zero_only.rank = getattr(rank_zero_only, "rank", _get_rank()) -def rank_zero_warn(*args: Any, stacklevel: int = 5, **kwargs: Any) -> None: - from pytorch_lightning.utilities.warnings import rank_zero_deprecation, rank_zero_warn - - rank_zero_deprecation( - "`pytorch_lightning.utilities.distributed.rank_zero_warn` has been moved to" - " `pytorch_lightning.utilities.rank_zero_warn` in v1.3.7 and will be removed in v1.6" - ) - return rank_zero_warn(*args, stacklevel=stacklevel, **kwargs) - - -def rank_zero_deprecation(*args: Any, stacklevel: int = 5, **kwargs: Any) -> None: - from pytorch_lightning.utilities.warnings import rank_zero_deprecation - - rank_zero_deprecation( - "`pytorch_lightning.utilities.distributed.rank_zero_deprecation` has been moved to" - " `pytorch_lightning.utilities.rank_zero_deprecation` in v1.3.7 and will be removed in v1.6" - ) - return rank_zero_deprecation(*args, stacklevel=stacklevel, **kwargs) - - def _info(*args: Any, stacklevel: int = 2, **kwargs: Any) -> None: if python_version() >= "3.8.0": kwargs["stacklevel"] = stacklevel diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index a64d28ebefecc..686339df6317c 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -17,7 +17,6 @@ import pytest from pytorch_lightning import Trainer -from pytorch_lightning.utilities.distributed import 
rank_zero_deprecation, rank_zero_warn from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.model_summary import ModelSummary from tests.helpers import BoringModel @@ -78,13 +77,6 @@ def test_v1_6_0_train_loop(tmpdir): _ = trainer.train_loop -def test_v1_6_0_rank_zero_warnings_moved(): - with pytest.deprecated_call(match="in v1.3.7 and will be removed in v1.6"): - rank_zero_warn("test") - with pytest.deprecated_call(match="in v1.3.7 and will be removed in v1.6"): - rank_zero_deprecation("test") - - def test_v1_6_0_deprecated_model_summary_mode(tmpdir): model = BoringModel() with pytest.deprecated_call(match="Argument `mode` in `ModelSummary` is deprecated in v1.4"): From d2aaf6b4cc420a4ef2aa4d1db29a0e881cea9406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Wed, 10 Nov 2021 17:59:10 +0100 Subject: [PATCH 02/18] Upgrade CI after the 1.10 release (#10075) --- .github/workflows/ci_dockers.yml | 8 +- .github/workflows/ci_pkg-install.yml | 4 +- .github/workflows/ci_schema.yml | 4 +- .github/workflows/ci_test-base.yml | 5 +- .github/workflows/ci_test-conda.yml | 4 +- .github/workflows/ci_test-full.yml | 4 +- .github/workflows/ci_test-mnodes.yml | 210 ------------------------- .github/workflows/code-checks.yml | 5 +- .github/workflows/docs-checks.yml | 4 +- .github/workflows/events-nightly.yml | 2 +- .github/workflows/events-recurrent.yml | 2 +- .github/workflows/release-docker.yml | 8 +- .github/workflows/release-pypi.yml | 2 +- README.md | 22 +-- dockers/base-cuda/Dockerfile | 2 +- dockers/base-xla/Dockerfile | 2 +- dockers/release/Dockerfile | 2 +- dockers/tpu-tests/Dockerfile | 2 +- 18 files changed, 41 insertions(+), 251 deletions(-) delete mode 100644 .github/workflows/ci_test-mnodes.yml diff --git a/.github/workflows/ci_dockers.yml b/.github/workflows/ci_dockers.yml index 02426529574f6..bd45247e15df2 100644 --- a/.github/workflows/ci_dockers.yml +++ b/.github/workflows/ci_dockers.yml @@ -1,4 +1,4 @@ -name: CI build Docker +name: Docker # https://www.docker.com/blog/first-docker-github-action-is-here # https://github.com/docker/build-push-action # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows @@ -23,9 +23,9 @@ jobs: strategy: fail-fast: false matrix: - # should be the config used in '.github/workflows/release-docker.yml', but we just keep one to check. 
- python_version: ["3.9"] - pytorch_version: ["1.9"] + # the config used in '.azure-pipelines/gpu-tests.yml' since the Dockerfile uses the cuda image + python_version: ["3.7"] + pytorch_version: ["1.8"] steps: - name: Checkout uses: actions/checkout@v2 diff --git a/.github/workflows/ci_pkg-install.yml b/.github/workflows/ci_pkg-install.yml index 12f3976d078e4..1fd7ed49d5a47 100644 --- a/.github/workflows/ci_pkg-install.yml +++ b/.github/workflows/ci_pkg-install.yml @@ -1,4 +1,4 @@ -name: Install pkg +name: Package # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows on: # Trigger the workflow on push or pull request, but only for the master branch @@ -9,7 +9,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra jobs: - pkg-install: + install: runs-on: ${{ matrix.os }} strategy: fail-fast: false diff --git a/.github/workflows/ci_schema.yml b/.github/workflows/ci_schema.yml index 51c4400666fd0..d635285fae39a 100644 --- a/.github/workflows/ci_schema.yml +++ b/.github/workflows/ci_schema.yml @@ -1,11 +1,11 @@ -name: CI action schema +name: Schema on: # Trigger the workflow on push or pull request, but only for the master branch push: {} pull_request: branches: [master, "release/*"] jobs: - validate-schema: + check: runs-on: ubuntu-20.04 steps: - name: Checkout diff --git a/.github/workflows/ci_test-base.yml b/.github/workflows/ci_test-base.yml index e92249cab4030..03871420c09df 100644 --- a/.github/workflows/ci_test-base.yml +++ b/.github/workflows/ci_test-base.yml @@ -1,6 +1,6 @@ # this jobs runs `pytest` over the source directory. It does not install any extra dependencies. # this is useful to catch errors where an import has been added which is not part of the basic dependencies. -name: CI basic testing +name: Test # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows on: # Trigger the workflow on push or pull request, but only for the master branch @@ -10,8 +10,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra branches: [master, "release/*"] jobs: - doctest: - + source: runs-on: ${{ matrix.os }} strategy: fail-fast: false diff --git a/.github/workflows/ci_test-conda.yml b/.github/workflows/ci_test-conda.yml index e0808a79fd384..f996c8cc7dcea 100644 --- a/.github/workflows/ci_test-conda.yml +++ b/.github/workflows/ci_test-conda.yml @@ -1,4 +1,4 @@ -name: PyTorch & Conda +name: Test # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows on: # Trigger the workflow on push or pull request, but only for the master branch @@ -15,7 +15,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8"] # previous to last Python version as that one is already used in test-full - pytorch-version: ["1.7", "1.8", "1.9", "1.10"] + pytorch-version: ["1.7", "1.8", "1.9", "1.10"] # nightly: add when there's a release candidate # Timeout: https://stackoverflow.com/a/59076067/4521646 timeout-minutes: 35 diff --git a/.github/workflows/ci_test-full.yml b/.github/workflows/ci_test-full.yml index 57aedf68dcb84..c86fb458f403e 100644 --- a/.github/workflows/ci_test-full.yml +++ b/.github/workflows/ci_test-full.yml @@ -1,4 +1,4 @@ -name: CI complete testing +name: Test # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows on: # Trigger the workflow on push or pull request, but only for the master branch @@ -10,7 +10,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra jobs: - pytest: + cpu: runs-on: ${{ matrix.os 
}} if: github.event.pull_request.draft == false diff --git a/.github/workflows/ci_test-mnodes.yml b/.github/workflows/ci_test-mnodes.yml deleted file mode 100644 index fbc0dc2b6e924..0000000000000 --- a/.github/workflows/ci_test-mnodes.yml +++ /dev/null @@ -1,210 +0,0 @@ -name: Multi Nodes GPU Tests - -# Workflow Steps: -# 1. Checkout Pytorch Lightning -# 2. Set up Python -# 3. Configure AWS Credentials -# 4. Install AWS Client -# 5. Get Current Sha Commit -# 6. Create Job Name -# 7. Update Test Configuration File -# 8. Install EKSClient -# 9. Create Gpu Node Pool -# 10. Check Current Node Pool | Current Elatic Pods -# 11. Apply Elastic -# 12. Wait 5 sec -# 13. Find ETCD TCP Address -# 14. Update Test Configuration File -# 15. Apply Multi Node Testing -# 16. Wait 120 secs -# 17. Listen to Jobs Logging -# 18. Statistics -# 19. Upload coverage results -# 20. Upload coverage to Codecov -# 21. Delete Group Node - -on: - push: - branches: - - never-ever-run- - #pull_request: - # types: [closed] - -env: - AWS_CLUSTER: pl-lightning-torchelastic - NODE_TYPE: g4dn.xlarge - NODES: 2 - NUM_GPUS: 1 - REGION: us-east-2 - MAX_CHECKS: 300 - CHECK_SPEEP: 2 - -jobs: - multi-nodes-gpu-testing: - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - python-version: [3.7] - pytorch-version: [1.6] - # Timeout: https://stackoverflow.com/a/59076067/4521646 - timeout-minutes: 50 - - # runs only when merged happened. - # if: github.event.pull_request.merged == true - steps: - - - name: Checkout Pytorch Lightning - uses: actions/checkout@v2 - with: - repository: PyTorchLightning/pytorch-lightning - ref: ${{ github.event.base_ref }} - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Weekly reset caching - run: echo "::set-output name=period::$(python -c 'import time ; days = time.time() / 60 / 60 / 24 ; print(int(days / 7))' 2>&1)" - id: times - - # Note: This uses an internal pip API and may not always work - # https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow - - name: Cache pip - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-td${{ steps.times.outputs.period }}-multi-node - restore-keys: | - ${{ runner.os }}-pip-td${{ steps.times.outputs.period }}- - - - name: Install dependencies - run: | - pip install awscli coverage - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_KEY_ID }} - aws-region: us-east-2 - - - name: Get Current Sha Commit - id: vars - shell: bash - run: | - echo "::set-output name=SHA::$(git rev-parse --short HEAD)" - echo $PWD - - - name: Create Job Name - id: job - shell: bash - run: | - echo "::set-output name=ID::$(echo '${{ steps.vars.outputs.SHA }}-${{ matrix.python-version }}-${{ matrix.pytorch-version }}' | tr . - )" - echo "::set-output name=ID_NAME::$(echo 's-${{ steps.vars.outputs.SHA }}-${{ matrix.python-version }}-${{ matrix.pytorch-version }}-e' | tr . 
- )" - - - name: Install EKSClient - run: | - curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp - sudo mv /tmp/eksctl /usr/local/bin - shell: bash - - - name: Create Gpu Node Pool - run: | - aws eks --region $REGION update-kubeconfig --name $AWS_CLUSTER - eksctl create nodegroup --name=${{ steps.job.outputs.ID }} --cluster=$AWS_CLUSTER --node-type=$NODE_TYPE --nodes=$NODES - # eksctl create nodegroup --name=${{ steps.job.outputs.ID }} --cluster=$AWS_CLUSTER --managed --spot --node-type=$NODE_TYPE --nodes=$NODES - shell: bash - - - name: Check Current Node Pool | Current Elatic Pods - run: | - eksctl get nodegroups --cluster $AWS_CLUSTER - kubectl get pods -n elastic-job - - - name: Apply Elastic - run: | - git clone https://github.com/pytorch/elastic.git - cd elastic/kubernetes - - kubectl apply -k config/default - - kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/master/nvidia-device-plugin.yml - kubectl apply -f https://raw.githubusercontent.com/pytorch/elastic/master/kubernetes/config/samples/etcd.yaml - - - name: Wait - # todo: this shall be dynamic - if: always() - shell: bash - run: | - sleep 5 - - - name: Find ETCD TCP Address - id: tcp - shell: bash - run: | - echo "::set-output name=TCP_ADDRESS::$(kubectl logs etcd -n elastic-job | grep -Eo '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}:[0-9]{1,4}' | head -1)" - - - name: Update Test Config. File - run: | - import os - from dtrun.configs import prepare_multi_nodes_gpu_config - - assert os.path.isfile('./tests/mnode_tests.txt') - prepare_multi_nodes_gpu_config( - './.github/multi-nodes-gpu.yaml', - './tests/mnode_tests.txt', - sha="${{ steps.vars.outputs.SHA }}", - tcp_address="${{ steps.tcp.outputs.TCP_ADDRESS }}", - python_version="${{ matrix.python-version }}", - torch_version="${{ matrix.pytorch-version }}", - num_gpus=1, - ) - shell: python - - - name: Apply Multi Node Testing - run: | - # cat ./.github/multi-nodes-gpu.yaml - kubectl apply -f ./.github/multi-nodes-gpu.yaml - shell: bash - - - name: Wait - # todo: this shall be dynamic - if: always() - shell: bash - run: | - sleep 400 - - - name: Listen to Jobs Logging - shell: bash - run: | - # todo: Enable automatic checking. - # while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl logs ${{ steps.job.outputs.ID_NAME }}-worker-0 -n elastic-job | grep -i "error\|failed"; then status_code=1 && break; elif kubectl logs ${{ steps.job.outputs.ID }}-worker-0 -n elastic-job | grep "TEST END"; then status_code=0 && break; else printf "." ; fi; sleep $CHECK_SPEEP; done && \ - # echo "Done waiting. 
Job status code: $status_code" && \ - kubectl logs ${{ steps.job.outputs.ID_NAME }}-worker-0 -n elastic-job > /tmp/full_output.txt - if grep -q 'END_TOKEN' /tmp/full_output.txt ; then csplit /tmp/full_output.txt '/END_TOKEN/'; else mv /tmp/full_output.txt xx00; fi && \ - cat xx00 - - - name: Statistics - if: success() - run: | - cat ./xx01 | tail -n +2 | base64 --decode > /home/runner/work/pytorch-lightning/pytorch-lightning/.coverage - cd /home/runner/work/pytorch-lightning/pytorch-lightning && coverage report && coverage xml - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 - if: always() - # see: https://github.com/actions/toolkit/issues/399 - continue-on-error: true - with: - token: ${{ secrets.CODECOV_TOKEN }} - file: coverage.xml - flags: multi-nodes,pytest - name: multi-nodes-coverage - fail_ci_if_error: false - - - name: Delete Group Node - if: always() - run: | - kubectl delete ElasticJob ${{ steps.job.outputs.ID_NAME }} -n elastic-job - eksctl delete nodegroup ${{ steps.job.outputs.ID }} --cluster=$AWS_CLUSTER diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 1cedf2c360306..e99863dc794d4 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -1,4 +1,4 @@ -name: "Check code" +name: Test on: # Trigger the workflow on push or pull request, but only for the master branch push: @@ -7,8 +7,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra branches: [master, "release/*"] jobs: - python-typing-mypy: - name: Python typing Mypy + mypy: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@master diff --git a/.github/workflows/docs-checks.yml b/.github/workflows/docs-checks.yml index 9d6b660a168f8..841f9128da8b1 100644 --- a/.github/workflows/docs-checks.yml +++ b/.github/workflows/docs-checks.yml @@ -1,4 +1,4 @@ -name: "Docs check" +name: Test # https://github.com/marketplace/actions/sphinx-build on: # Trigger the workflow on push or pull request, but only for the master branch @@ -8,7 +8,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra branches: [master, "release/*"] jobs: - test-docs: + doctest: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/events-nightly.yml b/.github/workflows/events-nightly.yml index f450e98380f10..7c2075ce5b440 100644 --- a/.github/workflows/events-nightly.yml +++ b/.github/workflows/events-nightly.yml @@ -1,4 +1,4 @@ -name: Nightly events +name: Nightly # https://jasonet.co/posts/scheduled-actions/ # https://github.community/t/distinct-job-for-each-schedule/17811/2 diff --git a/.github/workflows/events-recurrent.yml b/.github/workflows/events-recurrent.yml index d7f1872fde732..834adc6c169fa 100644 --- a/.github/workflows/events-recurrent.yml +++ b/.github/workflows/events-recurrent.yml @@ -1,4 +1,4 @@ -name: Recurrent events +name: Recurrent # https://jasonet.co/posts/scheduled-actions/ # https://github.community/t/distinct-job-for-each-schedule/17811/2 diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index f7017d35d9e88..169e01edd8d48 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -1,4 +1,4 @@ -name: Publish Docker Releases +name: Docker # https://www.docker.com/blog/first-docker-github-action-is-here # https://github.com/docker/build-push-action on: @@ -8,7 +8,7 @@ on: types: [published] jobs: - cuda-PL: + publish: runs-on: ubuntu-20.04 # only on releases if: 
startsWith(github.ref, 'refs/tags/') || github.event_name == 'release'
@@ -16,7 +16,7 @@
       fail-fast: false
       matrix:
         python_version: ["3.6", "3.7", "3.8", "3.9"]
-        pytorch_version: ["1.7", "1.8", "1.9"]
+        pytorch_version: ["1.7", "1.8", "1.9", "1.10"]
     steps:
     - name: Checkout
       uses: actions/checkout@v2
@@ -39,7 +39,7 @@
     - name: Publish Latest to Docker
       uses: docker/build-push-action@v1.1.0
       # only on releases and latest Python and PyTorch
-      if: matrix.python_version == 3.9 && matrix.pytorch_version == 1.9
+      if: matrix.python_version == '3.9' && matrix.pytorch_version == '1.10'
      with:
        repository: pytorchlightning/pytorch_lightning
        username: ${{ secrets.DOCKER_USERNAME }}
diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml
index a91837cab3340..09afd4db893d3 100644
--- a/.github/workflows/release-pypi.yml
+++ b/.github/workflows/release-pypi.yml
@@ -1,4 +1,4 @@
-name: PyPI Release
+name: PyPI
 # https://help.github.com/en/actions/reference/events-that-trigger-workflows
 on: # Trigger the workflow on push or pull request, but only for the master branch
diff --git a/README.md b/README.md
index 6e0e4b5dbf52d..9d618955e1140 100644
--- a/README.md
+++ b/README.md
@@ -78,18 +78,20 @@ Lightning is rigorously tested across multiple GPUs, TPUs CPUs and against major
-| System / PyTorch ver. | 1.6 (min. req.) | 1.7 | 1.8 (LTS) | 1.9 (latest) | 1.10 (nightly) | -| :------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| Conda py3.7 \[linux\] | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | -| Linux py3.7 \[GPUs\*\*\] | - | - | [![Build Status]()](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - | -| Linux py3.7 \[TPUs\*\*\*\] | - | - | [![CircleCI](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master.svg?style=svg)](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master) | - | - | -| Linux py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete 
testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | -| OSX py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | -| Windows py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | +| System / PyTorch ver. | 1.7 (min. req.) | 1.8 (LTS) | 1.9 | 1.10 (latest) | +| :------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Linux py3.7 \[GPUs\*\*\] | - | [![Build Status]()](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - | +| Linux py3.7 \[TPUs\*\*\*\] | - | [![CircleCI](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master.svg?style=svg)](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master) | - | - | +| Linux py3.8 (with Conda | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | 
[![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | +| Linux py3.{7,9} | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | +| OSX py3.{7,9} | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | +| Windows py3.{7,9} | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | +| Linux py3.6 | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | - | +| OSX py3.6 | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | - | +| Windows py3.6 | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | - | - _\*\* tests run on two NVIDIA P100_ -- _\*\*\* tests run on Google GKE TPUv2/3_ -- _TPU py3.7 means we support Colab and Kaggle env._ +- _\*\*\* tests run on Google GKE TPUv2/3. TPU py3.7 means we support Colab and Kaggle env._
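The Dockerfile changes below raise the default build arguments from PyTorch 1.6 to 1.8, in line with 1.6 support being dropped from the matrix above. A minimal smoke-test sketch, not part of the patch; the version strings simply mirror the new ARG defaults:

    # Run inside a rebuilt base image to confirm the bumped defaults took effect.
    import platform

    import torch

    assert platform.python_version().startswith("3.9")  # ARG PYTHON_VERSION=3.9
    assert torch.__version__.startswith("1.8")  # ARG PYTORCH_VERSION=1.8
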
diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index ab26af6c7accf..99e8d018f2884 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -17,7 +17,7 @@ ARG CUDA_VERSION=10.2 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu18.04 ARG PYTHON_VERSION=3.9 -ARG PYTORCH_VERSION=1.6 +ARG PYTORCH_VERSION=1.8 SHELL ["/bin/bash", "-c"] # https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/ diff --git a/dockers/base-xla/Dockerfile b/dockers/base-xla/Dockerfile index 5c86da2147717..e293343614927 100644 --- a/dockers/base-xla/Dockerfile +++ b/dockers/base-xla/Dockerfile @@ -19,7 +19,7 @@ LABEL maintainer="PyTorchLightning " # CALL: docker image build -t pytorch-lightning:XLA-extras-py3.6 -f dockers/base-xla/Dockerfile . --build-arg PYTHON_VERSION=3.8 ARG PYTHON_VERSION=3.9 ARG CONDA_VERSION=4.9.2 -ARG XLA_VERSION=1.6 +ARG XLA_VERSION=1.8 SHELL ["/bin/bash", "-c"] # for skipping configurations diff --git a/dockers/release/Dockerfile b/dockers/release/Dockerfile index 529680059791c..f4083f2dd42fc 100644 --- a/dockers/release/Dockerfile +++ b/dockers/release/Dockerfile @@ -13,7 +13,7 @@ # limitations under the License. ARG PYTHON_VERSION=3.9 -ARG PYTORCH_VERSION=1.6 +ARG PYTORCH_VERSION=1.8 FROM pytorchlightning/pytorch_lightning:base-cuda-py${PYTHON_VERSION}-torch${PYTORCH_VERSION} diff --git a/dockers/tpu-tests/Dockerfile b/dockers/tpu-tests/Dockerfile index 086bd349bc757..6605b9abbaadc 100644 --- a/dockers/tpu-tests/Dockerfile +++ b/dockers/tpu-tests/Dockerfile @@ -13,7 +13,7 @@ # limitations under the License. ARG PYTHON_VERSION=3.9 -ARG PYTORCH_VERSION=1.6 +ARG PYTORCH_VERSION=1.8 FROM pytorchlightning/pytorch_lightning:base-xla-py${PYTHON_VERSION}-torch${PYTORCH_VERSION} From 5ba5b7247336a1a8450fd579b2fe8495f827428d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Thu, 11 Nov 2021 18:15:18 +0100 Subject: [PATCH 03/18] Update tests to avoid the deprecated `weights_summary` (#10446) --- pytorch_lightning/core/lightning.py | 2 +- pytorch_lightning/tuner/batch_size_scaling.py | 3 - tests/callbacks/test_model_summary.py | 17 +---- tests/deprecated_api/test_remove_1-7.py | 7 +- tests/tuner/test_scale_batch_size.py | 1 - tests/utilities/test_model_summary.py | 74 +++++++++++-------- 6 files changed, 50 insertions(+), 54 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 7867211badb35..bf335ec8b7acc 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1708,7 +1708,7 @@ def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None Return: The model summary object """ - warning_cache.deprecation( + rank_zero_deprecation( "The `LightningModule.summarize` method is deprecated in v1.5 and will be removed in v1.7. 
" "Use `pytorch_lightning.utilities.model_summary.summarize` instead.", stacklevel=6, diff --git a/pytorch_lightning/tuner/batch_size_scaling.py b/pytorch_lightning/tuner/batch_size_scaling.py index faf2ee4f5bb9c..bf6e0cff49772 100644 --- a/pytorch_lightning/tuner/batch_size_scaling.py +++ b/pytorch_lightning/tuner/batch_size_scaling.py @@ -106,7 +106,6 @@ def __scale_batch_dump_params(trainer: "pl.Trainer") -> None: "current_epoch": trainer.current_epoch, "global_step": trainer.global_step, "max_steps": trainer.max_steps, - "weights_summary": trainer.weights_summary, "logger": trainer.logger, "callbacks": trainer.callbacks, "checkpoint_callback": trainer.checkpoint_callback, @@ -121,7 +120,6 @@ def __scale_batch_reset_params(trainer: "pl.Trainer", model: "pl.LightningModule trainer.auto_lr_find = False # avoid lr find being called multiple times trainer.fit_loop.current_epoch = 0 trainer.fit_loop.max_steps = steps_per_trial # take few steps - trainer.weights_summary = None # not needed before full run trainer.logger = DummyLogger() if trainer.logger is not None else None trainer.callbacks = [] # not needed before full run trainer.limit_train_batches = 1.0 @@ -134,7 +132,6 @@ def __scale_batch_restore_params(trainer: "pl.Trainer") -> None: trainer.fit_loop.current_epoch = trainer.__dumped_params["current_epoch"] trainer.fit_loop.global_step = trainer.__dumped_params["global_step"] trainer.fit_loop.max_steps = trainer.__dumped_params["max_steps"] - trainer.weights_summary = trainer.__dumped_params["weights_summary"] trainer.logger = trainer.__dumped_params["logger"] trainer.callbacks = trainer.__dumped_params["callbacks"] trainer.auto_scale_batch_size = trainer.__dumped_params["auto_scale_batch_size"] diff --git a/tests/callbacks/test_model_summary.py b/tests/callbacks/test_model_summary.py index a270d381f043a..f588d696c4e7e 100644 --- a/tests/callbacks/test_model_summary.py +++ b/tests/callbacks/test_model_summary.py @@ -17,8 +17,6 @@ from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ModelSummary -from pytorch_lightning.utilities import ModelSummaryMode -from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel @@ -48,26 +46,19 @@ def test_model_summary_callback_with_weights_summary_none(): def test_model_summary_callback_with_weights_summary(): - trainer = Trainer(weights_summary="top") - model_summary_callback = list(filter(lambda cb: isinstance(cb, ModelSummary), trainer.callbacks))[0] assert model_summary_callback._max_depth == 1 - trainer = Trainer(weights_summary="full") - + with pytest.deprecated_call(match=r"weights_summary=full\)` is deprecated"): + trainer = Trainer(weights_summary="full") model_summary_callback = list(filter(lambda cb: isinstance(cb, ModelSummary), trainer.callbacks))[0] assert model_summary_callback._max_depth == -1 - with pytest.raises( - MisconfigurationException, match=f"`weights_summary` can be None, {', '.join(list(ModelSummaryMode))}" - ): - _ = Trainer(weights_summary="invalid") - def test_model_summary_callback_override_weights_summary_flag(): - - trainer = Trainer(callbacks=ModelSummary(), weights_summary=None) + with pytest.deprecated_call(match=r"weights_summary=None\)` is deprecated"): + trainer = Trainer(callbacks=ModelSummary(), weights_summary=None) assert any(isinstance(cb, ModelSummary) for cb in trainer.callbacks) diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index ec44d9842ce2a..4da10fb0b666a 
100644
--- a/tests/deprecated_api/test_remove_1-7.py
+++ b/tests/deprecated_api/test_remove_1-7.py
@@ -38,12 +38,9 @@
 def test_v1_7_0_deprecated_lightning_module_summarize(tmpdir):
-    from pytorch_lightning.core.lightning import warning_cache
-
     model = BoringModel()
-    model.summarize(max_depth=1)
-    assert any("The `LightningModule.summarize` method is deprecated in v1.5" in w for w in warning_cache)
-    warning_cache.clear()
+    with pytest.deprecated_call(match="The `LightningModule.summarize` method is deprecated in v1.5"):
+        model.summarize(max_depth=1)


 def test_v1_7_0_moved_model_summary_and_layer_summary(tmpdir):
diff --git a/tests/tuner/test_scale_batch_size.py b/tests/tuner/test_scale_batch_size.py
index 9dbb24d9edf30..d8657e0e463d5 100644
--- a/tests/tuner/test_scale_batch_size.py
+++ b/tests/tuner/test_scale_batch_size.py
@@ -114,7 +114,6 @@ def test_trainer_reset_correctly(tmpdir):
         "logger",
         "max_steps",
         "global_step",
-        "weights_summary",
     ]
     expected = {ca: getattr(trainer, ca) for ca in changed_attributes}
     trainer.tuner.scale_batch_size(model, max_trials=5)
diff --git a/tests/utilities/test_model_summary.py b/tests/utilities/test_model_summary.py
index 1f44e0a74f68d..669892984f0cf 100644
--- a/tests/utilities/test_model_summary.py
+++ b/tests/utilities/test_model_summary.py
@@ -141,31 +141,41 @@ def forward(self, inp):

 def test_invalid_weights_summmary():
     """Test that invalid value for weights_summary raises an error."""
+    model = LightningModule()
+
     with pytest.raises(MisconfigurationException, match="`mode` can be None, .* got temp"):
-        summarize(UnorderedModel, mode="temp")
+        summarize(model, mode="temp")

-    with pytest.raises(MisconfigurationException, match="`weights_summary` can be None, .* got temp"):
+    with pytest.raises(
+        MisconfigurationException, match="`weights_summary` can be None, .* got temp"
+    ), pytest.deprecated_call(match=r"weights_summary=temp\)` is deprecated"):
         Trainer(weights_summary="temp")

+    with pytest.raises(MisconfigurationException, match="mode` can be .* got temp"):
+        ModelSummary(model, mode="temp")
+
+    with pytest.raises(ValueError, match="max_depth` can be .* got temp"):
+        ModelSummary(model, max_depth="temp")
+

-@pytest.mark.parametrize("mode", ["full", "top"])
-def test_empty_model_summary_shapes(mode: str):
+@pytest.mark.parametrize("max_depth", [-1, 1])
+def test_empty_model_summary_shapes(max_depth):
     """Test that the summary works for models that have no submodules."""
     model = EmptyModule()
-    summary = summarize(model, mode=mode)
+    summary = summarize(model, max_depth=max_depth)
     assert summary.in_sizes == []
     assert summary.out_sizes == []
     assert summary.param_nums == []


 @RunIf(min_gpus=1)
-@pytest.mark.parametrize("mode", ["full", "top"])
+@pytest.mark.parametrize("max_depth", [-1, 1])
 @pytest.mark.parametrize("device", [torch.device("cpu"), torch.device("cuda", 0)])
-def test_linear_model_summary_shapes(device, mode):
+def test_linear_model_summary_shapes(device, max_depth):
     """Test that the model summary correctly computes the input- and output shapes."""
     model = UnorderedModel().to(device)
     model.train()
-    summary = summarize(model, mode=mode)
+    summary = summarize(model, max_depth=max_depth)
     assert summary.in_sizes == [[2, 10], [2, 7], [2, 3], [2, 7], UNKNOWN_SIZE]  # layer 2  # combine  # layer 1  # relu
     assert summary.out_sizes == [[2, 2], [2, 9], [2, 5], [2, 7], UNKNOWN_SIZE]  # layer 2  # combine  # layer 1  # relu
     assert model.training
@@ -191,8 +201,8 @@ def test_hooks_removed_after_summarize(max_depth):
     assert handle.id not in 
handle.hooks_dict_ref() -@pytest.mark.parametrize("mode", ["full", "top"]) -def test_rnn_summary_shapes(mode): +@pytest.mark.parametrize("max_depth", [-1, 1]) +def test_rnn_summary_shapes(max_depth): """Test that the model summary works for RNNs.""" model = ParityModuleRNN() @@ -204,16 +214,16 @@ def test_rnn_summary_shapes(mode): model.example_input_array = torch.zeros(b, t, 10) - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert summary.in_sizes == [[b, t, i], [b, t, h]] # rnn # linear assert summary.out_sizes == [[[b, t, h], [[1, b, h], [1, b, h]]], [b, t, o]] # rnn # linear -@pytest.mark.parametrize("mode", ["full", "top"]) -def test_summary_parameter_count(mode): +@pytest.mark.parametrize("max_depth", [-1, 1]) +def test_summary_parameter_count(max_depth): """Test that the summary counts the number of parameters in every submodule.""" model = UnorderedModel() - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert summary.param_nums == [ model.layer2.weight.numel() + model.layer2.bias.numel(), model.combine.weight.numel() + model.combine.bias.numel(), @@ -223,24 +233,24 @@ def test_summary_parameter_count(mode): ] -@pytest.mark.parametrize("mode", ["full", "top"]) -def test_summary_layer_types(mode): +@pytest.mark.parametrize("max_depth", [-1, 1]) +def test_summary_layer_types(max_depth): """Test that the summary displays the layer names correctly.""" model = UnorderedModel() - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert summary.layer_types == ["Linear", "Linear", "Linear", "ReLU", "Conv2d"] -@pytest.mark.parametrize("mode", ["full", "top"]) -def test_summary_with_scripted_modules(mode): +@pytest.mark.parametrize("max_depth", [-1, 1]) +def test_summary_with_scripted_modules(max_depth): model = PartialScriptModel() - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert summary.layer_types == ["RecursiveScriptModule", "Linear"] assert summary.in_sizes == [UNKNOWN_SIZE, [2, 3]] assert summary.out_sizes == [UNKNOWN_SIZE, [2, 2]] -@pytest.mark.parametrize("mode", ["full", "top"]) +@pytest.mark.parametrize("max_depth", [-1, 1]) @pytest.mark.parametrize( ["example_input", "expected_size"], [ @@ -253,7 +263,7 @@ def test_summary_with_scripted_modules(mode): ((torch.zeros(2, 3), torch.zeros(4, 5)), [[2, 3], [4, 5]]), ], ) -def test_example_input_array_types(example_input, expected_size, mode): +def test_example_input_array_types(example_input, expected_size, max_depth): """Test the types of example inputs supported for display in the summary.""" class DummyModule(nn.Module): @@ -271,23 +281,23 @@ def forward(self, *args, **kwargs): model = DummyLightningModule() model.example_input_array = example_input - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert summary.in_sizes == [expected_size] -@pytest.mark.parametrize("mode", ["full", "top"]) -def test_model_size(mode): +@pytest.mark.parametrize("max_depth", [-1, 1]) +def test_model_size(max_depth): """Test model size is calculated correctly.""" model = PreCalculatedModel() - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert model.pre_calculated_model_size == summary.model_size -@pytest.mark.parametrize("mode", ["full", "top"]) -def test_empty_model_size(mode): +@pytest.mark.parametrize("max_depth", [-1, 1]) +def test_empty_model_size(max_depth): """Test empty model size is zero.""" 
model = EmptyModule() - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert 0.0 == summary.model_size @@ -328,11 +338,13 @@ def test_max_depth_equals_mode_interface(): """Test summarize(model, full/top) interface mapping matches max_depth.""" model = DeepNestedModel() - summary_top = summarize(model, mode="top") + with pytest.deprecated_call(match="mode` in `LightningModule.summarize` is deprecated"): + summary_top = summarize(model, mode="top") summary_0 = summarize(model, max_depth=1) assert str(summary_top) == str(summary_0) - summary_full = summarize(model, mode="full") + with pytest.deprecated_call(match="mode` in `LightningModule.summarize` is deprecated"): + summary_full = summarize(model, mode="full") summary_minus1 = summarize(model, max_depth=-1) assert str(summary_full) == str(summary_minus1) From 09cf167237e867f1ec67a5db87e5a02c2cea4b69 Mon Sep 17 00:00:00 2001 From: Raahul Singh Date: Fri, 12 Nov 2021 01:23:40 +0530 Subject: [PATCH 04/18] Change attributes of `RichProgressBarTheme` dataclass (#10454) Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> --- .../callbacks/progress/rich_progress.py | 34 +++++++++++++------ tests/callbacks/test_rich_progress_bar.py | 4 +-- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/pytorch_lightning/callbacks/progress/rich_progress.py b/pytorch_lightning/callbacks/progress/rich_progress.py index b07b487927418..c091223fba0bd 100644 --- a/pytorch_lightning/callbacks/progress/rich_progress.py +++ b/pytorch_lightning/callbacks/progress/rich_progress.py @@ -129,11 +129,12 @@ def render(self, task) -> RenderableType: class MetricsTextColumn(ProgressColumn): """A column containing text.""" - def __init__(self, trainer): + def __init__(self, trainer, style): self._trainer = trainer self._tasks = {} self._current_task_id = 0 self._metrics = {} + self._style = style super().__init__() def update(self, metrics): @@ -158,23 +159,34 @@ def render(self, task) -> Text: for k, v in self._metrics.items(): _text += f"{k}: {round(v, 3) if isinstance(v, float) else v} " - return Text(_text, justify="left") + return Text(_text, justify="left", style=self._style) @dataclass class RichProgressBarTheme: """Styles to associate to different base components. + Args: + description: Style for the progress bar description. For eg., Epoch x, Testing, etc. + progress_bar: Style for the bar in progress. + progress_bar_finished: Style for the finished progress bar. + progress_bar_pulse: Style for the progress bar when `IterableDataset` is being processed. + batch_progress: Style for the progress tracker (i.e 10/50 batches completed). + time: Style for the processed time and estimate time remaining. + processing_speed: Style for the speed of the batches being processed. 
+ metrics: Style for the metrics + https://rich.readthedocs.io/en/stable/style.html """ - text_color: str = "white" - progress_bar_complete: Union[str, Style] = "#6206E0" + description: Union[str, Style] = "white" + progress_bar: Union[str, Style] = "#6206E0" progress_bar_finished: Union[str, Style] = "#6206E0" progress_bar_pulse: Union[str, Style] = "#6206E0" - batch_process: str = "white" - time: str = "grey54" - processing_speed: str = "grey70" + batch_progress: Union[str, Style] = "white" + time: Union[str, Style] = "grey54" + processing_speed: Union[str, Style] = "grey70" + metrics: Union[str, Style] = "white" class RichProgressBar(ProgressBarBase): @@ -273,7 +285,7 @@ def _init_progress(self, trainer): self._reset_progress_bar_ids() self._console: Console = Console() self._console.clear_live() - self._metric_component = MetricsTextColumn(trainer) + self._metric_component = MetricsTextColumn(trainer, self.theme.metrics) self.progress = CustomProgress( *self.configure_columns(trainer), self._metric_component, @@ -356,7 +368,7 @@ def on_validation_epoch_start(self, trainer, pl_module): def _add_task(self, total_batches: int, description: str, visible: bool = True) -> Optional[int]: if self.progress is not None: return self.progress.add_task( - f"[{self.theme.text_color}]{description}", total=total_batches, visible=visible + f"[{self.theme.description}]{description}", total=total_batches, visible=visible ) def _update(self, progress_bar_id: int, visible: bool = True) -> None: @@ -453,11 +465,11 @@ def configure_columns(self, trainer) -> list: return [ TextColumn("[progress.description]{task.description}"), CustomBarColumn( - complete_style=self.theme.progress_bar_complete, + complete_style=self.theme.progress_bar, finished_style=self.theme.progress_bar_finished, pulse_style=self.theme.progress_bar_pulse, ), - BatchesProcessedColumn(style=self.theme.batch_process), + BatchesProcessedColumn(style=self.theme.batch_progress), CustomTimeColumn(style=self.theme.time), ProcessingSpeedColumn(style=self.theme.processing_speed), ] diff --git a/tests/callbacks/test_rich_progress_bar.py b/tests/callbacks/test_rich_progress_bar.py index 31681754423a8..8f3f20630b5c0 100644 --- a/tests/callbacks/test_rich_progress_bar.py +++ b/tests/callbacks/test_rich_progress_bar.py @@ -106,11 +106,11 @@ def test_rich_progress_bar_custom_theme(tmpdir): assert progress_bar.theme == theme args, kwargs = mocks["CustomBarColumn"].call_args - assert kwargs["complete_style"] == theme.progress_bar_complete + assert kwargs["complete_style"] == theme.progress_bar assert kwargs["finished_style"] == theme.progress_bar_finished args, kwargs = mocks["BatchesProcessedColumn"].call_args - assert kwargs["style"] == theme.batch_process + assert kwargs["style"] == theme.batch_progress args, kwargs = mocks["CustomTimeColumn"].call_args assert kwargs["style"] == theme.time From fa0ed17f8a18e887ba272a057e7c35c61d7f04fa Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Fri, 12 Nov 2021 18:12:25 +0530 Subject: [PATCH 05/18] remove deprecated train_loop (#10482) * remove deprecated train_loop * chlog --- CHANGELOG.md | 3 +++ docs/source/common/lightning_module.rst | 4 ++-- pytorch_lightning/trainer/trainer.py | 7 ------- tests/deprecated_api/test_remove_1-6.py | 8 -------- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0082201aa1cf9..87ecfdef4d448 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -113,6 +113,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Removed deprecated `utilities.distributed.rank_zero_{warn/deprecation}` ([#10451](https://github.com/PyTorchLightning/pytorch-lightning/pull/10451)) +- Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) + + ### Fixed - Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/PyTorchLightning/pytorch-lightning/issues/9702)) diff --git a/docs/source/common/lightning_module.rst b/docs/source/common/lightning_module.rst index 77a2c719736be..166b0b2384461 100644 --- a/docs/source/common/lightning_module.rst +++ b/docs/source/common/lightning_module.rst @@ -1167,14 +1167,14 @@ for more information. on_train_start() for epoch in epochs: - train_loop() + fit_loop() on_train_end() on_fit_end() teardown("fit") - def train_loop(): + def fit_loop(): on_epoch_start() on_train_epoch_start() diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b6dfcbfee8bc6..396289000251d 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -2134,13 +2134,6 @@ def __getstate__(self): def __setstate__(self, state): self.__dict__ = state - @property - def train_loop(self) -> FitLoop: - rank_zero_deprecation( - "`Trainer.train_loop` has been renamed to `Trainer.fit_loop` and will be removed in v1.6." - ) - return self.fit_loop - @property def terminate_on_nan(self) -> bool: rank_zero_deprecation("`Trainer.terminate_on_nan` is deprecated in v1.5 and will be removed in 1.7.") diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 686339df6317c..066922c8f4d16 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -69,14 +69,6 @@ def test_v1_6_0_is_overridden_model(): assert not is_overridden("foo", model=model) -def test_v1_6_0_train_loop(tmpdir): - trainer = Trainer() - with pytest.deprecated_call( - match=r"`Trainer.train_loop` has been renamed to `Trainer.fit_loop` and will be removed in v1.6." - ): - _ = trainer.train_loop - - def test_v1_6_0_deprecated_model_summary_mode(tmpdir): model = BoringModel() with pytest.deprecated_call(match="Argument `mode` in `ModelSummary` is deprecated in v1.4"): From 847e24011af32dcf7bf14577b1e2e0a5fe3fb727 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 12 Nov 2021 19:03:47 +0100 Subject: [PATCH 06/18] Squeeze the early stopping monitor (#10461) --- CHANGELOG.md | 2 +- pytorch_lightning/callbacks/early_stopping.py | 2 +- tests/callbacks/test_early_stopping.py | 13 +++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87ecfdef4d448..bad88d4411e9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -124,7 +124,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed `CombinedLoader` and `max_size_cycle` didn't receive a `DistributedSampler` ([#10374](https://github.com/PyTorchLightning/pytorch-lightning/issues/10374)) -- +- Squeeze the early stopping monitor to remove empty tensor dimensions ([#10461](https://github.com/PyTorchLightning/pytorch-lightning/issues/10461)) - diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 03b268f714a74..e292cd961711a 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -196,7 +196,7 @@ def _run_early_stopping_check(self, trainer: "pl.Trainer") -> None: ): # short circuit if metric not present return - current = logs.get(self.monitor) + current = logs[self.monitor].squeeze() should_stop, reason = self._evaluate_stopping_criteria(current) # stop every ddp process if any world process decides to stop diff --git a/tests/callbacks/test_early_stopping.py b/tests/callbacks/test_early_stopping.py index 9b20b96778e65..da200cc336504 100644 --- a/tests/callbacks/test_early_stopping.py +++ b/tests/callbacks/test_early_stopping.py @@ -469,3 +469,16 @@ def validation_step(self, batch, batch_idx): assert trainer.global_step == len(side_effect) * int(trainer.limit_train_batches * trainer.val_check_interval) else: assert trainer.current_epoch == len(side_effect) * trainer.check_val_every_n_epoch - 1 + + +def test_early_stopping_squeezes(): + early_stopping = EarlyStopping(monitor="foo") + trainer = Trainer() + trainer.callback_metrics["foo"] = torch.tensor([[[0]]]) + + with mock.patch( + "pytorch_lightning.callbacks.EarlyStopping._evaluate_stopping_criteria", return_value=(False, "") + ) as es_mock: + early_stopping._run_early_stopping_check(trainer) + + es_mock.assert_called_once_with(torch.tensor(0)) From fabb3644027a12c0df9acbdbc752d7ca7673396c Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Sat, 13 Nov 2021 01:02:43 +0530 Subject: [PATCH 07/18] Remove deprecated `mode` argument from ModelSummary (#10449) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 3 ++ pytorch_lightning/core/lightning.py | 9 +--- pytorch_lightning/utilities/model_summary.py | 53 ++------------------ tests/deprecated_api/test_remove_1-6.py | 10 ---- tests/utilities/test_model_summary.py | 21 -------- 5 files changed, 9 insertions(+), 87 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bad88d4411e9f..3795f99684f47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -113,6 +113,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Removed deprecated `utilities.distributed.rank_zero_{warn/deprecation}` ([#10451](https://github.com/PyTorchLightning/pytorch-lightning/pull/10451)) +- Removed deprecated `mode` argument from `ModelSummary` class ([#10449](https://github.com/PyTorchLightning/pytorch-lightning/pull/10449)) + + - Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index bf335ec8b7acc..a2fc9d1a21d4b 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1689,7 +1689,7 @@ def tbptt_split_batch(self, batch, split_size): return splits - def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None) -> Optional[ModelSummary]: + def summarize(self, max_depth: int = 1) -> ModelSummary: """Summarize this LightningModule. .. deprecated:: v1.5 @@ -1697,11 +1697,6 @@ def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None and will be removed in v1.7. Args: - mode: Can be either ``'top'`` (summarize only direct submodules) or ``'full'`` (summarize all layers). - - .. deprecated:: v1.4 - This parameter was deprecated in v1.4 in favor of `max_depth` and will be removed in v1.6. - max_depth: The maximum depth of layer nesting that the summary will include. A value of 0 turns the layer summary off. Default: 1. @@ -1714,7 +1709,7 @@ def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None stacklevel=6, ) - return summarize(self, mode, max_depth) + return summarize(self, max_depth) def freeze(self) -> None: r""" diff --git a/pytorch_lightning/utilities/model_summary.py b/pytorch_lightning/utilities/model_summary.py index 9c2690202df90..bab6da5368b65 100644 --- a/pytorch_lightning/utilities/model_summary.py +++ b/pytorch_lightning/utilities/model_summary.py @@ -23,8 +23,7 @@ from torch.utils.hooks import RemovableHandle import pytorch_lightning as pl -from pytorch_lightning.utilities import AMPType, DeviceType, ModelSummaryMode, rank_zero_deprecation -from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities import AMPType, DeviceType from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8 from pytorch_lightning.utilities.warnings import WarningCache @@ -130,13 +129,6 @@ class ModelSummary: Args: model: The model to summarize (also referred to as the root module). - mode: Can be one of - - - `top` (default): only the top-level modules will be recorded (the children of the root module) - - `full`: summarizes all layers and their submodules in the root module - - .. deprecated:: v1.4 - This parameter was deprecated in v1.4 in favor of `max_depth` and will be removed in v1.6. max_depth: Maximum depth of modules to show. Use -1 to show all modules or 0 to show no summary. Defaults to 1. 
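With the `mode` argument removed, the migration for downstream callers is mechanical. A minimal sketch, assuming `model` is any `LightningModule`; the mapping follows the deprecation message deleted below, where `max_depth=1` replicated `mode="top"` and `max_depth=-1` replicated `mode="full"`:

    from pytorch_lightning.utilities.model_summary import summarize

    # Previously: summarize(model, mode="top") / summarize(model, mode="full")
    summary_top = summarize(model, max_depth=1)  # direct submodules only
    summary_full = summarize(model, max_depth=-1)  # all layers and submodules
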
@@ -186,22 +178,9 @@ class ModelSummary: 0.530 Total estimated model params size (MB) """ - def __init__(self, model: "pl.LightningModule", mode: Optional[str] = None, max_depth: Optional[int] = 1) -> None: + def __init__(self, model: "pl.LightningModule", max_depth: int = 1) -> None: self._model = model - # temporary mapping from mode to max_depth - if max_depth is None or mode is not None: - if mode in ModelSummaryMode.supported_types(): - max_depth = ModelSummaryMode.get_max_depth(mode) - rank_zero_deprecation( - "Argument `mode` in `ModelSummary` is deprecated in v1.4" - f" and will be removed in v1.6. Use `max_depth={max_depth}` to replicate `mode={mode}` behaviour." - ) - else: - raise MisconfigurationException( - f"`mode` can be {', '.join(ModelSummaryMode.supported_types())}, got {mode}." - ) - if not isinstance(max_depth, int) or max_depth < -1: raise ValueError(f"`max_depth` can be -1, 0 or > 0, got {max_depth}.") @@ -436,17 +415,11 @@ def _is_lazy_weight_tensor(p: Tensor) -> bool: return False -def summarize( - lightning_module: "pl.LightningModule", mode: Optional[str] = None, max_depth: Optional[int] = None -) -> ModelSummary: +def summarize(lightning_module: "pl.LightningModule", max_depth: int = 1) -> ModelSummary: """Summarize the LightningModule specified by `lightning_module`. Args: lightning_module: `LightningModule` to summarize. - mode: Can be either ``'top'`` (summarize only direct submodules) or ``'full'`` (summarize all layers). - - .. deprecated:: v1.4 - This parameter was deprecated in v1.4 in favor of `max_depth` and will be removed in v1.6. max_depth: The maximum depth of layer nesting that the summary will include. A value of 0 turns the layer summary off. Default: 1. @@ -454,22 +427,4 @@ def summarize( Return: The model summary object """ - - # temporary mapping from mode to max_depth - if max_depth is None: - if mode is None: - model_summary = ModelSummary(lightning_module, max_depth=1) - elif mode in ModelSummaryMode.supported_types(): - max_depth = ModelSummaryMode.get_max_depth(mode) - rank_zero_deprecation( - "Argument `mode` in `LightningModule.summarize` is deprecated in v1.4" - f" and will be removed in v1.6. Use `max_depth={max_depth}` to replicate `mode={mode}` behavior." 
- ) - model_summary = ModelSummary(lightning_module, max_depth=max_depth) - else: - raise MisconfigurationException( - f"`mode` can be None, {', '.join(ModelSummaryMode.supported_types())}, got {mode}" - ) - else: - model_summary = ModelSummary(lightning_module, max_depth=max_depth) - return model_summary + return ModelSummary(lightning_module, max_depth=max_depth) diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 066922c8f4d16..0e450caa18b62 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -18,7 +18,6 @@ from pytorch_lightning import Trainer from pytorch_lightning.utilities.model_helpers import is_overridden -from pytorch_lightning.utilities.model_summary import ModelSummary from tests.helpers import BoringModel @@ -69,15 +68,6 @@ def test_v1_6_0_is_overridden_model(): assert not is_overridden("foo", model=model) -def test_v1_6_0_deprecated_model_summary_mode(tmpdir): - model = BoringModel() - with pytest.deprecated_call(match="Argument `mode` in `ModelSummary` is deprecated in v1.4"): - ModelSummary(model, mode="top") - - with pytest.deprecated_call(match="Argument `mode` in `LightningModule.summarize` is deprecated in v1.4"): - model.summarize(mode="top") - - def test_v1_6_0_deprecated_disable_validation(): trainer = Trainer() with pytest.deprecated_call(match="disable_validation` is deprecated in v1.4"): diff --git a/tests/utilities/test_model_summary.py b/tests/utilities/test_model_summary.py index 669892984f0cf..bc465e78fee38 100644 --- a/tests/utilities/test_model_summary.py +++ b/tests/utilities/test_model_summary.py @@ -143,17 +143,11 @@ def test_invalid_weights_summmary(): """Test that invalid value for weights_summary raises an error.""" model = LightningModule() - with pytest.raises(MisconfigurationException, match="`mode` can be None, .* got temp"): - summarize(model, mode="temp") - with pytest.raises( MisconfigurationException, match="`weights_summary` can be None, .* got temp" ), pytest.deprecated_call(match="weights_summary=temp)` is deprecated"): Trainer(weights_summary="temp") - with pytest.raises(MisconfigurationException, match="mode` can be .* got temp"): - ModelSummary(model, mode="temp") - with pytest.raises(ValueError, match="max_depth` can be .* got temp"): ModelSummary(model, max_depth="temp") @@ -334,21 +328,6 @@ def test_lazy_model_summary(): assert summary.trainable_parameters == 7 -def test_max_depth_equals_mode_interface(): - """Test summarize(model, full/top) interface mapping matches max_depth.""" - model = DeepNestedModel() - - with pytest.deprecated_call(match="mode` in `LightningModule.summarize` is deprecated"): - summary_top = summarize(model, mode="top") - summary_0 = summarize(model, max_depth=1) - assert str(summary_top) == str(summary_0) - - with pytest.deprecated_call(match="mode` in `LightningModule.summarize` is deprecated"): - summary_full = summarize(model, mode="full") - summary_minus1 = summarize(model, max_depth=-1) - assert str(summary_full) == str(summary_minus1) - - @pytest.mark.parametrize("max_depth", [-1, 0, 1, 3, 999]) def test_max_depth_param(max_depth): """Test that only the modules up to the desired depth are shown.""" From a8c2725ff8230a450f76f396fc372b7d5cb00076 Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Sun, 14 Nov 2021 01:02:30 +0530 Subject: [PATCH 08/18] remove deprecated signature for `transfer_batch_to_device` (#10480) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Co-authored-by: Adrian Wälchli --- CHANGELOG.md | 4 ++++ pytorch_lightning/core/hooks.py | 2 +- pytorch_lightning/core/lightning.py | 12 +----------- tests/accelerators/test_dp.py | 2 +- tests/deprecated_api/test_remove_1-6.py | 10 ---------- 5 files changed, 7 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3795f99684f47..bbad7fb1d4be2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -109,6 +109,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). * ([#10403](https://github.com/PyTorchLightning/pytorch-lightning/pull/10403)) * ([#10448](https://github.com/PyTorchLightning/pytorch-lightning/pull/10448)) +- Removed deprecated signature for `transfer_batch_to_device` hook. The new argument `dataloader_idx` is now required ([#10480](https://github.com/PyTorchLightning/pytorch-lightning/pull/10480)) + - Removed deprecated `utilities.distributed.rank_zero_{warn/deprecation}` ([#10451](https://github.com/PyTorchLightning/pytorch-lightning/pull/10451)) @@ -119,6 +121,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) +- Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) + ### Fixed - Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/PyTorchLightning/pytorch-lightning/issues/9702)) diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py index 0c47b1ec97557..376a6919ca43f 100644 --- a/pytorch_lightning/core/hooks.py +++ b/pytorch_lightning/core/hooks.py @@ -693,7 +693,7 @@ def transfer_batch_to_device(self, batch, device, dataloader_idx): # skip device transfer for the first dataloader or anything you wish pass else: - batch = super().transfer_batch_to_device(data, device) + batch = super().transfer_batch_to_device(data, device, dataloader_idx) return batch Raises: diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index a2fc9d1a21d4b..b6f064d7d9802 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -262,17 +262,7 @@ def _apply_batch_transfer_handler( ) -> Any: device = device or self.device batch = self.on_before_batch_transfer(batch, dataloader_idx) - - if is_param_in_hook_signature(self.transfer_batch_to_device, "dataloader_idx"): - batch = self.transfer_batch_to_device(batch, device, dataloader_idx) - else: - warning_cache.deprecation( - "`transfer_batch_to_device` hook signature has changed in v1.4." - " `dataloader_idx` parameter has been added to it. 
Support for" - " the old signature will be removed in v1.6" - ) - batch = self.transfer_batch_to_device(batch, device) - + batch = self.transfer_batch_to_device(batch, device, dataloader_idx) batch = self.on_after_batch_transfer(batch, dataloader_idx) return batch diff --git a/tests/accelerators/test_dp.py b/tests/accelerators/test_dp.py index 38a2caceed859..7313728256b4e 100644 --- a/tests/accelerators/test_dp.py +++ b/tests/accelerators/test_dp.py @@ -143,7 +143,7 @@ def test_dp_raise_exception_with_batch_transfer_hooks(tmpdir, monkeypatch): monkeypatch.setattr("torch.cuda.device_count", lambda: 2) class CustomModel(BoringModel): - def transfer_batch_to_device(self, batch, device): + def transfer_batch_to_device(self, batch, device, dataloader_idx): batch = batch.to(device) return batch diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 0e450caa18b62..d2f3cec5cba4f 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -21,16 +21,6 @@ from tests.helpers import BoringModel -def test_old_transfer_batch_to_device_hook(tmpdir): - class OldModel(BoringModel): - def transfer_batch_to_device(self, batch, device): - return super().transfer_batch_to_device(batch, device, None) - - trainer = Trainer(default_root_dir=tmpdir, limit_train_batches=1, limit_val_batches=0, max_epochs=1) - with pytest.deprecated_call(match="old signature will be removed in v1.6"): - trainer.fit(OldModel()) - - def test_v1_6_0_reload_dataloaders_every_epoch(tmpdir): model = BoringModel() From 7a9a08c5d3ca4699fb439f691c40e1320b37507a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Sat, 13 Nov 2021 21:35:03 +0100 Subject: [PATCH 09/18] Drop torch 1.6 testing (#10390) * Drop torch 1.6 support * Drop 1.6 support * Update CHANGELOG * Fixes * Split change * Undo change * 1.7 -> 1.7.1 https://github.com/pytorch/pytorch/issues/47354 * Force trigger nightly * Update .github/workflows/events-nightly.yml Co-authored-by: Aki Nitta * Revert 1.7.1 change - try wildcard * Update adjust versions and test it * Undo test changes * Revert "Undo test changes" This reverts commit 3a6acadd115e86f02d83a788f1978372ab6764f3. 
* Update CHANGELOG.md Co-authored-by: Aki Nitta --- pytorch_lightning/callbacks/quantization.py | 2 +- pytorch_lightning/distributed/dist.py | 5 +- .../overrides/torch_distributed.py | 99 ------------------- .../plugins/training_type/ddp.py | 16 ++- .../plugins/training_type/ddp_spawn.py | 17 ++-- .../connectors/accelerator_connector.py | 5 +- pytorch_lightning/utilities/__init__.py | 1 - pytorch_lightning/utilities/auto_restart.py | 35 ++----- pytorch_lightning/utilities/cloud_io.py | 9 +- pytorch_lightning/utilities/imports.py | 3 +- pytorch_lightning/utilities/seed.py | 6 +- tests/callbacks/test_quantization.py | 2 +- tests/conftest.py | 6 +- tests/core/test_metric_result_integration.py | 5 +- tests/helpers/datamodules.py | 1 - .../loops/optimization/test_optimizer_loop.py | 2 - tests/loops/test_loops.py | 5 - tests/plugins/test_double_plugin.py | 6 +- tests/profiler/test_profiler.py | 4 +- .../connectors/test_checkpoint_connector.py | 2 - .../connectors/test_signal_connector.py | 2 +- tests/trainer/test_data_loading.py | 2 +- tests/trainer/test_supporters.py | 2 - tests/utilities/test_auto_restart.py | 6 +- 24 files changed, 40 insertions(+), 203 deletions(-) delete mode 100644 pytorch_lightning/overrides/torch_distributed.py diff --git a/pytorch_lightning/callbacks/quantization.py b/pytorch_lightning/callbacks/quantization.py index ca82a574f71d1..42f0d575ffb6f 100644 --- a/pytorch_lightning/callbacks/quantization.py +++ b/pytorch_lightning/callbacks/quantization.py @@ -28,7 +28,7 @@ if _TORCH_GREATER_EQUAL_1_8: from torch.quantization import FakeQuantizeBase else: - # For torch 1.6 and 1.7. + # For torch 1.7. from torch.quantization import FakeQuantize as FakeQuantizeBase import pytorch_lightning as pl diff --git a/pytorch_lightning/distributed/dist.py b/pytorch_lightning/distributed/dist.py index 082e0c617a5f7..a0054d17936b0 100644 --- a/pytorch_lightning/distributed/dist.py +++ b/pytorch_lightning/distributed/dist.py @@ -13,7 +13,8 @@ # limitations under the License. from typing import Any -from pytorch_lightning.overrides.torch_distributed import broadcast_object_list +import torch.distributed + from pytorch_lightning.utilities import rank_zero_deprecation from pytorch_lightning.utilities.distributed import group as _group @@ -40,6 +41,6 @@ def broadcast(self, obj: Any, group=_group.WORLD): if self.rank != 0: obj = [None] * len(obj) - broadcast_object_list(obj, 0, group=group or _group.WORLD) + torch.distributed.broadcast_object_list(obj, 0, group=group or _group.WORLD) return obj[0] diff --git a/pytorch_lightning/overrides/torch_distributed.py b/pytorch_lightning/overrides/torch_distributed.py deleted file mode 100644 index 3cbbe5ea760ff..0000000000000 --- a/pytorch_lightning/overrides/torch_distributed.py +++ /dev/null @@ -1,99 +0,0 @@ -import logging -import pickle - -import torch - -from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8 - -log = logging.getLogger(__name__) - -if torch.distributed.is_available(): - from torch.distributed import Backend, broadcast, get_backend, get_rank, GroupMember - -# The code underneath is taken from PyTorch `torch/distributed/distributed_c10d.py` -# and enable broadcasting for PyTorch 1.6 and lower. 
- - -# https://github.com/pytorch/pytorch/blob/1.7/torch/distributed/distributed_c10d.py#L160 -def _rank_not_in_group(group): - """Helper that checks if the current process's rank is not in a given group.""" - if group is None: - return False - return group == GroupMember.NON_GROUP_MEMBER - - -# Taken from https://github.com/pytorch/pytorch/blob/1.7/torch/distributed/distributed_c10d.py#L1164 -def _object_to_tensor(obj): - buffer = pickle.dumps(obj) - byte_storage = torch.ByteStorage.from_buffer(buffer) # type: ignore[attr-defined] - byte_tensor = torch.ByteTensor(byte_storage) - local_size = torch.LongTensor([byte_tensor.numel()]) - return byte_tensor, local_size - - -# Taken from https://github.com/pytorch/pytorch/blob/1.7/torch/distributed/distributed_c10d.py -def _tensor_to_object(tensor, tensor_size): - buf = tensor.numpy().tobytes()[:tensor_size] - out = pickle.loads(buf) - return out - - -# Taken from https://github.com/pytorch/pytorch/blob/1.7/torch/distributed/distributed_c10d.py#L1327 -def _broadcast_object_list(object_list, src=0, group=None): - if _rank_not_in_group(group): - return - - my_rank = get_rank() - # Serialize object_list elements to tensors on src rank. - if my_rank == src: - tensor_list, size_list = zip(*(_object_to_tensor(obj) for obj in object_list)) - object_sizes_tensor = torch.cat(size_list) - else: - object_sizes_tensor = torch.LongTensor(len(object_list)) - - group_backend = get_backend(group) - is_nccl_backend = group_backend == Backend.NCCL - current_device = torch.device("cpu") - if is_nccl_backend: - # See note about using torch.cuda.current_device() here in docstring. - # We cannot simply use my_rank since rank == device is not necessarily - # true. - current_device = torch.device("cuda", torch.cuda.current_device()) - object_sizes_tensor = object_sizes_tensor.to(current_device) - object_sizes_tensor = object_sizes_tensor.to(current_device) - - # Broadcast object sizes - broadcast(object_sizes_tensor, src=src, group=group) - - # Concatenate and broadcast serialized object tensors - if my_rank == src: - object_tensor = torch.cat(tensor_list) - else: - object_tensor = torch.ByteTensor(torch.sum(object_sizes_tensor).item()) - - if is_nccl_backend: - object_tensor = object_tensor.to(current_device) - - broadcast(object_tensor, src=src, group=group) - - # Deserialize objects using their stored sizes. - offset = 0 - if my_rank != src: - for i, obj_size in enumerate(object_sizes_tensor): - obj_view = object_tensor[offset : offset + obj_size] - obj_view = obj_view.type(torch.ByteTensor) # type: ignore[call-overload] - offset += obj_size - object_list[i] = _tensor_to_object(obj_view, obj_size) - - -if not torch.distributed.is_available(): - # avoid failures on early PyTorch versions for Windows where - # not all functions used in `broadcast_object_list` are available. 
- def _broadcast_noop(obj, *_, **__): - return obj - - broadcast_object_list = _broadcast_noop -elif _TORCH_GREATER_EQUAL_1_8: - from torch.distributed.distributed_c10d import broadcast_object_list -else: - broadcast_object_list = _broadcast_object_list diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index c528be4c8bfef..84e9b55b9ee08 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -34,7 +34,6 @@ from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.overrides import LightningDistributedModule from pytorch_lightning.overrides.distributed import prepare_for_backward -from pytorch_lightning.overrides.torch_distributed import broadcast_object_list from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin @@ -43,7 +42,6 @@ _FAIRSCALE_AVAILABLE, _HYDRA_AVAILABLE, _IS_WINDOWS, - _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8, _TORCH_GREATER_EQUAL_1_9, _TORCH_GREATER_EQUAL_1_10, @@ -255,15 +253,13 @@ def pre_configure_ddp(self): # when not all parameter backward hooks are fired by the autograd engine even if require_grad is set to True. # This flag does come with a performance hit, so it is suggested to disable in cases where it is possible. self._ddp_kwargs["find_unused_parameters"] = self._ddp_kwargs.get("find_unused_parameters", True) - # todo: PyTorch 1.7.0 DDP introduces `self.reducer._rebuild_buckets()` breaking manual_optimization - if ( - _TORCH_GREATER_EQUAL_1_7 - and not self.lightning_module.automatic_optimization - and not self._ddp_kwargs.get("find_unused_parameters", False) + if not self.lightning_module.automatic_optimization and not self._ddp_kwargs.get( + "find_unused_parameters", False ): + # TODO: PyTorch 1.7.0 DDP introduces `self.reducer._rebuild_buckets()` breaking manual_optimization rank_zero_warn( - "From PyTorch 1.7.0, Lightning ``manual_optimization`` needs to set ``find_unused_parameters=True`` " - "to properly work with DDP." + "From PyTorch 1.7.0, Lightning `manual_optimization` needs to set `find_unused_parameters=True` to" + " properly work with DDP. Using `find_unused_parameters=True`." 
) self._ddp_kwargs["find_unused_parameters"] = True @@ -371,7 +367,7 @@ def broadcast(self, obj: object, src: int = 0) -> object: obj = [obj] if self.global_rank != src: obj = [None] - broadcast_object_list(obj, src, group=_group.WORLD) + torch.distributed.broadcast_object_list(obj, src, group=_group.WORLD) return obj[0] def pre_backward(self, closure_loss: torch.Tensor) -> None: diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index 926409925b9c7..677e031cd04af 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -27,12 +27,11 @@ import pytorch_lightning as pl from pytorch_lightning.overrides import LightningDistributedModule from pytorch_lightning.overrides.distributed import prepare_for_backward -from pytorch_lightning.overrides.torch_distributed import broadcast_object_list from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin from pytorch_lightning.trainer.states import TrainerFn -from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8, rank_zero_warn +from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_8, rank_zero_warn from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device from pytorch_lightning.utilities.cloud_io import atomic_save from pytorch_lightning.utilities.cloud_io import load as pl_load @@ -238,15 +237,13 @@ def pre_configure_ddp(self): # when not all parameter backward hooks are fired by the autograd engine even if require_grad is set to True. # This flag does come with a performance hit, so it is suggested to disable in cases where it is possible. self._ddp_kwargs["find_unused_parameters"] = self._ddp_kwargs.get("find_unused_parameters", True) - # todo: PyTorch 1.7.0 DDP introduces `self.reducer._rebuild_buckets()` breaking manual_optimization - if ( - _TORCH_GREATER_EQUAL_1_7 - and not self.lightning_module.automatic_optimization - and not self._ddp_kwargs.get("find_unused_parameters", False) + if not self.lightning_module.automatic_optimization and not self._ddp_kwargs.get( + "find_unused_parameters", False ): + # TODO: PyTorch 1.7.0 DDP introduces `self.reducer._rebuild_buckets()` breaking manual_optimization rank_zero_warn( - "From PyTorch 1.7.0, Lightning ``manual_optimization`` needs to set ``find_unused_parameters=True`` " - "to properly work with DDP." + "From PyTorch 1.7.0, Lightning `manual_optimization` needs to set `find_unused_parameters=True` to" + " properly work with DDP. Using `find_unused_parameters=True`." 
) self._ddp_kwargs["find_unused_parameters"] = True @@ -323,7 +320,7 @@ def broadcast(self, obj: object, src: int = 0) -> object: obj = [obj] if self.global_rank != src: obj = [None] - broadcast_object_list(obj, src, group=_group.WORLD) + torch.distributed.broadcast_object_list(obj, src, group=_group.WORLD) return obj[0] def model_to_device(self): diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index e15f7bb853db8..43eb65ce21a22 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -74,7 +74,6 @@ from pytorch_lightning.utilities.imports import ( _HOROVOD_AVAILABLE, _IPU_AVAILABLE, - _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8, _TPU_AVAILABLE, ) @@ -190,10 +189,8 @@ def _init_deterministic(self, deterministic: bool) -> None: self.deterministic = deterministic if _TORCH_GREATER_EQUAL_1_8: torch.use_deterministic_algorithms(deterministic) - elif _TORCH_GREATER_EQUAL_1_7: + else: torch.set_deterministic(deterministic) - else: # the minimum version Lightning supports is PyTorch 1.6 - torch._set_deterministic(deterministic) if deterministic: # fixing non-deterministic part of horovod # https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383 diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index 158d7356c91ce..7343e28d6d811 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -44,7 +44,6 @@ _OMEGACONF_AVAILABLE, _POPTORCH_AVAILABLE, _RICH_AVAILABLE, - _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8, _TORCH_GREATER_EQUAL_1_9, _TORCH_GREATER_EQUAL_1_10, diff --git a/pytorch_lightning/utilities/auto_restart.py b/pytorch_lightning/utilities/auto_restart.py index f0b50103cf2f2..ef52717636d90 100644 --- a/pytorch_lightning/utilities/auto_restart.py +++ b/pytorch_lightning/utilities/auto_restart.py @@ -305,9 +305,6 @@ def _wrap_generator_samplers(self) -> None: # access wrapped dataset attributes dataset_dict = self.dataset.__dict__ - # create a tuple of sampler names - samplers_names = tuple(v.__class__.__name__ for k, v in dataset_dict.items() if isinstance(v, Sampler)) - # create a dictionary of generator present within the dataset attributes dataset_sampler_generators = {k: v for k, v in dataset_dict.items() if isinstance(v, (Generator, Iterator))} @@ -318,31 +315,17 @@ def _wrap_generator_samplers(self) -> None: if isinstance(generator, Sampler): continue - # used to handle a weird behaviour from PyTorch 1.6 - # where the sampler is converted to a list_iterator - is_legacy = False - - if isinstance(generator, Generator): - # Generator name have the the form `SamplerName.__iter__` - generator_name = generator.__qualname__.split(".")[0] - else: - # assume the retrieved iterator is coming from sampler. - is_legacy = True - - # validate the base generator name matches a sampler name. - if is_legacy or any(sampler_name == generator_name for sampler_name in samplers_names): - - # wrap the generator into a `FastForwardSampler` - sampler = FastForwardSampler(generator, attr_name=generator_attr_name) + # wrap the generator into a `FastForwardSampler` + sampler = FastForwardSampler(generator, attr_name=generator_attr_name) - # if `CaptureIterableDataset` was available, the sampler should reload its own state. 
- if self._state_dict is not None: - sampler.load_state_dict(self._state_dict[generator_attr_name]) - # store the samplers - self.samplers[generator_attr_name] = sampler + # if `CaptureIterableDataset` was available, the sampler should reload its own state. + if self._state_dict is not None: + sampler.load_state_dict(self._state_dict[generator_attr_name]) + # store the samplers + self.samplers[generator_attr_name] = sampler - # replace generator with the generator from the `FastForwardSampler`. - dataset_dict[generator_attr_name] = iter(sampler) + # replace generator with the generator from the `FastForwardSampler`. + dataset_dict[generator_attr_name] = iter(sampler) self.reset_on_epoch() diff --git a/pytorch_lightning/utilities/cloud_io.py b/pytorch_lightning/utilities/cloud_io.py index 9b40f6d69cfad..2c9eb1f768d3c 100644 --- a/pytorch_lightning/utilities/cloud_io.py +++ b/pytorch_lightning/utilities/cloud_io.py @@ -19,7 +19,6 @@ import fsspec import torch from fsspec.implementations.local import AbstractFileSystem, LocalFileSystem -from packaging.version import Version def load( @@ -59,12 +58,6 @@ def atomic_save(checkpoint: Dict[str, Any], filepath: Union[str, Path]) -> None: """ bytesbuffer = io.BytesIO() - # Can't use the new zipfile serialization for 1.6.0 because there's a bug in - # torch.hub.load_state_dict_from_url() that prevents it from loading the new files. - # More details can be found here: https://github.com/pytorch/pytorch/issues/42239 - if Version(torch.__version__).release[:3] == (1, 6, 0): - torch.save(checkpoint, bytesbuffer, _use_new_zipfile_serialization=False) - else: - torch.save(checkpoint, bytesbuffer) + torch.save(checkpoint, bytesbuffer) with fsspec.open(filepath, "wb") as f: f.write(bytesbuffer.getvalue()) diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index edf5f75aee6a9..5db24fe0f5cff 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -70,7 +70,6 @@ def _compare_version(package: str, op: Callable, version: str, use_base_version: _IS_WINDOWS = platform.system() == "Windows" _IS_INTERACTIVE = hasattr(sys, "ps1") # https://stackoverflow.com/a/64523765 -_TORCH_GREATER_EQUAL_1_7 = _compare_version("torch", operator.ge, "1.7.0") _TORCH_GREATER_EQUAL_1_8 = _compare_version("torch", operator.ge, "1.8.0") _TORCH_GREATER_EQUAL_1_8_1 = _compare_version("torch", operator.ge, "1.8.1") _TORCH_GREATER_EQUAL_1_9 = _compare_version("torch", operator.ge, "1.9.0") @@ -112,4 +111,4 @@ def _compare_version(package: str, op: Callable, version: str, use_base_version: # experimental feature within PyTorch Lightning. 
def _fault_tolerant_training() -> bool: - return _TORCH_GREATER_EQUAL_1_7 and int(os.getenv("PL_FAULT_TOLERANT_TRAINING", 0)) + return bool(int(os.getenv("PL_FAULT_TOLERANT_TRAINING", 0))) diff --git a/pytorch_lightning/utilities/seed.py b/pytorch_lightning/utilities/seed.py index 3b20c53353411..e8fc243f484f8 100644 --- a/pytorch_lightning/utilities/seed.py +++ b/pytorch_lightning/utilities/seed.py @@ -21,7 +21,7 @@ import numpy as np import torch -from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7, rank_zero_warn +from pytorch_lightning.utilities import rank_zero_warn from pytorch_lightning.utilities.distributed import rank_zero_only log = logging.getLogger(__name__) @@ -113,9 +113,7 @@ def pl_worker_init_function(worker_id: int, rank: Optional[int] = None) -> None: np.random.seed(ss.generate_state(4)) # Spawn distinct SeedSequences for the PyTorch PRNG and the stdlib random module torch_ss, stdlib_ss = ss.spawn(2) - # PyTorch 1.7 and above takes a 64-bit seed - dtype = np.uint64 if _TORCH_GREATER_EQUAL_1_7 else np.uint32 - torch.manual_seed(torch_ss.generate_state(1, dtype=dtype)[0]) + torch.manual_seed(torch_ss.generate_state(1, dtype=np.uint64)[0]) # use 128 bits expressed as an integer stdlib_seed = (stdlib_ss.generate_state(2, dtype=np.uint64).astype(object) * [1 << 64, 1]).sum() random.seed(stdlib_seed) diff --git a/tests/callbacks/test_quantization.py b/tests/callbacks/test_quantization.py index fa2ee767bdc8c..e3dfb9b6a7edf 100644 --- a/tests/callbacks/test_quantization.py +++ b/tests/callbacks/test_quantization.py @@ -31,7 +31,7 @@ if _TORCH_GREATER_EQUAL_1_8: from torch.quantization import FakeQuantizeBase else: - # For torch 1.6 and 1.7. + # For torch 1.7. from torch.quantization import FakeQuantize as FakeQuantizeBase diff --git a/tests/conftest.py b/tests/conftest.py index 860f9357e4636..3d5548b7bd0ae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,7 +22,7 @@ import torch.distributed from pytorch_lightning.plugins.environments.lightning_environment import find_free_network_port -from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8 +from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8 from tests import _PATH_DATASETS @@ -95,10 +95,8 @@ def reset_deterministic_algorithm(): yield if _TORCH_GREATER_EQUAL_1_8: torch.use_deterministic_algorithms(False) - elif _TORCH_GREATER_EQUAL_1_7: + else: torch.set_deterministic(False) - else: # the minimum version Lightning supports is PyTorch 1.6 - torch._set_deterministic(False) @pytest.fixture diff --git a/tests/core/test_metric_result_integration.py b/tests/core/test_metric_result_integration.py index 12fe7f2fb4652..9ec2f150ac5d4 100644 --- a/tests/core/test_metric_result_integration.py +++ b/tests/core/test_metric_result_integration.py @@ -33,7 +33,7 @@ ResultCollection, ResultMetric, ) -from pytorch_lightning.utilities.imports import _fault_tolerant_training, _TORCH_GREATER_EQUAL_1_7 +from pytorch_lightning.utilities.imports import _fault_tolerant_training from tests.helpers import BoringModel from tests.helpers.runif import RunIf @@ -470,21 +470,18 @@ def on_epoch_end(self) -> None: @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="Requires at least PyTorch 1.7") def test_result_collection_reload(tmpdir): result_collection_reload(default_root_dir=tmpdir) @RunIf(min_gpus=1) @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@pytest.mark.skipif(not 
_TORCH_GREATER_EQUAL_1_7, reason="Requires at least PyTorch 1.7") def test_result_collection_reload_1_gpu_ddp(tmpdir): result_collection_reload(default_root_dir=tmpdir, strategy="ddp", gpus=1) @RunIf(min_gpus=2, special=True) @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="Requires at least PyTorch 1.7") def test_result_collection_reload_2_gpus(tmpdir): result_collection_reload(default_root_dir=tmpdir, strategy="ddp", gpus=2) diff --git a/tests/helpers/datamodules.py b/tests/helpers/datamodules.py index 0cb178a749a09..78e806b37937e 100644 --- a/tests/helpers/datamodules.py +++ b/tests/helpers/datamodules.py @@ -46,7 +46,6 @@ def prepare_data(self): self.dataset_cls(self.data_dir, train=False, download=True) def setup(self, stage: Optional[str] = None): - # TODO: need to split using random_split once updated to torch >= 1.6 if stage == "fit" or stage is None: self.mnist_train = self.dataset_cls(self.data_dir, train=True) if stage == "test" or stage is None: diff --git a/tests/loops/optimization/test_optimizer_loop.py b/tests/loops/optimization/test_optimizer_loop.py index 7e17cbbd56645..ae77c4387a398 100644 --- a/tests/loops/optimization/test_optimizer_loop.py +++ b/tests/loops/optimization/test_optimizer_loop.py @@ -24,7 +24,6 @@ from pytorch_lightning.loops.optimization.optimizer_loop import ClosureResult from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers import BoringModel -from tests.helpers.runif import RunIf def test_closure_result_deepcopy(): @@ -140,7 +139,6 @@ class CustomException(Exception): pass -@RunIf(min_torch="1.7.0") @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) @pytest.mark.parametrize("stop_epoch", (0, 1)) @pytest.mark.parametrize("stop_batch", (0, 1, 2)) diff --git a/tests/loops/test_loops.py b/tests/loops/test_loops.py index bad9a717d1629..8166f14754a51 100644 --- a/tests/loops/test_loops.py +++ b/tests/loops/test_loops.py @@ -253,7 +253,6 @@ def on_load_checkpoint(self, state_dict: Dict) -> None: assert state_dict == {"state_dict": {"a": 1}, "progress": {"increment": 1}} -@RunIf(min_torch="1.7.0") @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) @pytest.mark.parametrize("stop_epoch", (1, 2)) @pytest.mark.parametrize("stop_batch", (1, 2)) @@ -323,7 +322,6 @@ def val_dataloader(self): assert trainer.fit_loop.epoch_loop.val_loop.epoch_loop.batch_progress.state_dict() == expected -@RunIf(min_torch="1.7.0") @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) @pytest.mark.parametrize("accumulate_grad_batches", (1, 2, 3)) @pytest.mark.parametrize("n_optimizers", (1, 3, 5)) @@ -526,7 +524,6 @@ def configure_optimizers_multiple(self): assert state_dict["epoch_progress"]["current"]["started"] == stop_epoch -@RunIf(min_torch="1.7.0") @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) @pytest.mark.parametrize("n_optimizers", (1, 3, 5)) def test_loop_state_on_complete_run(n_optimizers, tmpdir): @@ -662,7 +659,6 @@ def train_dataloader(self): assert checkpoint["loops"]["fit_loop"] == expected -@RunIf(min_torch="1.7.0") @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) def test_fit_loop_reset(tmpdir): """Test that the reset logic in fit- and epoch loop is aware of whether the loop is restarting from a completed @@ -752,7 +748,6 @@ def test_fit_loop_reset(tmpdir): @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@RunIf(min_torch="1.7.0") @pytest.mark.parametrize( 
["train_datasets", "val_datasets"], [([RandomDataset], [RandomDataset]), ([RandomDataset], [RandomDataset, RandomDataset])], diff --git a/tests/plugins/test_double_plugin.py b/tests/plugins/test_double_plugin.py index cadd02c692af5..b3fdf87428522 100644 --- a/tests/plugins/test_double_plugin.py +++ b/tests/plugins/test_double_plugin.py @@ -20,7 +20,6 @@ from pytorch_lightning import Trainer from pytorch_lightning.plugins import DoublePrecisionPlugin -from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7 from tests.helpers.boring_model import BoringModel, RandomDataset from tests.helpers.runif import RunIf @@ -137,10 +136,7 @@ def on_fit_start(self): [ DoublePrecisionBoringModel, DoublePrecisionBoringModelNoForward, - pytest.param( - DoublePrecisionBoringModelComplexBuffer, - marks=pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="torch.complex not available"), - ), + DoublePrecisionBoringModelComplexBuffer, ], ) def test_double_precision(tmpdir, boring_model): diff --git a/tests/profiler/test_profiler.py b/tests/profiler/test_profiler.py index c4bcb56ca87bb..37756fcc62351 100644 --- a/tests/profiler/test_profiler.py +++ b/tests/profiler/test_profiler.py @@ -26,7 +26,6 @@ from pytorch_lightning.loggers.tensorboard import TensorBoardLogger from pytorch_lightning.profiler import AdvancedProfiler, PassThroughProfiler, PyTorchProfiler, SimpleProfiler from pytorch_lightning.profiler.pytorch import RegisterRecordFunction -from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7 from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _KINETO_AVAILABLE from tests.helpers import BoringModel, ManualOptimBoringModel @@ -394,8 +393,7 @@ def test_pytorch_profiler_nested(tmpdir): names = {"a", "b", "c"} ops = {"add", "empty", "fill_", "ones", "zero_", "zeros"} - if _TORCH_GREATER_EQUAL_1_7: - ops = {"aten::" + op for op in ops} + ops = {"aten::" + op for op in ops} expected = names.union(ops) assert events_name == expected, (events_name, torch.__version__, platform.system()) diff --git a/tests/trainer/connectors/test_checkpoint_connector.py b/tests/trainer/connectors/test_checkpoint_connector.py index 6b408845ed879..4a42265eb21b0 100644 --- a/tests/trainer/connectors/test_checkpoint_connector.py +++ b/tests/trainer/connectors/test_checkpoint_connector.py @@ -21,7 +21,6 @@ from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.trainer.states import TrainerFn from tests.helpers import BoringModel -from tests.helpers.runif import RunIf class HPCHookdedModel(BoringModel): @@ -133,7 +132,6 @@ def test_hpc_max_ckpt_version(tmpdir): @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@RunIf(min_torch="1.7.0") def test_loops_restore(tmpdir): """Test that required loop state_dict is loaded correctly by checkpoint connector.""" model = BoringModel() diff --git a/tests/trainer/connectors/test_signal_connector.py b/tests/trainer/connectors/test_signal_connector.py index 3da8c100fe40c..aa5407e2f1228 100644 --- a/tests/trainer/connectors/test_signal_connector.py +++ b/tests/trainer/connectors/test_signal_connector.py @@ -26,7 +26,7 @@ @pytest.mark.parametrize("register_handler", [False, True]) @pytest.mark.parametrize("terminate_gracefully", [False, True]) -@RunIf(min_torch="1.7.0", skip_windows=True) +@RunIf(skip_windows=True) def test_fault_tolerant_sig_handler(register_handler, terminate_gracefully, tmpdir): # hack to reset the signal diff --git a/tests/trainer/test_data_loading.py 
b/tests/trainer/test_data_loading.py index 0f6abd38e6836..97097b2074ca1 100644 --- a/tests/trainer/test_data_loading.py +++ b/tests/trainer/test_data_loading.py @@ -26,7 +26,7 @@ from tests.helpers.runif import RunIf -@RunIf(skip_windows=True, min_torch="1.7.0") +@RunIf(skip_windows=True) @pytest.mark.parametrize("mode", (1, 2, 3)) def test_replace_distributed_sampler(tmpdir, mode): class IndexedRandomDataset(RandomDataset): diff --git a/tests/trainer/test_supporters.py b/tests/trainer/test_supporters.py index e4598550c24fb..694d473155439 100644 --- a/tests/trainer/test_supporters.py +++ b/tests/trainer/test_supporters.py @@ -35,7 +35,6 @@ from pytorch_lightning.utilities.auto_restart import CaptureMapDataset, FastForwardSampler from pytorch_lightning.utilities.data import get_len from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_7 from tests.helpers.boring_model import RandomDataset @@ -312,7 +311,6 @@ def test_nested_calc_num_data(input_data, compute_func, expected_length): assert calculated_length == expected_length -@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="Requires at least PyTorch 1.7") @mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1", "PL_TRAINER_GPUS": "2"}) @mock.patch("torch.cuda.device_count", return_value=2) @mock.patch("torch.cuda.is_available", return_value=True) diff --git a/tests/utilities/test_auto_restart.py b/tests/utilities/test_auto_restart.py index 4e3385cebecbc..b36a9d1d76941 100644 --- a/tests/utilities/test_auto_restart.py +++ b/tests/utilities/test_auto_restart.py @@ -690,7 +690,6 @@ def create_dataloader(): } -@RunIf(min_torch="1.7.0") @pytest.mark.parametrize("use_fault_tolerant", ["0", "1"]) def test_data_loading_wraps_dataset_and_samplers(use_fault_tolerant, tmpdir): """This test ensures the dataset and sampler are properly wrapped when fault tolerant is enabled.""" @@ -785,7 +784,6 @@ def __len__(self): # TODO: test with `RandomGeneratorGetItemDataset` @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@RunIf(min_torch="1.7.0") @pytest.mark.parametrize( "dataset_class", [ @@ -921,7 +919,6 @@ def _run_training(trainer_kwargs, dataset_classes, fail_on_step: int = -1, ckpt_ @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@RunIf(min_torch="1.7.0") @pytest.mark.parametrize( "dataset_classes", [ @@ -975,7 +972,6 @@ def test_dataset_rng_states_restart_with_lightning(tmpdir, dataset_classes, mult @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@RunIf(min_torch="1.7.0") @pytest.mark.parametrize( ["train_datasets", "val_datasets"], [ @@ -1139,7 +1135,7 @@ def _fit_model( @pytest.mark.parametrize("failure_on_training", [False, True]) @pytest.mark.parametrize("failure_on_step", [False, True]) @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@RunIf(min_torch="1.7.0", skip_windows=True) +@RunIf(skip_windows=True) def test_auto_restart_under_signal(on_last_batch, val_check_interval, failure_on_training, failure_on_step, tmpdir): """This test asserts that if a signal is being sent during the training / validation phase, the model should restart in a reproducible way.""" From ffb40060c099f08f1f5b3e94ea58284e6503e556 Mon Sep 17 00:00:00 2001 From: thomas chaton Date: Mon, 15 Nov 2021 10:03:46 +0000 Subject: [PATCH 10/18] shutdown workers on failure (#10463) --- CHANGELOG.md | 3 +++ pytorch_lightning/trainer/trainer.py | 2 ++ tests/loops/test_loops.py | 34 
++++++++++++++++++++++------ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbad7fb1d4be2..0ff6ebbbc6512 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -131,6 +131,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `CombinedLoader` and `max_size_cycle` didn't receive a `DistributedSampler` ([#10374](https://github.com/PyTorchLightning/pytorch-lightning/issues/10374)) +- Fixed an issue that prevented the Trainer to shutdown workers when execution is interrupted due to failure([#10463](https://github.com/PyTorchLightning/pytorch-lightning/issues/10463)) + + - Squeeze the early stopping monitor to remove empty tensor dimensions ([#10461](https://github.com/PyTorchLightning/pytorch-lightning/issues/10461)) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 396289000251d..b84f03393309b 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -694,6 +694,8 @@ def _call_and_handle_interrupt(self, trainer_fn: Callable, *args: Any, **kwargs: # reset bookkeeping self.state.stage = None self.on_exception(exception) + # shutdown workers + self._data_connector.teardown() raise def fit( diff --git a/tests/loops/test_loops.py b/tests/loops/test_loops.py index 8166f14754a51..63a2211934ece 100644 --- a/tests/loops/test_loops.py +++ b/tests/loops/test_loops.py @@ -24,7 +24,7 @@ from pl_examples.bug_report_model import RandomDataset from pytorch_lightning import LightningModule, Trainer -from pytorch_lightning.callbacks import ModelCheckpoint +from pytorch_lightning.callbacks import Callback, ModelCheckpoint from pytorch_lightning.loops import Loop, TrainingBatchLoop from pytorch_lightning.trainer.progress import BaseProgress from tests.helpers import BoringModel @@ -907,8 +907,10 @@ def val_dataloader(self): @RunIf(min_torch="1.8.0") -@pytest.mark.parametrize("persistent_workers", (False, True)) -def test_workers_are_shutdown(tmpdir, persistent_workers): +@pytest.mark.parametrize("should_fail", [False, True]) +# False is de-activated due to slowness +@pytest.mark.parametrize("persistent_workers", [True]) +def test_workers_are_shutdown(tmpdir, should_fail, persistent_workers): # `num_workers == 1` uses `_MultiProcessingDataLoaderIter` # `persistent_workers` makes sure `self._iterator` gets set on the `DataLoader` instance @@ -936,12 +938,30 @@ def _get_iterator(self): train_dataloader = TestDataLoader(RandomDataset(32, 64), num_workers=1, persistent_workers=persistent_workers) val_dataloader = TestDataLoader(RandomDataset(32, 64), num_workers=1, persistent_workers=persistent_workers) + class TestCallback(Callback): + def on_train_epoch_end(self, trainer, *_): + if trainer.current_epoch == 1: + raise CustomException + max_epochs = 3 + model = BoringModel() - trainer = Trainer(default_root_dir=tmpdir, limit_train_batches=2, limit_val_batches=2, max_epochs=max_epochs) - trainer.fit(model, train_dataloader, val_dataloader) - assert train_dataloader.count_shutdown_workers == (2 if persistent_workers else max_epochs) + trainer = Trainer( + default_root_dir=tmpdir, + limit_train_batches=2, + limit_val_batches=2, + max_epochs=max_epochs, + callbacks=TestCallback() if should_fail else None, + ) + + if should_fail: + with pytest.raises(CustomException): + trainer.fit(model, train_dataloader, val_dataloader) + else: + trainer.fit(model, train_dataloader, val_dataloader) + + assert train_dataloader.count_shutdown_workers == 2 if should_fail else 
(2 if persistent_workers else max_epochs) # on sanity checking end, the workers are being deleted too. - assert val_dataloader.count_shutdown_workers == (2 if persistent_workers else max_epochs + 1) + assert val_dataloader.count_shutdown_workers == 2 if persistent_workers else (3 if should_fail else max_epochs + 1) assert train_dataloader._iterator is None assert val_dataloader._iterator is None From 8b0cb47cc03bc29a69e84a5637f8caf23ef364a7 Mon Sep 17 00:00:00 2001 From: puhuk Date: Mon, 15 Nov 2021 20:54:47 +0900 Subject: [PATCH 11/18] Remove deprecated `hpc_load` in `CheckpointConnector` (#10525) Co-authored-by: Aki Nitta --- CHANGELOG.md | 3 +++ .../trainer/connectors/checkpoint_connector.py | 11 ----------- tests/deprecated_api/test_remove_1-6.py | 10 ---------- 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ff6ebbbc6512..4aef0b569c7b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -123,6 +123,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) + +- Removed deprecated `CheckpointConnector.hpc_load` property in favor of `CheckpointConnector.restore` ([#10525](https://github.com/PyTorchLightning/pytorch-lightning/pull/10525)) + ### Fixed - Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/PyTorchLightning/pytorch-lightning/issues/9702)) diff --git a/pytorch_lightning/trainer/connectors/checkpoint_connector.py b/pytorch_lightning/trainer/connectors/checkpoint_connector.py index 921c2e0a7e160..ab0d3aa4288fa 100644 --- a/pytorch_lightning/trainer/connectors/checkpoint_connector.py +++ b/pytorch_lightning/trainer/connectors/checkpoint_connector.py @@ -413,17 +413,6 @@ def dump_checkpoint(self, weights_only: bool = False) -> dict: return checkpoint - def hpc_load(self, checkpoint_path: _PATH) -> None: - """Attempts to restore the full training and model state from a HPC checkpoint file. - - .. deprecated:: v1.4 Will be removed in v1.6. Use :meth:`restore` instead. - """ - rank_zero_deprecation( - "`CheckpointConnector.hpc_load()` was deprecated in v1.4 and will be removed in v1.6." - " Use `CheckpointConnector.restore()` instead." - ) - self.restore(checkpoint_path) - def max_ckpt_version_in_folder(self, dir_path: _PATH, name_key: str = "ckpt_") -> Optional[int]: """List up files in `dir_path` with `name_key`, then yield maximum suffix number. 
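A short migration sketch for the removal above (illustrative, not part of the patch): the deleted `hpc_load` was a thin deprecated wrapper that forwarded to `restore`, so call sites can substitute it directly. The helper name `restore_from_hpc` is a made-up example.

    import pytorch_lightning as pl

    def restore_from_hpc(trainer: "pl.Trainer", checkpoint_path: str) -> None:
        # Before this patch (deprecated since v1.4):
        #     trainer.checkpoint_connector.hpc_load(checkpoint_path)
        # After: `restore` performs the same full restoration of the model
        # and training state from the given checkpoint file.
        trainer.checkpoint_connector.restore(checkpoint_path)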
diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index d2f3cec5cba4f..4270b5d278a2d 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -62,13 +62,3 @@ def test_v1_6_0_deprecated_disable_validation(): trainer = Trainer() with pytest.deprecated_call(match="disable_validation` is deprecated in v1.4"): _ = trainer.disable_validation - - -def test_v1_6_0_deprecated_hpc_load(tmpdir): - model = BoringModel() - trainer = Trainer(default_root_dir=tmpdir, max_steps=1) - trainer.fit(model) - trainer.checkpoint_connector.hpc_save(tmpdir, trainer.logger) - checkpoint_path = trainer.checkpoint_connector.get_max_ckpt_path_from_folder(str(tmpdir)) - with pytest.deprecated_call(match=r"`CheckpointConnector.hpc_load\(\)` was deprecated in v1.4"): - trainer.checkpoint_connector.hpc_load(checkpoint_path) From 794c4b08c0dacd89e38d5cb393db3fbd14c358b1 Mon Sep 17 00:00:00 2001 From: Shivam Mehta Date: Mon, 15 Nov 2021 13:56:30 +0100 Subject: [PATCH 12/18] Remove deprecated `is_overridden(model=...)` (#10507) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- CHANGELOG.md | 3 +++ pytorch_lightning/utilities/model_helpers.py | 17 ++--------------- tests/deprecated_api/test_remove_1-6.py | 9 --------- 3 files changed, 5 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4aef0b569c7b6..02283180b19f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Removed +- Removed deprecated parameter `method` in `pytorch_lightning.utilities.model_helpers.is_overridden` ([#10507](https://github.com/PyTorchLightning/pytorch-lightning/pull/10507)) + + - Remove deprecated method `ClusterEnvironment.creates_children` ([#10339](https://github.com/PyTorchLightning/pytorch-lightning/issues/10339)) diff --git a/pytorch_lightning/utilities/model_helpers.py b/pytorch_lightning/utilities/model_helpers.py index 3146b33fe153d..bb48b481e625f 100644 --- a/pytorch_lightning/utilities/model_helpers.py +++ b/pytorch_lightning/utilities/model_helpers.py @@ -12,26 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. from functools import partial -from typing import Optional, Type, Union +from typing import Optional, Type from unittest.mock import Mock import pytorch_lightning as pl -from pytorch_lightning.utilities import rank_zero_deprecation -def is_overridden( - method_name: str, - instance: Optional[object] = None, - parent: Optional[Type[object]] = None, - model: Optional[Union["pl.LightningModule", "pl.LightningDataModule"]] = None, -) -> bool: - if model is not None and instance is None: - rank_zero_deprecation( - "`is_overriden(model=...)` has been deprecated and will be removed in v1.6." 
- "Please use `is_overriden(instance=...)`" - ) - instance = model - +def is_overridden(method_name: str, instance: Optional[object] = None, parent: Optional[Type[object]] = None) -> bool: if instance is None: # if `self.lightning_module` was passed as instance, it can be `None` return False diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 4270b5d278a2d..efb288a623d6a 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -17,7 +17,6 @@ import pytest from pytorch_lightning import Trainer -from pytorch_lightning.utilities.model_helpers import is_overridden from tests.helpers import BoringModel @@ -50,14 +49,6 @@ def test_v1_6_0_reload_dataloaders_every_epoch(tmpdir): assert tracker.mock_calls == expected_sequence -def test_v1_6_0_is_overridden_model(): - model = BoringModel() - with pytest.deprecated_call(match="and will be removed in v1.6"): - assert is_overridden("validation_step", model=model) - with pytest.deprecated_call(match="and will be removed in v1.6"): - assert not is_overridden("foo", model=model) - - def test_v1_6_0_deprecated_disable_validation(): trainer = Trainer() with pytest.deprecated_call(match="disable_validation` is deprecated in v1.4"): From 0c2cdbf88f0f72e1cbad0e73c00f2618d33e978d Mon Sep 17 00:00:00 2001 From: Danielle Pintz <38207072+daniellepintz@users.noreply.github.com> Date: Mon, 15 Nov 2021 08:44:20 -0500 Subject: [PATCH 13/18] Update issues templates (#10537) --- .github/ISSUE_TEMPLATE/bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/documentation.md | 4 ++-- .github/ISSUE_TEMPLATE/feature_request.md | 6 +++--- .../{code_improvement.md => refactor.md} | 12 ++++++------ 4 files changed, 14 insertions(+), 14 deletions(-) rename .github/ISSUE_TEMPLATE/{code_improvement.md => refactor.md} (83%) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 3a94ef6758910..729d258cfcd63 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,14 +1,14 @@ --- name: Bug report -about: Create a report to help us improve +about: Create a bug report to help us improve title: '' -labels: bug / fix, help wanted +labels: bug assignees: '' --- ## 🐛 Bug - + ### To Reproduce diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md index 75919587387a9..f5ff43d6f093a 100644 --- a/.github/ISSUE_TEMPLATE/documentation.md +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -1,8 +1,8 @@ --- name: Typos and doc fixes -about: Typos and doc fixes +about: Tell us about how we can improve our documentation title: '' -labels: documentation +labels: docs assignees: '' --- diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index ab95a714e6dd7..11da695decfe0 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,8 +1,8 @@ --- name: Feature request -about: Suggest an idea for this project +about: Propose a feature for this project title: '' -labels: enhancement +labels: feature assignees: '' --- @@ -12,7 +12,7 @@ assignees: '' ### Motivation - + ### Pitch diff --git a/.github/ISSUE_TEMPLATE/code_improvement.md b/.github/ISSUE_TEMPLATE/refactor.md similarity index 83% rename from .github/ISSUE_TEMPLATE/code_improvement.md rename to .github/ISSUE_TEMPLATE/refactor.md index 7608b604e611b..5e07b0aae2df1 100644 --- a/.github/ISSUE_TEMPLATE/code_improvement.md +++ 
b/.github/ISSUE_TEMPLATE/refactor.md @@ -1,18 +1,18 @@ --- -name: Code improvement -about: Suggest a code improvement, i.e. refactoring, deprecation, etc. +name: Refactor +about: Suggest a code refactor or deprecation title: '' -labels: refactors / code health +labels: refactor assignees: '' --- -## Proposed refactoring or deprecation +## Proposed refactor - + ### Motivation - + ### Pitch From ade44653737f8df203118e3f64759cbb54666e24 Mon Sep 17 00:00:00 2001 From: Aki Nitta Date: Mon, 15 Nov 2021 23:15:17 +0900 Subject: [PATCH 14/18] Update configs with new GitHub labels (#10532) Co-authored-by: thomas chaton --- .github/mergify.yml | 6 +++--- .github/stale.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/mergify.yml b/.github/mergify.yml index a2b1e8aede6de..53ec106873dfe 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -45,7 +45,7 @@ pull_request_rules: - "#changes-requested-reviews-by=0" # no requested changes actions: label: - add: [ "0:] Ready-To-Go" ] + add: [ "ready" ] - name: Not ready yet conditions: @@ -54,13 +54,13 @@ pull_request_rules: - "#changes-requested-reviews-by>=1" # no requested changes actions: label: - remove: [ "0:] Ready-To-Go" ] + remove: [ "ready" ] - name: add core reviewer conditions: - -conflict # skip if conflict - -draft # filter-out GH draft PRs - - label="0:] Ready-To-Go" + - label="ready" - "#approved-reviews-by<3" # number of review approvals - "#review-requested<3" # number of requested reviews actions: diff --git a/.github/stale.yml b/.github/stale.yml index 84049394d3aab..1ac5e7448c9ff 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -8,8 +8,8 @@ issues: daysUntilClose: 7 # Issues with these labels will never be considered stale exemptLabels: - - Important - - Priority + - p0 + - p1 # Comment to post when marking an issue as stale. Set to `false` to disable markComment: > This issue has been automatically marked as stale because it hasn't had any recent activity. 
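Illustrative usage of the new `is_overridden` signature from PATCH 12 above: the removed `model=` keyword maps one-to-one onto `instance=`. A minimal sketch, assuming any plain `LightningModule` subclass (`TinyModel` is a hypothetical name):

    from pytorch_lightning import LightningModule
    from pytorch_lightning.utilities.model_helpers import is_overridden

    class TinyModel(LightningModule):
        def validation_step(self, batch, batch_idx):
            return None  # overrides the default hook

    model = TinyModel()
    # was: is_overridden("validation_step", model=model)  -- keyword removed
    assert is_overridden("validation_step", instance=model)
    assert not is_overridden("test_step", instance=model)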
From 3077886a4ff31a023a6f2296dae1021090942a97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 15 Nov 2021 15:27:42 +0100 Subject: [PATCH 15/18] Enable the auto-cc bot (#10531) --- .github/lightning-probot.yml | 1 + .github/workflows/probot-auto-cc.yml | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 .github/lightning-probot.yml create mode 100644 .github/workflows/probot-auto-cc.yml diff --git a/.github/lightning-probot.yml b/.github/lightning-probot.yml new file mode 100644 index 0000000000000..bd6a330a448a4 --- /dev/null +++ b/.github/lightning-probot.yml @@ -0,0 +1 @@ +tracking_issue: 10530 diff --git a/.github/workflows/probot-auto-cc.yml b/.github/workflows/probot-auto-cc.yml new file mode 100644 index 0000000000000..0595c4eee65f7 --- /dev/null +++ b/.github/workflows/probot-auto-cc.yml @@ -0,0 +1,18 @@ +name: Probot + +on: + issues: + types: + - labeled + pull_request: + types: + - labeled + +jobs: + auto-cc: + if: ${{ github.repository_owner == 'PyTorchLightning' }} + runs-on: ubuntu-latest + steps: + - uses: carmocca/probot@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 40945a51765e72400dd3be8f1a84a52a69effa73 Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Mon, 15 Nov 2021 20:36:56 +0530 Subject: [PATCH 16/18] Remove deprecated `stochastic_weight_avg` example from the docs (#10502) --- docs/source/advanced/training_tricks.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/source/advanced/training_tricks.rst b/docs/source/advanced/training_tricks.rst index 28f81d98dcbd3..a389b0db69a2e 100644 --- a/docs/source/advanced/training_tricks.rst +++ b/docs/source/advanced/training_tricks.rst @@ -64,10 +64,7 @@ read `this post Date: Mon, 15 Nov 2021 22:40:08 +0530 Subject: [PATCH 17/18] Deprecate `DistributedType` in favor of `StrategyType` (#10505) --- CHANGELOG.md | 2 +- pytorch_lightning/lite/lite.py | 16 +-- .../plugins/training_type/ddp.py | 4 +- .../plugins/training_type/ddp2.py | 4 +- .../plugins/training_type/ddp_spawn.py | 4 +- .../plugins/training_type/deepspeed.py | 4 +- pytorch_lightning/plugins/training_type/dp.py | 4 +- .../plugins/training_type/fully_sharded.py | 4 +- .../plugins/training_type/horovod.py | 4 +- .../plugins/training_type/sharded.py | 4 +- .../plugins/training_type/sharded_spawn.py | 4 +- .../connectors/accelerator_connector.py | 80 ++++++------- pytorch_lightning/trainer/data_loading.py | 4 +- pytorch_lightning/trainer/trainer.py | 12 +- pytorch_lightning/utilities/__init__.py | 1 + pytorch_lightning/utilities/enums.py | 105 +++++++++++++++--- .../test_accelerator_connector.py | 4 +- tests/base/model_test_epoch_ends.py | 10 +- tests/deprecated_api/test_remove_1-8.py | 23 ++++ tests/helpers/pipelines.py | 4 +- tests/lite/test_lite.py | 26 ++--- tests/trainer/test_data_loading.py | 4 +- tests/trainer/test_trainer.py | 74 ++++++------ 23 files changed, 250 insertions(+), 151 deletions(-) create mode 100644 tests/deprecated_api/test_remove_1-8.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 02283180b19f4..3281a07ff689a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,7 +44,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/issues/10103)) -- +- Deprecated `DistributedType` in favor of `_StrategyType` ([#10505](https://github.com/PyTorchLightning/pytorch-lightning/pull/10505)) - diff --git a/pytorch_lightning/lite/lite.py b/pytorch_lightning/lite/lite.py index d36e874cbae7b..2a2ed9586b420 100644 --- a/pytorch_lightning/lite/lite.py +++ b/pytorch_lightning/lite/lite.py @@ -41,7 +41,7 @@ ) from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin -from pytorch_lightning.utilities import DeviceType, DistributedType, move_data_to_device +from pytorch_lightning.utilities import _StrategyType, DeviceType, move_data_to_device from pytorch_lightning.utilities.apply_func import apply_to_collection, convert_to_tensors from pytorch_lightning.utilities.data import has_iterable_dataset from pytorch_lightning.utilities.device_parser import _parse_devices @@ -477,14 +477,14 @@ def _supported_device_types() -> Sequence[DeviceType]: ) @staticmethod - def _supported_strategy_types() -> Sequence[DistributedType]: + def _supported_strategy_types() -> Sequence[_StrategyType]: return ( - DistributedType.DP, - DistributedType.DDP, - DistributedType.DDP_SPAWN, - DistributedType.DEEPSPEED, - DistributedType.DDP_SHARDED, - DistributedType.DDP_SHARDED_SPAWN, + _StrategyType.DP, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + _StrategyType.DEEPSPEED, + _StrategyType.DDP_SHARDED, + _StrategyType.DDP_SHARDED_SPAWN, ) @staticmethod diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index 84e9b55b9ee08..0285859a6714a 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -55,7 +55,7 @@ ReduceOp, sync_ddp_if_available, ) -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import DeadlockDetectedException, MisconfigurationException from pytorch_lightning.utilities.seed import reset_seed from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -79,7 +79,7 @@ class DDPPlugin(ParallelPlugin): devices (e.g. GPU) per node. It is very similar to how :mod:`torch.distributed.launch` launches processes. 
""" - distributed_backend = DistributedType.DDP + distributed_backend = _StrategyType.DDP def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/ddp2.py b/pytorch_lightning/plugins/training_type/ddp2.py index ef623a794da42..a142d518a0f2f 100644 --- a/pytorch_lightning/plugins/training_type/ddp2.py +++ b/pytorch_lightning/plugins/training_type/ddp2.py @@ -15,14 +15,14 @@ from pytorch_lightning.plugins.training_type.ddp import DDPPlugin from pytorch_lightning.utilities.apply_func import apply_to_collection -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.types import _METRIC_COLLECTION class DDP2Plugin(DDPPlugin): """DDP2 behaves like DP in one node, but synchronization across nodes behaves like in DDP.""" - distributed_backend = DistributedType.DDP2 + distributed_backend = _StrategyType.DDP2 @property def global_rank(self) -> int: diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index 677e031cd04af..a77027adb6dcf 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -43,7 +43,7 @@ ReduceOp, sync_ddp_if_available, ) -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.seed import reset_seed from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -58,7 +58,7 @@ class DDPSpawnPlugin(ParallelPlugin): """Spawns processes using the :func:`torch.multiprocessing.spawn` method and joins processes after training finishes.""" - distributed_backend = DistributedType.DDP_SPAWN + distributed_backend = _StrategyType.DDP_SPAWN def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 2464a8ba4eeca..94235f361d945 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -36,7 +36,7 @@ from pytorch_lightning.utilities import AMPType, GradClipAlgorithmType from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.distributed import log, rank_zero_info, rank_zero_only -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE from pytorch_lightning.utilities.model_helpers import is_overridden @@ -82,7 +82,7 @@ def _move_float_tensors_to_half(self, batch: Any): class DeepSpeedPlugin(DDPPlugin): - distributed_backend = DistributedType.DEEPSPEED + distributed_backend = _StrategyType.DEEPSPEED DEEPSPEED_ENV_VAR = "PL_DEEPSPEED_CONFIG_PATH" def __init__( diff --git a/pytorch_lightning/plugins/training_type/dp.py b/pytorch_lightning/plugins/training_type/dp.py index a0f53791bc373..83328e8c47271 100644 --- a/pytorch_lightning/plugins/training_type/dp.py +++ b/pytorch_lightning/plugins/training_type/dp.py @@ -20,7 +20,7 @@ from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin from pytorch_lightning.utilities.apply_func import apply_to_collection -from 
pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.types import _METRIC_COLLECTION @@ -29,7 +29,7 @@ class DataParallelPlugin(ParallelPlugin): """Implements data-parallel training in a single process, i.e., the model gets replicated to each device and each gets a split of the data.""" - distributed_backend = DistributedType.DP + distributed_backend = _StrategyType.DP def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/fully_sharded.py b/pytorch_lightning/plugins/training_type/fully_sharded.py index 704afa1a91aaa..c9601a905df1c 100644 --- a/pytorch_lightning/plugins/training_type/fully_sharded.py +++ b/pytorch_lightning/plugins/training_type/fully_sharded.py @@ -20,7 +20,7 @@ from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.ddp import DDPPlugin from pytorch_lightning.utilities import _FAIRSCALE_FULLY_SHARDED_AVAILABLE -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException if _FAIRSCALE_FULLY_SHARDED_AVAILABLE: @@ -30,7 +30,7 @@ class DDPFullyShardedPlugin(DDPPlugin): - distributed_backend = DistributedType.DDP_FULLY_SHARDED + distributed_backend = _StrategyType.DDP_FULLY_SHARDED def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/horovod.py b/pytorch_lightning/plugins/training_type/horovod.py index 30360e1ab458f..51558189a3d35 100644 --- a/pytorch_lightning/plugins/training_type/horovod.py +++ b/pytorch_lightning/plugins/training_type/horovod.py @@ -26,7 +26,7 @@ from pytorch_lightning.utilities.distributed import distributed_available from pytorch_lightning.utilities.distributed import group as dist_group from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType if _HOROVOD_AVAILABLE: import horovod.torch as hvd @@ -35,7 +35,7 @@ class HorovodPlugin(ParallelPlugin): """Plugin for Horovod distributed training integration.""" - distributed_backend = DistributedType.HOROVOD + distributed_backend = _StrategyType.HOROVOD def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/sharded.py b/pytorch_lightning/plugins/training_type/sharded.py index 5955f3a46f38e..d7563437bd16b 100644 --- a/pytorch_lightning/plugins/training_type/sharded.py +++ b/pytorch_lightning/plugins/training_type/sharded.py @@ -23,7 +23,7 @@ from pytorch_lightning.plugins.training_type.ddp import DDPPlugin from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE, _FAIRSCALE_OSS_FP16_BROADCAST_AVAILABLE, rank_zero_only -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException if _FAIRSCALE_AVAILABLE: @@ -36,7 +36,7 @@ class DDPShardedPlugin(DDPPlugin): """Optimizer and gradient sharded training provided by FairScale.""" - distributed_backend = DistributedType.DDP_SHARDED + distributed_backend = _StrategyType.DDP_SHARDED _REDUCE_BUFFER_SIZE_DEFAULT: int = 2 ** 23 # 8M def __init__(self, *args, **kwargs): diff --git 
a/pytorch_lightning/plugins/training_type/sharded_spawn.py b/pytorch_lightning/plugins/training_type/sharded_spawn.py index e0ae5c7bba187..12e627edbe5cb 100644 --- a/pytorch_lightning/plugins/training_type/sharded_spawn.py +++ b/pytorch_lightning/plugins/training_type/sharded_spawn.py @@ -24,7 +24,7 @@ from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE, rank_zero_only -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException if _FAIRSCALE_AVAILABLE: @@ -38,7 +38,7 @@ class DDPSpawnShardedPlugin(DDPSpawnPlugin): """Optimizer sharded training provided by FairScale.""" - distributed_backend = DistributedType.DDP_SHARDED_SPAWN + distributed_backend = _StrategyType.DDP_SHARDED_SPAWN def configure_ddp(self) -> None: trainer = self.lightning_module.trainer diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 43eb65ce21a22..47deeed2dca1d 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -61,10 +61,10 @@ TorchElasticEnvironment, ) from pytorch_lightning.utilities import ( + _StrategyType, AMPType, device_parser, DeviceType, - DistributedType, rank_zero_deprecation, rank_zero_info, rank_zero_warn, @@ -278,7 +278,7 @@ def _set_devices_if_none(self) -> None: self.devices = self.num_processes def _handle_accelerator_and_strategy(self) -> None: - deprecated_types = [t for t in DistributedType if t not in (DistributedType.TPU_SPAWN, DistributedType.DDP_CPU)] + deprecated_types = [t for t in _StrategyType if t not in (_StrategyType.TPU_SPAWN, _StrategyType.DDP_CPU)] if self.distributed_backend is not None and self.distributed_backend in deprecated_types: rank_zero_deprecation( f"Passing `Trainer(accelerator={self.distributed_backend!r})` has been deprecated" @@ -290,12 +290,12 @@ def _handle_accelerator_and_strategy(self) -> None: f" also passed `Trainer(accelerator={self.distributed_backend!r})`." f" HINT: Use just `Trainer(strategy={self.strategy!r})` instead." ) - if self.strategy == DistributedType.TPU_SPAWN: + if self.strategy == _StrategyType.TPU_SPAWN: raise MisconfigurationException( "`Trainer(strategy='tpu_spawn')` is not a valid strategy," " you can use `Trainer(strategy='ddp_spawn', accelerator='tpu')` instead." ) - if self.strategy == DistributedType.DDP_CPU: + if self.strategy == _StrategyType.DDP_CPU: raise MisconfigurationException( "`Trainer(strategy='ddp_cpu')` is not a valid strategy," " you can use `Trainer(strategy='ddp'|'ddp_spawn', accelerator='cpu')` instead." 
@@ -505,31 +505,31 @@ def _map_devices_to_accelerator(self, accelerator: str) -> bool: @property def use_dp(self) -> bool: - return self._distrib_type == DistributedType.DP + return self._distrib_type == _StrategyType.DP @property def use_ddp(self) -> bool: return self._distrib_type in ( - DistributedType.DDP, - DistributedType.DDP_SPAWN, - DistributedType.DDP_SHARDED, - DistributedType.DDP_SHARDED_SPAWN, - DistributedType.DDP_FULLY_SHARDED, - DistributedType.DEEPSPEED, - DistributedType.TPU_SPAWN, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + _StrategyType.DDP_SHARDED, + _StrategyType.DDP_SHARDED_SPAWN, + _StrategyType.DDP_FULLY_SHARDED, + _StrategyType.DEEPSPEED, + _StrategyType.TPU_SPAWN, ) @property def use_ddp2(self) -> bool: - return self._distrib_type == DistributedType.DDP2 + return self._distrib_type == _StrategyType.DDP2 @property def use_horovod(self) -> bool: - return self._distrib_type == DistributedType.HOROVOD + return self._distrib_type == _StrategyType.HOROVOD @property def use_deepspeed(self) -> bool: - return self._distrib_type == DistributedType.DEEPSPEED + return self._distrib_type == _StrategyType.DEEPSPEED @property def _is_sharded_training_type(self) -> bool: @@ -590,7 +590,7 @@ def root_gpu(self) -> Optional[int]: @staticmethod def _is_plugin_training_type(plugin: Union[str, TrainingTypePlugin]) -> bool: - if isinstance(plugin, str) and (plugin in TrainingTypePluginsRegistry or plugin in list(DistributedType)): + if isinstance(plugin, str) and (plugin in TrainingTypePluginsRegistry or plugin in list(_StrategyType)): return True return isinstance(plugin, TrainingTypePlugin) @@ -635,7 +635,7 @@ def select_precision_plugin(self) -> PrecisionPlugin: ) return TPUBf16PrecisionPlugin() - if self._distrib_type == DistributedType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): + if self._distrib_type == _StrategyType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): return DeepSpeedPrecisionPlugin(self.precision) if self.precision == 32: @@ -706,15 +706,15 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: use_slurm_ddp = self.use_ddp and self._is_slurm_managing_tasks use_torchelastic_ddp = self.use_ddp and TorchElasticEnvironment.is_using_torchelastic() use_kubeflow_ddp = self.use_ddp and KubeflowEnvironment.is_using_kubeflow() - use_ddp_spawn = self._distrib_type == DistributedType.DDP_SPAWN + use_ddp_spawn = self._distrib_type == _StrategyType.DDP_SPAWN use_ddp_cpu_spawn = use_ddp_spawn and self.use_cpu - use_tpu_spawn = self.use_tpu and self._distrib_type == DistributedType.TPU_SPAWN + use_tpu_spawn = self.use_tpu and self._distrib_type == _StrategyType.TPU_SPAWN use_ddp_cpu_torch_elastic = use_ddp_cpu_spawn and TorchElasticEnvironment.is_using_torchelastic() use_ddp_cpu_kubeflow = use_ddp_cpu_spawn and KubeflowEnvironment.is_using_kubeflow() use_ddp_cpu_slurm = use_ddp_cpu_spawn and self._is_slurm_managing_tasks - use_ddp_sharded = self._distrib_type == DistributedType.DDP_SHARDED - use_ddp_sharded_spawn = self._distrib_type == DistributedType.DDP_SHARDED_SPAWN - use_ddp_fully_sharded = self._distrib_type == DistributedType.DDP_FULLY_SHARDED + use_ddp_sharded = self._distrib_type == _StrategyType.DDP_SHARDED + use_ddp_sharded_spawn = self._distrib_type == _StrategyType.DDP_SHARDED_SPAWN + use_ddp_fully_sharded = self._distrib_type == _StrategyType.DDP_FULLY_SHARDED if use_tpu_spawn: ddp_plugin_cls = TPUSpawnPlugin @@ -839,27 +839,27 @@ def set_distributed_mode(self, strategy: Optional[str] = None): if 
self.has_horovodrun(): self._set_horovod_backend() elif self.num_gpus == 0 and self.num_nodes > 1: - self._distrib_type = DistributedType.DDP + self._distrib_type = _StrategyType.DDP elif self.num_gpus == 0 and self.num_processes > 1: - self.distributed_backend = DistributedType.DDP_SPAWN + self.distributed_backend = _StrategyType.DDP_SPAWN elif self.num_gpus > 1 and not _use_cpu: rank_zero_warn( "You requested multiple GPUs but did not specify a backend, e.g." ' `Trainer(strategy="dp"|"ddp"|"ddp2")`. Setting `strategy="ddp_spawn"` for you.' ) - self.distributed_backend = DistributedType.DDP_SPAWN + self.distributed_backend = _StrategyType.DDP_SPAWN # special case with DDP on CPUs - if self.distributed_backend == DistributedType.DDP_CPU: + if self.distributed_backend == _StrategyType.DDP_CPU: if _TPU_AVAILABLE: raise MisconfigurationException( "`accelerator='ddp_cpu'` is not supported on TPU machines. " "Learn more: https://github.com/PyTorchLightning/pytorch-lightning/issues/7810" ) if self.num_processes == 1 and self.num_nodes > 1: - self._distrib_type = DistributedType.DDP + self._distrib_type = _StrategyType.DDP else: - self._distrib_type = DistributedType.DDP_SPAWN + self._distrib_type = _StrategyType.DDP_SPAWN if self.num_gpus > 0: rank_zero_warn( "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." @@ -872,25 +872,25 @@ def set_distributed_mode(self, strategy: Optional[str] = None): elif self.has_tpu and not _use_cpu: self._device_type = DeviceType.TPU if isinstance(self.tpu_cores, int): - self._distrib_type = DistributedType.TPU_SPAWN + self._distrib_type = _StrategyType.TPU_SPAWN elif self.has_ipu and not _use_cpu: self._device_type = DeviceType.IPU elif self.distributed_backend and self._distrib_type is None: - self._distrib_type = DistributedType(self.distributed_backend) + self._distrib_type = _StrategyType(self.distributed_backend) if self.num_gpus > 0 and not _use_cpu: self._device_type = DeviceType.GPU - _gpu_distrib_types = (DistributedType.DP, DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP2) + _gpu_distrib_types = (_StrategyType.DP, _StrategyType.DDP, _StrategyType.DDP_SPAWN, _StrategyType.DDP2) # DP and DDP2 cannot run without GPU if self.num_gpus == 0 and self._distrib_type in _gpu_distrib_types and not _use_cpu: if (self.num_nodes and self.num_nodes > 1) or (self.num_processes and self.num_processes > 1): - if self._distrib_type in (DistributedType.DP, DistributedType.DDP2): + if self._distrib_type in (_StrategyType.DP, _StrategyType.DDP2): rank_zero_warn( f"{self._distrib_type.value!r} is not supported on CPUs, hence setting `strategy='ddp'`." ) - self._distrib_type = DistributedType.DDP + self._distrib_type = _StrategyType.DDP else: rank_zero_warn("You are running on single node with no parallelization, so distributed has no effect.") self._distrib_type = None @@ -900,28 +900,28 @@ def set_distributed_mode(self, strategy: Optional[str] = None): # for DDP overwrite nb processes by requested GPUs if self._device_type == DeviceType.GPU and self._distrib_type in ( - DistributedType.DDP, - DistributedType.DDP_SPAWN, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, ): self.num_processes = self.num_gpus - if self._device_type == DeviceType.GPU and self._distrib_type == DistributedType.DDP2: + if self._device_type == DeviceType.GPU and self._distrib_type == _StrategyType.DDP2: self.num_processes = self.num_nodes # Horovod is an extra case... 
- if self.distributed_backend == DistributedType.HOROVOD: + if self.distributed_backend == _StrategyType.HOROVOD: self._set_horovod_backend() using_valid_distributed = self.use_ddp or self.use_ddp2 if self.num_nodes > 1 and not using_valid_distributed: - # throw error to force user to choose a supported distributed type such as ddp or ddp2 + # throw error to force user to choose a supported strategy type such as ddp or ddp2 raise MisconfigurationException( "Your chosen strategy does not support `num_nodes > 1`. Please set `strategy=('ddp'|'ddp2')`." ) def _set_horovod_backend(self): self.check_horovod() - self._distrib_type = DistributedType.HOROVOD + self._distrib_type = _StrategyType.HOROVOD # Initialize Horovod to get rank / size info hvd.init() @@ -941,7 +941,7 @@ def check_interactive_compatibility(self): f"`Trainer(strategy={self._distrib_type.value!r})` or" f" `Trainer(accelerator={self._distrib_type.value!r})` is not compatible with an interactive" " environment. Run your code as a script, or choose one of the compatible backends:" - f" {', '.join(DistributedType.interactive_compatible_types())}." + f" {', '.join(_StrategyType.interactive_compatible_types())}." " In case you are spawning processes yourself, make sure to include the Trainer" " creation inside the worker function." ) diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py index 37a234f32f711..931f6a92958ee 100644 --- a/pytorch_lightning/trainer/data_loading.py +++ b/pytorch_lightning/trainer/data_loading.py @@ -38,7 +38,7 @@ FastForwardSampler, ) from pytorch_lightning.utilities.data import get_len, has_iterable_dataset, has_len_all_ranks -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _fault_tolerant_training from pytorch_lightning.utilities.model_helpers import is_overridden @@ -70,7 +70,7 @@ def _worker_check(self, dataloader: DataLoader, name: str) -> None: if not isinstance(dataloader, DataLoader): return - using_spawn = self._accelerator_connector._distrib_type == DistributedType.DDP_SPAWN + using_spawn = self._accelerator_connector._distrib_type == _StrategyType.DDP_SPAWN num_cpus = multiprocessing.cpu_count() # ddp_spawn + num_workers > 0 don't mix! 
tell the user diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b84f03393309b..5007927aa93e2 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -64,10 +64,10 @@ from pytorch_lightning.tuner.tuning import Tuner from pytorch_lightning.utilities import ( _IPU_AVAILABLE, + _StrategyType, _TPU_AVAILABLE, device_parser, DeviceType, - DistributedType, GradClipAlgorithmType, parsing, rank_zero_deprecation, @@ -1591,7 +1591,7 @@ def should_rank_save_checkpoint(self) -> bool: return self.training_type_plugin.should_rank_save_checkpoint @property - def _distrib_type(self) -> DistributedType: + def _distrib_type(self) -> _StrategyType: return self._accelerator_connector._distrib_type @property @@ -1754,10 +1754,10 @@ def distributed_sampler_kwargs(self) -> Optional[dict]: @property def data_parallel(self) -> bool: return self._distrib_type in ( - DistributedType.DP, - DistributedType.DDP, - DistributedType.DDP_SPAWN, - DistributedType.DDP2, + _StrategyType.DP, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + _StrategyType.DDP2, ) @property diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index 7343e28d6d811..22164908a3e3f 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -18,6 +18,7 @@ from pytorch_lightning.utilities.apply_func import move_data_to_device # noqa: F401 from pytorch_lightning.utilities.distributed import AllGatherGrad, rank_zero_info, rank_zero_only # noqa: F401 from pytorch_lightning.utilities.enums import ( # noqa: F401 + _StrategyType, AMPType, DeviceType, DistributedType, diff --git a/pytorch_lightning/utilities/enums.py b/pytorch_lightning/utilities/enums.py index 436c675c382c2..18b0336b82d5f 100644 --- a/pytorch_lightning/utilities/enums.py +++ b/pytorch_lightning/utilities/enums.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. """Enumerated utilities.""" -from enum import Enum -from typing import List, Optional, Union +from enum import Enum, EnumMeta +from typing import Any, List, Optional, Union + +from pytorch_lightning.utilities.warnings import rank_zero_deprecation class LightningEnum(str, Enum): @@ -37,6 +39,31 @@ def __hash__(self) -> int: return hash(self.value.lower()) +class _OnAccessEnumMeta(EnumMeta): + """Enum with a hook to run a function whenever a member is accessed. + + Adapted from: + https://www.buzzphp.com/posts/how-do-i-detect-and-invoke-a-function-when-a-python-enum-member-is-accessed + """ + + def __getattribute__(cls, name: str) -> Any: + obj = super().__getattribute__(name) + if isinstance(obj, Enum): + obj.deprecate() + return obj + + def __getitem__(cls, name: str) -> Any: + member = super().__getitem__(name) + member.deprecate() + return member + + def __call__(cls, value: str, *args: Any, **kwargs: Any) -> Any: + obj = super().__call__(value, *args, **kwargs) + if isinstance(obj, Enum): + obj.deprecate() + return obj + + class AMPType(LightningEnum): """Type of Automatic Mixed Precission used for training. @@ -73,8 +100,8 @@ def supported_types() -> List[str]: return [x.value for x in PrecisionType] -class DistributedType(LightningEnum): - """Define type of distributed computing. +class DistributedType(LightningEnum, metaclass=_OnAccessEnumMeta): + """Define type of training strategy. 
>>> # you can match the type with string >>> DistributedType.DDP == 'ddp' @@ -82,8 +109,24 @@ class DistributedType(LightningEnum): >>> # which is case invariant >>> DistributedType.DDP2 in ('ddp2', ) True + + Deprecated since v1.6.0 and will be removed in v1.8.0. + + Use `_StrategyType` instead. """ + DP = "dp" + DDP = "ddp" + DDP2 = "ddp2" + DDP_CPU = "ddp_cpu" + DDP_SPAWN = "ddp_spawn" + TPU_SPAWN = "tpu_spawn" + DEEPSPEED = "deepspeed" + HOROVOD = "horovod" + DDP_SHARDED = "ddp_sharded" + DDP_SHARDED_SPAWN = "ddp_sharded_spawn" + DDP_FULLY_SHARDED = "ddp_fully_sharded" + @staticmethod def interactive_compatible_types() -> List["DistributedType"]: """Returns a list containing interactive compatible DistributeTypes.""" @@ -98,17 +141,11 @@ def is_interactive_compatible(self) -> bool: """Returns whether self is interactive compatible.""" return self in DistributedType.interactive_compatible_types() - DP = "dp" - DDP = "ddp" - DDP2 = "ddp2" - DDP_CPU = "ddp_cpu" - DDP_SPAWN = "ddp_spawn" - TPU_SPAWN = "tpu_spawn" - DEEPSPEED = "deepspeed" - HOROVOD = "horovod" - DDP_SHARDED = "ddp_sharded" - DDP_SHARDED_SPAWN = "ddp_sharded_spawn" - DDP_FULLY_SHARDED = "ddp_fully_sharded" + def deprecate(self) -> None: + rank_zero_deprecation( + "`DistributedType` Enum has been deprecated in v1.6 and will be removed in v1.8." + f" Use the string value `{self.value!r}` instead." + ) class DeviceType(LightningEnum): @@ -188,3 +225,41 @@ get_max_depth(mode: str) -> int: @staticmethod def supported_types() -> List[str]: return [x.value for x in ModelSummaryMode] + + +class _StrategyType(LightningEnum): + """Define type of training strategy. + + >>> # you can match the type with string + >>> _StrategyType.DDP == 'ddp' + True + >>> # which is case invariant + >>> _StrategyType.DDP2 in ('ddp2', ) + True + """ + + DP = "dp" + DDP = "ddp" + DDP2 = "ddp2" + DDP_CPU = "ddp_cpu" + DDP_SPAWN = "ddp_spawn" + TPU_SPAWN = "tpu_spawn" + DEEPSPEED = "deepspeed" + HOROVOD = "horovod" + DDP_SHARDED = "ddp_sharded" + DDP_SHARDED_SPAWN = "ddp_sharded_spawn" + DDP_FULLY_SHARDED = "ddp_fully_sharded" + + @staticmethod + def interactive_compatible_types() -> List["_StrategyType"]: + """Returns a list containing interactive compatible _StrategyTypes.""" + return [ + _StrategyType.DP, + _StrategyType.DDP_SPAWN, + _StrategyType.DDP_SHARDED_SPAWN, + _StrategyType.TPU_SPAWN, + ] + + def is_interactive_compatible(self) -> bool: + """Returns whether self is interactive compatible.""" + return self in _StrategyType.interactive_compatible_types() diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index d95f5c8e6f9ea..e70d862b048e0 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -43,7 +43,7 @@ SLURMEnvironment, TorchElasticEnvironment, ) -from pytorch_lightning.utilities import DeviceType, DistributedType +from pytorch_lightning.utilities import _StrategyType, DeviceType from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel from tests.helpers.runif import RunIf @@ -636,7 +636,7 @@ def test_unsupported_distrib_types_on_cpu(training_type): with pytest.warns(UserWarning, match="is not supported on CPUs, hence setting `strategy='ddp"): trainer = Trainer(accelerator=training_type, num_processes=2) - assert trainer._distrib_type == DistributedType.DDP + assert trainer._distrib_type == _StrategyType.DDP def
test_accelerator_ddp_for_cpu(tmpdir): diff --git a/tests/base/model_test_epoch_ends.py b/tests/base/model_test_epoch_ends.py index 746ceb94a5de0..b001298e93dd0 100644 --- a/tests/base/model_test_epoch_ends.py +++ b/tests/base/model_test_epoch_ends.py @@ -15,7 +15,7 @@ import torch -from pytorch_lightning.utilities import DistributedType +from pytorch_lightning.utilities import _StrategyType class TestEpochEndVariations(ABC): @@ -34,13 +34,13 @@ def test_epoch_end(self, outputs): test_loss = self.get_output_metric(output, "test_loss") # reduce manually when using dp - if self.trainer._distrib_type == DistributedType.DP: + if self.trainer._distrib_type == _StrategyType.DP: test_loss = torch.mean(test_loss) test_loss_mean += test_loss # reduce manually when using dp test_acc = self.get_output_metric(output, "test_acc") - if self.trainer._distrib_type == DistributedType.DP: + if self.trainer._distrib_type == _StrategyType.DP: test_acc = torch.mean(test_acc) test_acc_mean += test_acc @@ -69,13 +69,13 @@ def test_epoch_end__multiple_dataloaders(self, outputs): test_loss = output["test_loss"] # reduce manually when using dp - if self.trainer._distrib_type == DistributedType.DP: + if self.trainer._distrib_type == _StrategyType.DP: test_loss = torch.mean(test_loss) test_loss_mean += test_loss # reduce manually when using dp test_acc = output["test_acc"] - if self.trainer._distrib_type == DistributedType.DP: + if self.trainer._distrib_type == _StrategyType.DP: test_acc = torch.mean(test_acc) test_acc_mean += test_acc diff --git a/tests/deprecated_api/test_remove_1-8.py b/tests/deprecated_api/test_remove_1-8.py new file mode 100644 index 0000000000000..f668f63b9f450 --- /dev/null +++ b/tests/deprecated_api/test_remove_1-8.py @@ -0,0 +1,23 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Test deprecated functionality which will be removed in v1.8.0.""" +import pytest + +from pytorch_lightning.utilities.enums import DistributedType + + +def test_v1_8_0_deprecated_distributed_type_enum(): + + with pytest.deprecated_call(match="has been deprecated in v1.6 and will be removed in v1.8."): + _ = DistributedType.DDP diff --git a/tests/helpers/pipelines.py b/tests/helpers/pipelines.py index 643d3e50cb894..6fa3bbb5dc943 100644 --- a/tests/helpers/pipelines.py +++ b/tests/helpers/pipelines.py @@ -15,7 +15,7 @@ from torchmetrics.functional import accuracy from pytorch_lightning import LightningDataModule, LightningModule, Trainer -from pytorch_lightning.utilities import DistributedType +from pytorch_lightning.utilities import _StrategyType from tests.helpers import BoringModel from tests.helpers.utils import get_default_logger, load_model_from_checkpoint, reset_seed @@ -82,7 +82,7 @@ def run_model_test( run_prediction_eval_model_template(model, dataloader, min_acc=min_acc) if with_hpc: - if trainer._distrib_type in (DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP2): + if trainer._distrib_type in (_StrategyType.DDP, _StrategyType.DDP_SPAWN, _StrategyType.DDP2): # on hpc this would work fine... but need to hack it for the purpose of the test trainer.optimizers, trainer.lr_schedulers, trainer.optimizer_frequencies = trainer.init_optimizers( pretrained_model diff --git a/tests/lite/test_lite.py b/tests/lite/test_lite.py index bd69cf359473e..7c79cb7f2e709 100644 --- a/tests/lite/test_lite.py +++ b/tests/lite/test_lite.py @@ -31,7 +31,7 @@ _replace_dataloader_init_method, ) from pytorch_lightning.plugins import DeepSpeedPlugin, PrecisionPlugin, TrainingTypePlugin -from pytorch_lightning.utilities import DistributedType +from pytorch_lightning.utilities import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.seed import pl_worker_init_function from tests.helpers.runif import RunIf @@ -251,12 +251,12 @@ def test_seed_everything(): @pytest.mark.parametrize( "strategy", [ - DistributedType.DP, - DistributedType.DDP, - DistributedType.DDP_SPAWN, - pytest.param(DistributedType.DEEPSPEED, marks=RunIf(deepspeed=True)), - pytest.param(DistributedType.DDP_SHARDED, marks=RunIf(fairscale=True)), - pytest.param(DistributedType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)), + _StrategyType.DP, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + pytest.param(_StrategyType.DEEPSPEED, marks=RunIf(deepspeed=True)), + pytest.param(_StrategyType.DDP_SHARDED, marks=RunIf(fairscale=True)), + pytest.param(_StrategyType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)), ], ) def test_setup_dataloaders_replace_custom_sampler(strategy): @@ -279,12 +279,12 @@ def test_setup_dataloaders_replace_custom_sampler(strategy): @pytest.mark.parametrize( "strategy", [ - DistributedType.DP, - DistributedType.DDP, - DistributedType.DDP_SPAWN, - pytest.param(DistributedType.DEEPSPEED, marks=RunIf(deepspeed=True)), - pytest.param(DistributedType.DDP_SHARDED, marks=RunIf(fairscale=True)), - pytest.param(DistributedType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)), + _StrategyType.DP, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + pytest.param(_StrategyType.DEEPSPEED, marks=RunIf(deepspeed=True)), + pytest.param(_StrategyType.DDP_SHARDED, marks=RunIf(fairscale=True)), + pytest.param(_StrategyType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)), ], ) @pytest.mark.parametrize("shuffle", [True, False]) diff --git 
a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py index 97097b2074ca1..4f3a482e37ac4 100644 --- a/tests/trainer/test_data_loading.py +++ b/tests/trainer/test_data_loading.py @@ -20,7 +20,7 @@ from torch.utils.data.sampler import BatchSampler, Sampler, SequentialSampler from pytorch_lightning import Trainer -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers import BoringModel, RandomDataset from tests.helpers.runif import RunIf @@ -137,7 +137,7 @@ def _get_warning_msg(): @pytest.mark.parametrize("num_workers", [0, 1]) def test_dataloader_warnings(tmpdir, num_workers): trainer = Trainer(default_root_dir=tmpdir, strategy="ddp_spawn", num_processes=2, fast_dev_run=4) - assert trainer._accelerator_connector._distrib_type == DistributedType.DDP_SPAWN + assert trainer._accelerator_connector._distrib_type == _StrategyType.DDP_SPAWN trainer.fit(TestSpawnBoringModel(num_workers)) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index d2e5f771a9c40..dc0ce2b68452c 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -48,7 +48,7 @@ DDPSpawnShardedPlugin, ) from pytorch_lightning.trainer.states import TrainerFn -from pytorch_lightning.utilities import DeviceType, DistributedType +from pytorch_lightning.utilities import _StrategyType, DeviceType from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.exceptions import DeadlockDetectedException, MisconfigurationException from pytorch_lightning.utilities.seed import seed_everything @@ -1154,15 +1154,15 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): ), ( dict(accelerator="ddp", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(accelerator="ddp", num_nodes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( dict(accelerator="ddp_cpu", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(accelerator="ddp2", gpus=None), @@ -1174,43 +1174,43 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): ), ( dict(accelerator="dp", gpus=1), - dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(accelerator="ddp", gpus=1), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(accelerator="ddp_cpu", num_processes=2, gpus=1), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(accelerator="ddp2", gpus=1), - 
dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(accelerator=None, gpus=2), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), ), ( dict(accelerator="dp", gpus=2), - dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(accelerator="ddp", gpus=2), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), ), ( dict(accelerator="ddp2", gpus=2), - dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(accelerator="ddp2", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(accelerator="dp", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ], ) @@ -2096,11 +2096,11 @@ def training_step(self, batch, batch_idx): ), ( dict(strategy="ddp", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(strategy="ddp", num_nodes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( dict(strategy="ddp2", gpus=None), @@ -2112,47 +2112,47 @@ def training_step(self, batch, batch_idx): ), ( dict(strategy="dp", gpus=1), - dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(strategy="ddp", gpus=1), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(strategy="ddp_spawn", gpus=1), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(strategy="ddp2", gpus=1), - dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(strategy=None, gpus=2), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, 
num_processes=2), ), ( dict(strategy="dp", gpus=2), - dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(strategy="ddp", gpus=2), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), ), ( dict(strategy="ddp2", gpus=2), - dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(strategy="ddp2", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(strategy="dp", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(strategy="ddp_spawn", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(strategy="ddp_spawn", num_processes=1, gpus=None), @@ -2161,7 +2161,7 @@ def training_step(self, batch, batch_idx): ( dict(strategy="ddp_fully_sharded", gpus=1), dict( - _distrib_type=DistributedType.DDP_FULLY_SHARDED, + _distrib_type=_StrategyType.DDP_FULLY_SHARDED, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1, @@ -2169,32 +2169,32 @@ def training_step(self, batch, batch_idx): ), ( dict(strategy=DDPSpawnPlugin(), num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(strategy=DDPSpawnPlugin(), gpus=2), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(strategy=DDPPlugin(), num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(strategy=DDPPlugin(), gpus=2), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(strategy=DDP2Plugin(), gpus=2), - dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(strategy=DataParallelPlugin(), gpus=2), - dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(strategy=DDPFullyShardedPlugin(), gpus=2), dict( - _distrib_type=DistributedType.DDP_FULLY_SHARDED, + 
_distrib_type=_StrategyType.DDP_FULLY_SHARDED, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1, @@ -2203,7 +2203,7 @@ def training_step(self, batch, batch_idx): ( dict(strategy=DDPSpawnShardedPlugin(), gpus=2), dict( - _distrib_type=DistributedType.DDP_SHARDED_SPAWN, + _distrib_type=_StrategyType.DDP_SHARDED_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1, @@ -2211,7 +2211,7 @@ def training_step(self, batch, batch_idx): ), ( dict(strategy=DDPShardedPlugin(), gpus=2), - dict(_distrib_type=DistributedType.DDP_SHARDED, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP_SHARDED, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ], ) From ae71284627793f92027a6515bf816cb855282593 Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Tue, 16 Nov 2021 00:12:00 +0530 Subject: [PATCH 18/18] Remove deprecated `disable_validation` property from Trainer (#10450) --- CHANGELOG.md | 5 +++++ pytorch_lightning/trainer/trainer.py | 9 --------- tests/deprecated_api/test_remove_1-6.py | 6 ------ 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3281a07ff689a..7002d1680856c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -127,8 +127,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) +- Removed deprecated `disable_validation` property from Trainer ([#10450](https://github.com/PyTorchLightning/pytorch-lightning/pull/10450)) + + - Removed deprecated `CheckpointConnector.hpc_load` property in favor of `CheckpointConnector.restore` ([#10525](https://github.com/PyTorchLightning/pytorch-lightning/pull/10525)) + + ### Fixed - Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/PyTorchLightning/pytorch-lightning/issues/9702)) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 5007927aa93e2..4cbb33c9b4766 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1783,15 +1783,6 @@ def _should_reload_dl_epoch(self) -> bool: n_epochs = self.reload_dataloaders_every_n_epochs return n_epochs and (not self.current_epoch % n_epochs) - @property - def disable_validation(self) -> bool: - """Check if validation is disabled during training.""" - rank_zero_deprecation( - "`trainer.disable_validation` is deprecated in v1.4 and will be removed in v1.6." - " Use `not trainer.enable_validation` instead." 
- ) - return not self.enable_validation - @property def enable_validation(self) -> bool: """Check if we should run validation during training.""" diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index efb288a623d6a..1ded07734a7de 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -47,9 +47,3 @@ def test_v1_6_0_reload_dataloaders_every_epoch(tmpdir): [call.val_dataloader()] + [call.train_dataloader(), call.val_dataloader()] * 3 + [call.test_dataloader()] ) assert tracker.mock_calls == expected_sequence - - -def test_v1_6_0_deprecated_disable_validation(): - trainer = Trainer() - with pytest.deprecated_call(match="disable_validation` is deprecated in v1.4"): - _ = trainer.disable_validation
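Stepping back from PATCH 17, the interesting piece is how the deprecation stays invisible until use: `DistributedType` members still compare and hash as before, but `_OnAccessEnumMeta` hooks every route to a member, whether attribute access, subscripting, or value lookup, and fires `deprecate()` on each. A self-contained sketch of the same pattern, with a hypothetical `OldStrategy` enum and plain `warnings` standing in for `rank_zero_deprecation`:

    import warnings
    from enum import Enum, EnumMeta
    from typing import Any

    warnings.simplefilter("always", DeprecationWarning)  # surface each hit in this demo

    class _OnAccessEnumMeta(EnumMeta):
        """Invoke `deprecate()` on a member however it is reached."""

        def __getattribute__(cls, name: str) -> Any:
            obj = super().__getattribute__(name)
            if isinstance(obj, Enum):  # plain methods and attributes pass through silently
                obj.deprecate()
            return obj

        def __getitem__(cls, name: str) -> Any:
            member = super().__getitem__(name)
            member.deprecate()
            return member

        def __call__(cls, value: str, *args: Any, **kwargs: Any) -> Any:
            obj = super().__call__(value, *args, **kwargs)
            if isinstance(obj, Enum):
                obj.deprecate()
            return obj

    class OldStrategy(Enum, metaclass=_OnAccessEnumMeta):
        DDP = "ddp"

        def deprecate(self) -> None:
            warnings.warn(
                f"`OldStrategy` is deprecated; use the string value {self.value!r} instead.",
                DeprecationWarning,
            )

    OldStrategy.DDP     # attribute access warns
    OldStrategy["DDP"]  # subscript access warns
    OldStrategy("ddp")  # value lookup warns

This is also why `_StrategyType` had to be a separate enum rather than an alias: every internal call site in the patch switches to `_StrategyType`, so only user code still touching `DistributedType` trips the hook. PATCH 18's removal is simpler to migrate, as the deleted property returned `not self.enable_validation` internally: `not trainer.enable_validation` is a drop-in replacement for `trainer.disable_validation`.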