From d577f461a49e8138bb01417727e3032ff01d4c42 Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Wed, 10 Nov 2021 21:05:48 +0530 Subject: [PATCH 01/18] Remove deprecated `utilities.distributed.rank_zero_{warn,deprecation}` (#10451) --- CHANGELOG.md | 3 +++ pytorch_lightning/callbacks/lr_monitor.py | 3 +-- pytorch_lightning/utilities/distributed.py | 20 -------------------- tests/deprecated_api/test_remove_1-6.py | 8 -------- 4 files changed, 4 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 495c9e2398df0..0082201aa1cf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -110,6 +110,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). * ([#10448](https://github.com/PyTorchLightning/pytorch-lightning/pull/10448)) +- Removed deprecated `utilities.distributed.rank_zero_{warn/deprecation}` ([#10451](https://github.com/PyTorchLightning/pytorch-lightning/pull/10451)) + + ### Fixed - Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/PyTorchLightning/pytorch-lightning/issues/9702)) diff --git a/pytorch_lightning/callbacks/lr_monitor.py b/pytorch_lightning/callbacks/lr_monitor.py index c9875cae83e62..d72f42d8f8616 100644 --- a/pytorch_lightning/callbacks/lr_monitor.py +++ b/pytorch_lightning/callbacks/lr_monitor.py @@ -27,8 +27,7 @@ import pytorch_lightning as pl from pytorch_lightning.callbacks.base import Callback -from pytorch_lightning.utilities import rank_zero_warn -from pytorch_lightning.utilities.distributed import rank_zero_deprecation +from pytorch_lightning.utilities import rank_zero_deprecation, rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException diff --git a/pytorch_lightning/utilities/distributed.py b/pytorch_lightning/utilities/distributed.py index b99f5213d02d8..1740518923c0f 100644 --- a/pytorch_lightning/utilities/distributed.py +++ b/pytorch_lightning/utilities/distributed.py @@ -66,26 +66,6 @@ def _get_rank() -> int: rank_zero_only.rank = getattr(rank_zero_only, "rank", _get_rank()) -def rank_zero_warn(*args: Any, stacklevel: int = 5, **kwargs: Any) -> None: - from pytorch_lightning.utilities.warnings import rank_zero_deprecation, rank_zero_warn - - rank_zero_deprecation( - "`pytorch_lightning.utilities.distributed.rank_zero_warn` has been moved to" - " `pytorch_lightning.utilities.rank_zero_warn` in v1.3.7 and will be removed in v1.6" - ) - return rank_zero_warn(*args, stacklevel=stacklevel, **kwargs) - - -def rank_zero_deprecation(*args: Any, stacklevel: int = 5, **kwargs: Any) -> None: - from pytorch_lightning.utilities.warnings import rank_zero_deprecation - - rank_zero_deprecation( - "`pytorch_lightning.utilities.distributed.rank_zero_deprecation` has been moved to" - " `pytorch_lightning.utilities.rank_zero_deprecation` in v1.3.7 and will be removed in v1.6" - ) - return rank_zero_deprecation(*args, stacklevel=stacklevel, **kwargs) - - def _info(*args: Any, stacklevel: int = 2, **kwargs: Any) -> None: if python_version() >= "3.8.0": kwargs["stacklevel"] = stacklevel diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index a64d28ebefecc..686339df6317c 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -17,7 +17,6 @@ import pytest from pytorch_lightning import Trainer -from pytorch_lightning.utilities.distributed import 
rank_zero_deprecation, rank_zero_warn from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.model_summary import ModelSummary from tests.helpers import BoringModel @@ -78,13 +77,6 @@ def test_v1_6_0_train_loop(tmpdir): _ = trainer.train_loop -def test_v1_6_0_rank_zero_warnings_moved(): - with pytest.deprecated_call(match="in v1.3.7 and will be removed in v1.6"): - rank_zero_warn("test") - with pytest.deprecated_call(match="in v1.3.7 and will be removed in v1.6"): - rank_zero_deprecation("test") - - def test_v1_6_0_deprecated_model_summary_mode(tmpdir): model = BoringModel() with pytest.deprecated_call(match="Argument `mode` in `ModelSummary` is deprecated in v1.4"): From d2aaf6b4cc420a4ef2aa4d1db29a0e881cea9406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Wed, 10 Nov 2021 17:59:10 +0100 Subject: [PATCH 02/18] Upgrade CI after the 1.10 release (#10075) --- .github/workflows/ci_dockers.yml | 8 +- .github/workflows/ci_pkg-install.yml | 4 +- .github/workflows/ci_schema.yml | 4 +- .github/workflows/ci_test-base.yml | 5 +- .github/workflows/ci_test-conda.yml | 4 +- .github/workflows/ci_test-full.yml | 4 +- .github/workflows/ci_test-mnodes.yml | 210 ------------------------- .github/workflows/code-checks.yml | 5 +- .github/workflows/docs-checks.yml | 4 +- .github/workflows/events-nightly.yml | 2 +- .github/workflows/events-recurrent.yml | 2 +- .github/workflows/release-docker.yml | 8 +- .github/workflows/release-pypi.yml | 2 +- README.md | 22 +-- dockers/base-cuda/Dockerfile | 2 +- dockers/base-xla/Dockerfile | 2 +- dockers/release/Dockerfile | 2 +- dockers/tpu-tests/Dockerfile | 2 +- 18 files changed, 41 insertions(+), 251 deletions(-) delete mode 100644 .github/workflows/ci_test-mnodes.yml diff --git a/.github/workflows/ci_dockers.yml b/.github/workflows/ci_dockers.yml index 02426529574f6..bd45247e15df2 100644 --- a/.github/workflows/ci_dockers.yml +++ b/.github/workflows/ci_dockers.yml @@ -1,4 +1,4 @@ -name: CI build Docker +name: Docker # https://www.docker.com/blog/first-docker-github-action-is-here # https://github.com/docker/build-push-action # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows @@ -23,9 +23,9 @@ jobs: strategy: fail-fast: false matrix: - # should be the config used in '.github/workflows/release-docker.yml', but we just keep one to check. 
- python_version: ["3.9"] - pytorch_version: ["1.9"] + # the config used in '.azure-pipelines/gpu-tests.yml' since the Dockerfile uses the cuda image + python_version: ["3.7"] + pytorch_version: ["1.8"] steps: - name: Checkout uses: actions/checkout@v2 diff --git a/.github/workflows/ci_pkg-install.yml b/.github/workflows/ci_pkg-install.yml index 12f3976d078e4..1fd7ed49d5a47 100644 --- a/.github/workflows/ci_pkg-install.yml +++ b/.github/workflows/ci_pkg-install.yml @@ -1,4 +1,4 @@ -name: Install pkg +name: Package # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows on: # Trigger the workflow on push or pull request, but only for the master branch @@ -9,7 +9,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra jobs: - pkg-install: + install: runs-on: ${{ matrix.os }} strategy: fail-fast: false diff --git a/.github/workflows/ci_schema.yml b/.github/workflows/ci_schema.yml index 51c4400666fd0..d635285fae39a 100644 --- a/.github/workflows/ci_schema.yml +++ b/.github/workflows/ci_schema.yml @@ -1,11 +1,11 @@ -name: CI action schema +name: Schema on: # Trigger the workflow on push or pull request, but only for the master branch push: {} pull_request: branches: [master, "release/*"] jobs: - validate-schema: + check: runs-on: ubuntu-20.04 steps: - name: Checkout diff --git a/.github/workflows/ci_test-base.yml b/.github/workflows/ci_test-base.yml index e92249cab4030..03871420c09df 100644 --- a/.github/workflows/ci_test-base.yml +++ b/.github/workflows/ci_test-base.yml @@ -1,6 +1,6 @@ # this jobs runs `pytest` over the source directory. It does not install any extra dependencies. # this is useful to catch errors where an import has been added which is not part of the basic dependencies. -name: CI basic testing +name: Test # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows on: # Trigger the workflow on push or pull request, but only for the master branch @@ -10,8 +10,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra branches: [master, "release/*"] jobs: - doctest: - + source: runs-on: ${{ matrix.os }} strategy: fail-fast: false diff --git a/.github/workflows/ci_test-conda.yml b/.github/workflows/ci_test-conda.yml index e0808a79fd384..f996c8cc7dcea 100644 --- a/.github/workflows/ci_test-conda.yml +++ b/.github/workflows/ci_test-conda.yml @@ -1,4 +1,4 @@ -name: PyTorch & Conda +name: Test # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows on: # Trigger the workflow on push or pull request, but only for the master branch @@ -15,7 +15,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8"] # previous to last Python version as that one is already used in test-full - pytorch-version: ["1.7", "1.8", "1.9", "1.10"] + pytorch-version: ["1.7", "1.8", "1.9", "1.10"] # nightly: add when there's a release candidate # Timeout: https://stackoverflow.com/a/59076067/4521646 timeout-minutes: 35 diff --git a/.github/workflows/ci_test-full.yml b/.github/workflows/ci_test-full.yml index 57aedf68dcb84..c86fb458f403e 100644 --- a/.github/workflows/ci_test-full.yml +++ b/.github/workflows/ci_test-full.yml @@ -1,4 +1,4 @@ -name: CI complete testing +name: Test # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows on: # Trigger the workflow on push or pull request, but only for the master branch @@ -10,7 +10,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra jobs: - pytest: + cpu: runs-on: ${{ matrix.os 
}} if: github.event.pull_request.draft == false diff --git a/.github/workflows/ci_test-mnodes.yml b/.github/workflows/ci_test-mnodes.yml deleted file mode 100644 index fbc0dc2b6e924..0000000000000 --- a/.github/workflows/ci_test-mnodes.yml +++ /dev/null @@ -1,210 +0,0 @@ -name: Multi Nodes GPU Tests - -# Workflow Steps: -# 1. Checkout Pytorch Lightning -# 2. Set up Python -# 3. Configure AWS Credentials -# 4. Install AWS Client -# 5. Get Current Sha Commit -# 6. Create Job Name -# 7. Update Test Configuration File -# 8. Install EKSClient -# 9. Create Gpu Node Pool -# 10. Check Current Node Pool | Current Elatic Pods -# 11. Apply Elastic -# 12. Wait 5 sec -# 13. Find ETCD TCP Address -# 14. Update Test Configuration File -# 15. Apply Multi Node Testing -# 16. Wait 120 secs -# 17. Listen to Jobs Logging -# 18. Statistics -# 19. Upload coverage results -# 20. Upload coverage to Codecov -# 21. Delete Group Node - -on: - push: - branches: - - never-ever-run- - #pull_request: - # types: [closed] - -env: - AWS_CLUSTER: pl-lightning-torchelastic - NODE_TYPE: g4dn.xlarge - NODES: 2 - NUM_GPUS: 1 - REGION: us-east-2 - MAX_CHECKS: 300 - CHECK_SPEEP: 2 - -jobs: - multi-nodes-gpu-testing: - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - python-version: [3.7] - pytorch-version: [1.6] - # Timeout: https://stackoverflow.com/a/59076067/4521646 - timeout-minutes: 50 - - # runs only when merged happened. - # if: github.event.pull_request.merged == true - steps: - - - name: Checkout Pytorch Lightning - uses: actions/checkout@v2 - with: - repository: PyTorchLightning/pytorch-lightning - ref: ${{ github.event.base_ref }} - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Weekly reset caching - run: echo "::set-output name=period::$(python -c 'import time ; days = time.time() / 60 / 60 / 24 ; print(int(days / 7))' 2>&1)" - id: times - - # Note: This uses an internal pip API and may not always work - # https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow - - name: Cache pip - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-td${{ steps.times.outputs.period }}-multi-node - restore-keys: | - ${{ runner.os }}-pip-td${{ steps.times.outputs.period }}- - - - name: Install dependencies - run: | - pip install awscli coverage - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_KEY_ID }} - aws-region: us-east-2 - - - name: Get Current Sha Commit - id: vars - shell: bash - run: | - echo "::set-output name=SHA::$(git rev-parse --short HEAD)" - echo $PWD - - - name: Create Job Name - id: job - shell: bash - run: | - echo "::set-output name=ID::$(echo '${{ steps.vars.outputs.SHA }}-${{ matrix.python-version }}-${{ matrix.pytorch-version }}' | tr . - )" - echo "::set-output name=ID_NAME::$(echo 's-${{ steps.vars.outputs.SHA }}-${{ matrix.python-version }}-${{ matrix.pytorch-version }}-e' | tr . 
- )" - - - name: Install EKSClient - run: | - curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp - sudo mv /tmp/eksctl /usr/local/bin - shell: bash - - - name: Create Gpu Node Pool - run: | - aws eks --region $REGION update-kubeconfig --name $AWS_CLUSTER - eksctl create nodegroup --name=${{ steps.job.outputs.ID }} --cluster=$AWS_CLUSTER --node-type=$NODE_TYPE --nodes=$NODES - # eksctl create nodegroup --name=${{ steps.job.outputs.ID }} --cluster=$AWS_CLUSTER --managed --spot --node-type=$NODE_TYPE --nodes=$NODES - shell: bash - - - name: Check Current Node Pool | Current Elatic Pods - run: | - eksctl get nodegroups --cluster $AWS_CLUSTER - kubectl get pods -n elastic-job - - - name: Apply Elastic - run: | - git clone https://github.com/pytorch/elastic.git - cd elastic/kubernetes - - kubectl apply -k config/default - - kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/master/nvidia-device-plugin.yml - kubectl apply -f https://raw.githubusercontent.com/pytorch/elastic/master/kubernetes/config/samples/etcd.yaml - - - name: Wait - # todo: this shall be dynamic - if: always() - shell: bash - run: | - sleep 5 - - - name: Find ETCD TCP Address - id: tcp - shell: bash - run: | - echo "::set-output name=TCP_ADDRESS::$(kubectl logs etcd -n elastic-job | grep -Eo '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}:[0-9]{1,4}' | head -1)" - - - name: Update Test Config. File - run: | - import os - from dtrun.configs import prepare_multi_nodes_gpu_config - - assert os.path.isfile('./tests/mnode_tests.txt') - prepare_multi_nodes_gpu_config( - './.github/multi-nodes-gpu.yaml', - './tests/mnode_tests.txt', - sha="${{ steps.vars.outputs.SHA }}", - tcp_address="${{ steps.tcp.outputs.TCP_ADDRESS }}", - python_version="${{ matrix.python-version }}", - torch_version="${{ matrix.pytorch-version }}", - num_gpus=1, - ) - shell: python - - - name: Apply Multi Node Testing - run: | - # cat ./.github/multi-nodes-gpu.yaml - kubectl apply -f ./.github/multi-nodes-gpu.yaml - shell: bash - - - name: Wait - # todo: this shall be dynamic - if: always() - shell: bash - run: | - sleep 400 - - - name: Listen to Jobs Logging - shell: bash - run: | - # todo: Enable automatic checking. - # while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl logs ${{ steps.job.outputs.ID_NAME }}-worker-0 -n elastic-job | grep -i "error\|failed"; then status_code=1 && break; elif kubectl logs ${{ steps.job.outputs.ID }}-worker-0 -n elastic-job | grep "TEST END"; then status_code=0 && break; else printf "." ; fi; sleep $CHECK_SPEEP; done && \ - # echo "Done waiting. 
Job status code: $status_code" && \ - kubectl logs ${{ steps.job.outputs.ID_NAME }}-worker-0 -n elastic-job > /tmp/full_output.txt - if grep -q 'END_TOKEN' /tmp/full_output.txt ; then csplit /tmp/full_output.txt '/END_TOKEN/'; else mv /tmp/full_output.txt xx00; fi && \ - cat xx00 - - - name: Statistics - if: success() - run: | - cat ./xx01 | tail -n +2 | base64 --decode > /home/runner/work/pytorch-lightning/pytorch-lightning/.coverage - cd /home/runner/work/pytorch-lightning/pytorch-lightning && coverage report && coverage xml - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 - if: always() - # see: https://github.com/actions/toolkit/issues/399 - continue-on-error: true - with: - token: ${{ secrets.CODECOV_TOKEN }} - file: coverage.xml - flags: multi-nodes,pytest - name: multi-nodes-coverage - fail_ci_if_error: false - - - name: Delete Group Node - if: always() - run: | - kubectl delete ElasticJob ${{ steps.job.outputs.ID_NAME }} -n elastic-job - eksctl delete nodegroup ${{ steps.job.outputs.ID }} --cluster=$AWS_CLUSTER diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 1cedf2c360306..e99863dc794d4 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -1,4 +1,4 @@ -name: "Check code" +name: Test on: # Trigger the workflow on push or pull request, but only for the master branch push: @@ -7,8 +7,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra branches: [master, "release/*"] jobs: - python-typing-mypy: - name: Python typing Mypy + mypy: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@master diff --git a/.github/workflows/docs-checks.yml b/.github/workflows/docs-checks.yml index 9d6b660a168f8..841f9128da8b1 100644 --- a/.github/workflows/docs-checks.yml +++ b/.github/workflows/docs-checks.yml @@ -1,4 +1,4 @@ -name: "Docs check" +name: Test # https://github.com/marketplace/actions/sphinx-build on: # Trigger the workflow on push or pull request, but only for the master branch @@ -8,7 +8,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra branches: [master, "release/*"] jobs: - test-docs: + doctest: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/events-nightly.yml b/.github/workflows/events-nightly.yml index f450e98380f10..7c2075ce5b440 100644 --- a/.github/workflows/events-nightly.yml +++ b/.github/workflows/events-nightly.yml @@ -1,4 +1,4 @@ -name: Nightly events +name: Nightly # https://jasonet.co/posts/scheduled-actions/ # https://github.community/t/distinct-job-for-each-schedule/17811/2 diff --git a/.github/workflows/events-recurrent.yml b/.github/workflows/events-recurrent.yml index d7f1872fde732..834adc6c169fa 100644 --- a/.github/workflows/events-recurrent.yml +++ b/.github/workflows/events-recurrent.yml @@ -1,4 +1,4 @@ -name: Recurrent events +name: Recurrent # https://jasonet.co/posts/scheduled-actions/ # https://github.community/t/distinct-job-for-each-schedule/17811/2 diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index f7017d35d9e88..169e01edd8d48 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -1,4 +1,4 @@ -name: Publish Docker Releases +name: Docker # https://www.docker.com/blog/first-docker-github-action-is-here # https://github.com/docker/build-push-action on: @@ -8,7 +8,7 @@ on: types: [published] jobs: - cuda-PL: + publish: runs-on: ubuntu-20.04 # only on releases if: 
startsWith(github.ref, 'refs/tags/') || github.event_name == 'release'
@@ -16,7 +16,7 @@
       fail-fast: false
       matrix:
         python_version: ["3.6", "3.7", "3.8", "3.9"]
-        pytorch_version: ["1.7", "1.8", "1.9"]
+        pytorch_version: ["1.7", "1.8", "1.9", "1.10"]
     steps:
     - name: Checkout
       uses: actions/checkout@v2
@@ -39,7 +39,7 @@
     - name: Publish Latest to Docker
       uses: docker/build-push-action@v1.1.0
       # only on releases and latest Python and PyTorch
-      if: matrix.python_version == 3.9 && matrix.pytorch_version == 1.9
+      if: matrix.python_version == '3.9' && matrix.pytorch_version == '1.10'
      with:
        repository: pytorchlightning/pytorch_lightning
        username: ${{ secrets.DOCKER_USERNAME }}
diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml
index a91837cab3340..09afd4db893d3 100644
--- a/.github/workflows/release-pypi.yml
+++ b/.github/workflows/release-pypi.yml
@@ -1,4 +1,4 @@
-name: PyPI Release
+name: PyPI
 # https://help.github.com/en/actions/reference/events-that-trigger-workflows
 on: # Trigger the workflow on push or pull request, but only for the master branch
diff --git a/README.md b/README.md
index 6e0e4b5dbf52d..9d618955e1140 100644
--- a/README.md
+++ b/README.md
@@ -78,18 +78,20 @@ Lightning is rigorously tested across multiple GPUs, TPUs CPUs and against major
-| System / PyTorch ver. | 1.6 (min. req.) | 1.7 | 1.8 (LTS) | 1.9 (latest) | 1.10 (nightly) | -| :------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| Conda py3.7 \[linux\] | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | -| Linux py3.7 \[GPUs\*\*\] | - | - | [![Build Status]()](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - | -| Linux py3.7 \[TPUs\*\*\*\] | - | - | [![CircleCI](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master.svg?style=svg)](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master) | - | - | -| Linux py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete 
testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | -| OSX py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | -| Windows py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | +| System / PyTorch ver. | 1.7 (min. req.) | 1.8 (LTS) | 1.9 | 1.10 (latest) | +| :------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Linux py3.7 \[GPUs\*\*\] | - | [![Build Status]()](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - | +| Linux py3.7 \[TPUs\*\*\*\] | - | [![CircleCI](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master.svg?style=svg)](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master) | - | - | +| Linux py3.8 (with Conda | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | 
[![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | +| Linux py3.{7,9} | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | +| OSX py3.{7,9} | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | +| Windows py3.{7,9} | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | +| Linux py3.6 | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | - | +| OSX py3.6 | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | - | +| Windows py3.6 | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | - | - _\*\* tests run on two NVIDIA P100_ -- _\*\*\* tests run on Google GKE TPUv2/3_ -- _TPU py3.7 means we support Colab and Kaggle env._ +- _\*\*\* tests run on Google GKE TPUv2/3. TPU py3.7 means we support Colab and Kaggle env._
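The Dockerfile changes below raise the default build arguments from PyTorch 1.6 to 1.8, in line with 1.6 support being dropped from the matrix above. A minimal smoke-test sketch, not part of the patch; the version strings simply mirror the new ARG defaults:

    # Run inside a rebuilt base image to confirm the bumped defaults took effect.
    import platform

    import torch

    assert platform.python_version().startswith("3.9")  # ARG PYTHON_VERSION=3.9
    assert torch.__version__.startswith("1.8")  # ARG PYTORCH_VERSION=1.8
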
diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index ab26af6c7accf..99e8d018f2884 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -17,7 +17,7 @@ ARG CUDA_VERSION=10.2 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu18.04 ARG PYTHON_VERSION=3.9 -ARG PYTORCH_VERSION=1.6 +ARG PYTORCH_VERSION=1.8 SHELL ["/bin/bash", "-c"] # https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/ diff --git a/dockers/base-xla/Dockerfile b/dockers/base-xla/Dockerfile index 5c86da2147717..e293343614927 100644 --- a/dockers/base-xla/Dockerfile +++ b/dockers/base-xla/Dockerfile @@ -19,7 +19,7 @@ LABEL maintainer="PyTorchLightning " # CALL: docker image build -t pytorch-lightning:XLA-extras-py3.6 -f dockers/base-xla/Dockerfile . --build-arg PYTHON_VERSION=3.8 ARG PYTHON_VERSION=3.9 ARG CONDA_VERSION=4.9.2 -ARG XLA_VERSION=1.6 +ARG XLA_VERSION=1.8 SHELL ["/bin/bash", "-c"] # for skipping configurations diff --git a/dockers/release/Dockerfile b/dockers/release/Dockerfile index 529680059791c..f4083f2dd42fc 100644 --- a/dockers/release/Dockerfile +++ b/dockers/release/Dockerfile @@ -13,7 +13,7 @@ # limitations under the License. ARG PYTHON_VERSION=3.9 -ARG PYTORCH_VERSION=1.6 +ARG PYTORCH_VERSION=1.8 FROM pytorchlightning/pytorch_lightning:base-cuda-py${PYTHON_VERSION}-torch${PYTORCH_VERSION} diff --git a/dockers/tpu-tests/Dockerfile b/dockers/tpu-tests/Dockerfile index 086bd349bc757..6605b9abbaadc 100644 --- a/dockers/tpu-tests/Dockerfile +++ b/dockers/tpu-tests/Dockerfile @@ -13,7 +13,7 @@ # limitations under the License. ARG PYTHON_VERSION=3.9 -ARG PYTORCH_VERSION=1.6 +ARG PYTORCH_VERSION=1.8 FROM pytorchlightning/pytorch_lightning:base-xla-py${PYTHON_VERSION}-torch${PYTORCH_VERSION} From 5ba5b7247336a1a8450fd579b2fe8495f827428d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Thu, 11 Nov 2021 18:15:18 +0100 Subject: [PATCH 03/18] Update tests to avoid the deprecated `weights_summary` (#10446) --- pytorch_lightning/core/lightning.py | 2 +- pytorch_lightning/tuner/batch_size_scaling.py | 3 - tests/callbacks/test_model_summary.py | 17 +---- tests/deprecated_api/test_remove_1-7.py | 7 +- tests/tuner/test_scale_batch_size.py | 1 - tests/utilities/test_model_summary.py | 74 +++++++++++-------- 6 files changed, 50 insertions(+), 54 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 7867211badb35..bf335ec8b7acc 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1708,7 +1708,7 @@ def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None Return: The model summary object """ - warning_cache.deprecation( + rank_zero_deprecation( "The `LightningModule.summarize` method is deprecated in v1.5 and will be removed in v1.7. 
" "Use `pytorch_lightning.utilities.model_summary.summarize` instead.", stacklevel=6, diff --git a/pytorch_lightning/tuner/batch_size_scaling.py b/pytorch_lightning/tuner/batch_size_scaling.py index faf2ee4f5bb9c..bf6e0cff49772 100644 --- a/pytorch_lightning/tuner/batch_size_scaling.py +++ b/pytorch_lightning/tuner/batch_size_scaling.py @@ -106,7 +106,6 @@ def __scale_batch_dump_params(trainer: "pl.Trainer") -> None: "current_epoch": trainer.current_epoch, "global_step": trainer.global_step, "max_steps": trainer.max_steps, - "weights_summary": trainer.weights_summary, "logger": trainer.logger, "callbacks": trainer.callbacks, "checkpoint_callback": trainer.checkpoint_callback, @@ -121,7 +120,6 @@ def __scale_batch_reset_params(trainer: "pl.Trainer", model: "pl.LightningModule trainer.auto_lr_find = False # avoid lr find being called multiple times trainer.fit_loop.current_epoch = 0 trainer.fit_loop.max_steps = steps_per_trial # take few steps - trainer.weights_summary = None # not needed before full run trainer.logger = DummyLogger() if trainer.logger is not None else None trainer.callbacks = [] # not needed before full run trainer.limit_train_batches = 1.0 @@ -134,7 +132,6 @@ def __scale_batch_restore_params(trainer: "pl.Trainer") -> None: trainer.fit_loop.current_epoch = trainer.__dumped_params["current_epoch"] trainer.fit_loop.global_step = trainer.__dumped_params["global_step"] trainer.fit_loop.max_steps = trainer.__dumped_params["max_steps"] - trainer.weights_summary = trainer.__dumped_params["weights_summary"] trainer.logger = trainer.__dumped_params["logger"] trainer.callbacks = trainer.__dumped_params["callbacks"] trainer.auto_scale_batch_size = trainer.__dumped_params["auto_scale_batch_size"] diff --git a/tests/callbacks/test_model_summary.py b/tests/callbacks/test_model_summary.py index a270d381f043a..f588d696c4e7e 100644 --- a/tests/callbacks/test_model_summary.py +++ b/tests/callbacks/test_model_summary.py @@ -17,8 +17,6 @@ from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ModelSummary -from pytorch_lightning.utilities import ModelSummaryMode -from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel @@ -48,26 +46,19 @@ def test_model_summary_callback_with_weights_summary_none(): def test_model_summary_callback_with_weights_summary(): - trainer = Trainer(weights_summary="top") - model_summary_callback = list(filter(lambda cb: isinstance(cb, ModelSummary), trainer.callbacks))[0] assert model_summary_callback._max_depth == 1 - trainer = Trainer(weights_summary="full") - + with pytest.deprecated_call(match=r"weights_summary=full\)` is deprecated"): + trainer = Trainer(weights_summary="full") model_summary_callback = list(filter(lambda cb: isinstance(cb, ModelSummary), trainer.callbacks))[0] assert model_summary_callback._max_depth == -1 - with pytest.raises( - MisconfigurationException, match=f"`weights_summary` can be None, {', '.join(list(ModelSummaryMode))}" - ): - _ = Trainer(weights_summary="invalid") - def test_model_summary_callback_override_weights_summary_flag(): - - trainer = Trainer(callbacks=ModelSummary(), weights_summary=None) + with pytest.deprecated_call(match=r"weights_summary=None\)` is deprecated"): + trainer = Trainer(callbacks=ModelSummary(), weights_summary=None) assert any(isinstance(cb, ModelSummary) for cb in trainer.callbacks) diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index ec44d9842ce2a..4da10fb0b666a 
100644
--- a/tests/deprecated_api/test_remove_1-7.py
+++ b/tests/deprecated_api/test_remove_1-7.py
@@ -38,12 +38,9 @@
 def test_v1_7_0_deprecated_lightning_module_summarize(tmpdir):
-    from pytorch_lightning.core.lightning import warning_cache
-
     model = BoringModel()
-    model.summarize(max_depth=1)
-    assert any("The `LightningModule.summarize` method is deprecated in v1.5" in w for w in warning_cache)
-    warning_cache.clear()
+    with pytest.deprecated_call(match="The `LightningModule.summarize` method is deprecated in v1.5"):
+        model.summarize(max_depth=1)


 def test_v1_7_0_moved_model_summary_and_layer_summary(tmpdir):
diff --git a/tests/tuner/test_scale_batch_size.py b/tests/tuner/test_scale_batch_size.py
index 9dbb24d9edf30..d8657e0e463d5 100644
--- a/tests/tuner/test_scale_batch_size.py
+++ b/tests/tuner/test_scale_batch_size.py
@@ -114,7 +114,6 @@ def test_trainer_reset_correctly(tmpdir):
         "logger",
         "max_steps",
         "global_step",
-        "weights_summary",
     ]
     expected = {ca: getattr(trainer, ca) for ca in changed_attributes}
     trainer.tuner.scale_batch_size(model, max_trials=5)
diff --git a/tests/utilities/test_model_summary.py b/tests/utilities/test_model_summary.py
index 1f44e0a74f68d..669892984f0cf 100644
--- a/tests/utilities/test_model_summary.py
+++ b/tests/utilities/test_model_summary.py
@@ -141,31 +141,41 @@ def forward(self, inp):

 def test_invalid_weights_summmary():
     """Test that invalid value for weights_summary raises an error."""
+    model = LightningModule()
+
     with pytest.raises(MisconfigurationException, match="`mode` can be None, .* got temp"):
-        summarize(UnorderedModel, mode="temp")
+        summarize(model, mode="temp")

-    with pytest.raises(MisconfigurationException, match="`weights_summary` can be None, .* got temp"):
+    with pytest.raises(
+        MisconfigurationException, match="`weights_summary` can be None, .* got temp"
+    ), pytest.deprecated_call(match=r"weights_summary=temp\)` is deprecated"):
         Trainer(weights_summary="temp")

+    with pytest.raises(MisconfigurationException, match="mode` can be .* got temp"):
+        ModelSummary(model, mode="temp")
+
+    with pytest.raises(ValueError, match="max_depth` can be .* got temp"):
+        ModelSummary(model, max_depth="temp")
+

-@pytest.mark.parametrize("mode", ["full", "top"])
-def test_empty_model_summary_shapes(mode: str):
+@pytest.mark.parametrize("max_depth", [-1, 1])
+def test_empty_model_summary_shapes(max_depth):
     """Test that the summary works for models that have no submodules."""
     model = EmptyModule()
-    summary = summarize(model, mode=mode)
+    summary = summarize(model, max_depth=max_depth)
     assert summary.in_sizes == []
     assert summary.out_sizes == []
     assert summary.param_nums == []


 @RunIf(min_gpus=1)
-@pytest.mark.parametrize("mode", ["full", "top"])
+@pytest.mark.parametrize("max_depth", [-1, 1])
 @pytest.mark.parametrize("device", [torch.device("cpu"), torch.device("cuda", 0)])
-def test_linear_model_summary_shapes(device, mode):
+def test_linear_model_summary_shapes(device, max_depth):
     """Test that the model summary correctly computes the input- and output shapes."""
     model = UnorderedModel().to(device)
     model.train()
-    summary = summarize(model, mode=mode)
+    summary = summarize(model, max_depth=max_depth)
     assert summary.in_sizes == [[2, 10], [2, 7], [2, 3], [2, 7], UNKNOWN_SIZE]  # layer 2  # combine  # layer 1  # relu
     assert summary.out_sizes == [[2, 2], [2, 9], [2, 5], [2, 7], UNKNOWN_SIZE]  # layer 2  # combine  # layer 1  # relu
     assert model.training
@@ -191,8 +201,8 @@ def test_hooks_removed_after_summarize(max_depth):
     assert handle.id not in 
handle.hooks_dict_ref() -@pytest.mark.parametrize("mode", ["full", "top"]) -def test_rnn_summary_shapes(mode): +@pytest.mark.parametrize("max_depth", [-1, 1]) +def test_rnn_summary_shapes(max_depth): """Test that the model summary works for RNNs.""" model = ParityModuleRNN() @@ -204,16 +214,16 @@ def test_rnn_summary_shapes(mode): model.example_input_array = torch.zeros(b, t, 10) - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert summary.in_sizes == [[b, t, i], [b, t, h]] # rnn # linear assert summary.out_sizes == [[[b, t, h], [[1, b, h], [1, b, h]]], [b, t, o]] # rnn # linear -@pytest.mark.parametrize("mode", ["full", "top"]) -def test_summary_parameter_count(mode): +@pytest.mark.parametrize("max_depth", [-1, 1]) +def test_summary_parameter_count(max_depth): """Test that the summary counts the number of parameters in every submodule.""" model = UnorderedModel() - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert summary.param_nums == [ model.layer2.weight.numel() + model.layer2.bias.numel(), model.combine.weight.numel() + model.combine.bias.numel(), @@ -223,24 +233,24 @@ def test_summary_parameter_count(mode): ] -@pytest.mark.parametrize("mode", ["full", "top"]) -def test_summary_layer_types(mode): +@pytest.mark.parametrize("max_depth", [-1, 1]) +def test_summary_layer_types(max_depth): """Test that the summary displays the layer names correctly.""" model = UnorderedModel() - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert summary.layer_types == ["Linear", "Linear", "Linear", "ReLU", "Conv2d"] -@pytest.mark.parametrize("mode", ["full", "top"]) -def test_summary_with_scripted_modules(mode): +@pytest.mark.parametrize("max_depth", [-1, 1]) +def test_summary_with_scripted_modules(max_depth): model = PartialScriptModel() - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert summary.layer_types == ["RecursiveScriptModule", "Linear"] assert summary.in_sizes == [UNKNOWN_SIZE, [2, 3]] assert summary.out_sizes == [UNKNOWN_SIZE, [2, 2]] -@pytest.mark.parametrize("mode", ["full", "top"]) +@pytest.mark.parametrize("max_depth", [-1, 1]) @pytest.mark.parametrize( ["example_input", "expected_size"], [ @@ -253,7 +263,7 @@ def test_summary_with_scripted_modules(mode): ((torch.zeros(2, 3), torch.zeros(4, 5)), [[2, 3], [4, 5]]), ], ) -def test_example_input_array_types(example_input, expected_size, mode): +def test_example_input_array_types(example_input, expected_size, max_depth): """Test the types of example inputs supported for display in the summary.""" class DummyModule(nn.Module): @@ -271,23 +281,23 @@ def forward(self, *args, **kwargs): model = DummyLightningModule() model.example_input_array = example_input - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert summary.in_sizes == [expected_size] -@pytest.mark.parametrize("mode", ["full", "top"]) -def test_model_size(mode): +@pytest.mark.parametrize("max_depth", [-1, 1]) +def test_model_size(max_depth): """Test model size is calculated correctly.""" model = PreCalculatedModel() - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert model.pre_calculated_model_size == summary.model_size -@pytest.mark.parametrize("mode", ["full", "top"]) -def test_empty_model_size(mode): +@pytest.mark.parametrize("max_depth", [-1, 1]) +def test_empty_model_size(max_depth): """Test empty model size is zero.""" 
model = EmptyModule() - summary = summarize(model, mode=mode) + summary = summarize(model, max_depth=max_depth) assert 0.0 == summary.model_size @@ -328,11 +338,13 @@ def test_max_depth_equals_mode_interface(): """Test summarize(model, full/top) interface mapping matches max_depth.""" model = DeepNestedModel() - summary_top = summarize(model, mode="top") + with pytest.deprecated_call(match="mode` in `LightningModule.summarize` is deprecated"): + summary_top = summarize(model, mode="top") summary_0 = summarize(model, max_depth=1) assert str(summary_top) == str(summary_0) - summary_full = summarize(model, mode="full") + with pytest.deprecated_call(match="mode` in `LightningModule.summarize` is deprecated"): + summary_full = summarize(model, mode="full") summary_minus1 = summarize(model, max_depth=-1) assert str(summary_full) == str(summary_minus1) From 09cf167237e867f1ec67a5db87e5a02c2cea4b69 Mon Sep 17 00:00:00 2001 From: Raahul Singh Date: Fri, 12 Nov 2021 01:23:40 +0530 Subject: [PATCH 04/18] Change attributes of `RichProgressBarTheme` dataclass (#10454) Co-authored-by: Kaushik B <45285388+kaushikb11@users.noreply.github.com> --- .../callbacks/progress/rich_progress.py | 34 +++++++++++++------ tests/callbacks/test_rich_progress_bar.py | 4 +-- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/pytorch_lightning/callbacks/progress/rich_progress.py b/pytorch_lightning/callbacks/progress/rich_progress.py index b07b487927418..c091223fba0bd 100644 --- a/pytorch_lightning/callbacks/progress/rich_progress.py +++ b/pytorch_lightning/callbacks/progress/rich_progress.py @@ -129,11 +129,12 @@ def render(self, task) -> RenderableType: class MetricsTextColumn(ProgressColumn): """A column containing text.""" - def __init__(self, trainer): + def __init__(self, trainer, style): self._trainer = trainer self._tasks = {} self._current_task_id = 0 self._metrics = {} + self._style = style super().__init__() def update(self, metrics): @@ -158,23 +159,34 @@ def render(self, task) -> Text: for k, v in self._metrics.items(): _text += f"{k}: {round(v, 3) if isinstance(v, float) else v} " - return Text(_text, justify="left") + return Text(_text, justify="left", style=self._style) @dataclass class RichProgressBarTheme: """Styles to associate to different base components. + Args: + description: Style for the progress bar description. For eg., Epoch x, Testing, etc. + progress_bar: Style for the bar in progress. + progress_bar_finished: Style for the finished progress bar. + progress_bar_pulse: Style for the progress bar when `IterableDataset` is being processed. + batch_progress: Style for the progress tracker (i.e 10/50 batches completed). + time: Style for the processed time and estimate time remaining. + processing_speed: Style for the speed of the batches being processed. 
+ metrics: Style for the metrics + https://rich.readthedocs.io/en/stable/style.html """ - text_color: str = "white" - progress_bar_complete: Union[str, Style] = "#6206E0" + description: Union[str, Style] = "white" + progress_bar: Union[str, Style] = "#6206E0" progress_bar_finished: Union[str, Style] = "#6206E0" progress_bar_pulse: Union[str, Style] = "#6206E0" - batch_process: str = "white" - time: str = "grey54" - processing_speed: str = "grey70" + batch_progress: Union[str, Style] = "white" + time: Union[str, Style] = "grey54" + processing_speed: Union[str, Style] = "grey70" + metrics: Union[str, Style] = "white" class RichProgressBar(ProgressBarBase): @@ -273,7 +285,7 @@ def _init_progress(self, trainer): self._reset_progress_bar_ids() self._console: Console = Console() self._console.clear_live() - self._metric_component = MetricsTextColumn(trainer) + self._metric_component = MetricsTextColumn(trainer, self.theme.metrics) self.progress = CustomProgress( *self.configure_columns(trainer), self._metric_component, @@ -356,7 +368,7 @@ def on_validation_epoch_start(self, trainer, pl_module): def _add_task(self, total_batches: int, description: str, visible: bool = True) -> Optional[int]: if self.progress is not None: return self.progress.add_task( - f"[{self.theme.text_color}]{description}", total=total_batches, visible=visible + f"[{self.theme.description}]{description}", total=total_batches, visible=visible ) def _update(self, progress_bar_id: int, visible: bool = True) -> None: @@ -453,11 +465,11 @@ def configure_columns(self, trainer) -> list: return [ TextColumn("[progress.description]{task.description}"), CustomBarColumn( - complete_style=self.theme.progress_bar_complete, + complete_style=self.theme.progress_bar, finished_style=self.theme.progress_bar_finished, pulse_style=self.theme.progress_bar_pulse, ), - BatchesProcessedColumn(style=self.theme.batch_process), + BatchesProcessedColumn(style=self.theme.batch_progress), CustomTimeColumn(style=self.theme.time), ProcessingSpeedColumn(style=self.theme.processing_speed), ] diff --git a/tests/callbacks/test_rich_progress_bar.py b/tests/callbacks/test_rich_progress_bar.py index 31681754423a8..8f3f20630b5c0 100644 --- a/tests/callbacks/test_rich_progress_bar.py +++ b/tests/callbacks/test_rich_progress_bar.py @@ -106,11 +106,11 @@ def test_rich_progress_bar_custom_theme(tmpdir): assert progress_bar.theme == theme args, kwargs = mocks["CustomBarColumn"].call_args - assert kwargs["complete_style"] == theme.progress_bar_complete + assert kwargs["complete_style"] == theme.progress_bar assert kwargs["finished_style"] == theme.progress_bar_finished args, kwargs = mocks["BatchesProcessedColumn"].call_args - assert kwargs["style"] == theme.batch_process + assert kwargs["style"] == theme.batch_progress args, kwargs = mocks["CustomTimeColumn"].call_args assert kwargs["style"] == theme.time From fa0ed17f8a18e887ba272a057e7c35c61d7f04fa Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Fri, 12 Nov 2021 18:12:25 +0530 Subject: [PATCH 05/18] remove deprecated train_loop (#10482) * remove deprecated train_loop * chlog --- CHANGELOG.md | 3 +++ docs/source/common/lightning_module.rst | 4 ++-- pytorch_lightning/trainer/trainer.py | 7 ------- tests/deprecated_api/test_remove_1-6.py | 8 -------- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0082201aa1cf9..87ecfdef4d448 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -113,6 +113,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Removed deprecated `utilities.distributed.rank_zero_{warn/deprecation}` ([#10451](https://github.com/PyTorchLightning/pytorch-lightning/pull/10451)) +- Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) + + ### Fixed - Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/PyTorchLightning/pytorch-lightning/issues/9702)) diff --git a/docs/source/common/lightning_module.rst b/docs/source/common/lightning_module.rst index 77a2c719736be..166b0b2384461 100644 --- a/docs/source/common/lightning_module.rst +++ b/docs/source/common/lightning_module.rst @@ -1167,14 +1167,14 @@ for more information. on_train_start() for epoch in epochs: - train_loop() + fit_loop() on_train_end() on_fit_end() teardown("fit") - def train_loop(): + def fit_loop(): on_epoch_start() on_train_epoch_start() diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b6dfcbfee8bc6..396289000251d 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -2134,13 +2134,6 @@ def __getstate__(self): def __setstate__(self, state): self.__dict__ = state - @property - def train_loop(self) -> FitLoop: - rank_zero_deprecation( - "`Trainer.train_loop` has been renamed to `Trainer.fit_loop` and will be removed in v1.6." - ) - return self.fit_loop - @property def terminate_on_nan(self) -> bool: rank_zero_deprecation("`Trainer.terminate_on_nan` is deprecated in v1.5 and will be removed in 1.7.") diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 686339df6317c..066922c8f4d16 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -69,14 +69,6 @@ def test_v1_6_0_is_overridden_model(): assert not is_overridden("foo", model=model) -def test_v1_6_0_train_loop(tmpdir): - trainer = Trainer() - with pytest.deprecated_call( - match=r"`Trainer.train_loop` has been renamed to `Trainer.fit_loop` and will be removed in v1.6." - ): - _ = trainer.train_loop - - def test_v1_6_0_deprecated_model_summary_mode(tmpdir): model = BoringModel() with pytest.deprecated_call(match="Argument `mode` in `ModelSummary` is deprecated in v1.4"): From 847e24011af32dcf7bf14577b1e2e0a5fe3fb727 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 12 Nov 2021 19:03:47 +0100 Subject: [PATCH 06/18] Squeeze the early stopping monitor (#10461) --- CHANGELOG.md | 2 +- pytorch_lightning/callbacks/early_stopping.py | 2 +- tests/callbacks/test_early_stopping.py | 13 +++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87ecfdef4d448..bad88d4411e9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -124,7 +124,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed `CombinedLoader` and `max_size_cycle` didn't receive a `DistributedSampler` ([#10374](https://github.com/PyTorchLightning/pytorch-lightning/issues/10374)) -- +- Squeeze the early stopping monitor to remove empty tensor dimensions ([#10461](https://github.com/PyTorchLightning/pytorch-lightning/issues/10461)) - diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 03b268f714a74..e292cd961711a 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -196,7 +196,7 @@ def _run_early_stopping_check(self, trainer: "pl.Trainer") -> None: ): # short circuit if metric not present return - current = logs.get(self.monitor) + current = logs[self.monitor].squeeze() should_stop, reason = self._evaluate_stopping_criteria(current) # stop every ddp process if any world process decides to stop diff --git a/tests/callbacks/test_early_stopping.py b/tests/callbacks/test_early_stopping.py index 9b20b96778e65..da200cc336504 100644 --- a/tests/callbacks/test_early_stopping.py +++ b/tests/callbacks/test_early_stopping.py @@ -469,3 +469,16 @@ def validation_step(self, batch, batch_idx): assert trainer.global_step == len(side_effect) * int(trainer.limit_train_batches * trainer.val_check_interval) else: assert trainer.current_epoch == len(side_effect) * trainer.check_val_every_n_epoch - 1 + + +def test_early_stopping_squeezes(): + early_stopping = EarlyStopping(monitor="foo") + trainer = Trainer() + trainer.callback_metrics["foo"] = torch.tensor([[[0]]]) + + with mock.patch( + "pytorch_lightning.callbacks.EarlyStopping._evaluate_stopping_criteria", return_value=(False, "") + ) as es_mock: + early_stopping._run_early_stopping_check(trainer) + + es_mock.assert_called_once_with(torch.tensor(0)) From fabb3644027a12c0df9acbdbc752d7ca7673396c Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Sat, 13 Nov 2021 01:02:43 +0530 Subject: [PATCH 07/18] Remove deprecated `mode` argument from ModelSummary (#10449) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 3 ++ pytorch_lightning/core/lightning.py | 9 +--- pytorch_lightning/utilities/model_summary.py | 53 ++------------------ tests/deprecated_api/test_remove_1-6.py | 10 ---- tests/utilities/test_model_summary.py | 21 -------- 5 files changed, 9 insertions(+), 87 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bad88d4411e9f..3795f99684f47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -113,6 +113,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Removed deprecated `utilities.distributed.rank_zero_{warn/deprecation}` ([#10451](https://github.com/PyTorchLightning/pytorch-lightning/pull/10451)) +- Removed deprecated `mode` argument from `ModelSummary` class ([#10449](https://github.com/PyTorchLightning/pytorch-lightning/pull/10449)) + + - Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index bf335ec8b7acc..a2fc9d1a21d4b 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1689,7 +1689,7 @@ def tbptt_split_batch(self, batch, split_size): return splits - def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None) -> Optional[ModelSummary]: + def summarize(self, max_depth: int = 1) -> ModelSummary: """Summarize this LightningModule. .. deprecated:: v1.5 @@ -1697,11 +1697,6 @@ def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None and will be removed in v1.7. Args: - mode: Can be either ``'top'`` (summarize only direct submodules) or ``'full'`` (summarize all layers). - - .. deprecated:: v1.4 - This parameter was deprecated in v1.4 in favor of `max_depth` and will be removed in v1.6. - max_depth: The maximum depth of layer nesting that the summary will include. A value of 0 turns the layer summary off. Default: 1. @@ -1714,7 +1709,7 @@ def summarize(self, mode: Optional[str] = "top", max_depth: Optional[int] = None stacklevel=6, ) - return summarize(self, mode, max_depth) + return summarize(self, max_depth) def freeze(self) -> None: r""" diff --git a/pytorch_lightning/utilities/model_summary.py b/pytorch_lightning/utilities/model_summary.py index 9c2690202df90..bab6da5368b65 100644 --- a/pytorch_lightning/utilities/model_summary.py +++ b/pytorch_lightning/utilities/model_summary.py @@ -23,8 +23,7 @@ from torch.utils.hooks import RemovableHandle import pytorch_lightning as pl -from pytorch_lightning.utilities import AMPType, DeviceType, ModelSummaryMode, rank_zero_deprecation -from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities import AMPType, DeviceType from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8 from pytorch_lightning.utilities.warnings import WarningCache @@ -130,13 +129,6 @@ class ModelSummary: Args: model: The model to summarize (also referred to as the root module). - mode: Can be one of - - - `top` (default): only the top-level modules will be recorded (the children of the root module) - - `full`: summarizes all layers and their submodules in the root module - - .. deprecated:: v1.4 - This parameter was deprecated in v1.4 in favor of `max_depth` and will be removed in v1.6. max_depth: Maximum depth of modules to show. Use -1 to show all modules or 0 to show no summary. Defaults to 1. 
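With the `mode` argument removed, the migration for downstream callers is mechanical. A minimal sketch, assuming `model` is any `LightningModule`; the mapping follows the deprecation message deleted below, where `max_depth=1` replicated `mode="top"` and `max_depth=-1` replicated `mode="full"`:

    from pytorch_lightning.utilities.model_summary import summarize

    # Previously: summarize(model, mode="top") / summarize(model, mode="full")
    summary_top = summarize(model, max_depth=1)  # direct submodules only
    summary_full = summarize(model, max_depth=-1)  # all layers and submodules
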
@@ -186,22 +178,9 @@ class ModelSummary: 0.530 Total estimated model params size (MB) """ - def __init__(self, model: "pl.LightningModule", mode: Optional[str] = None, max_depth: Optional[int] = 1) -> None: + def __init__(self, model: "pl.LightningModule", max_depth: int = 1) -> None: self._model = model - # temporary mapping from mode to max_depth - if max_depth is None or mode is not None: - if mode in ModelSummaryMode.supported_types(): - max_depth = ModelSummaryMode.get_max_depth(mode) - rank_zero_deprecation( - "Argument `mode` in `ModelSummary` is deprecated in v1.4" - f" and will be removed in v1.6. Use `max_depth={max_depth}` to replicate `mode={mode}` behaviour." - ) - else: - raise MisconfigurationException( - f"`mode` can be {', '.join(ModelSummaryMode.supported_types())}, got {mode}." - ) - if not isinstance(max_depth, int) or max_depth < -1: raise ValueError(f"`max_depth` can be -1, 0 or > 0, got {max_depth}.") @@ -436,17 +415,11 @@ def _is_lazy_weight_tensor(p: Tensor) -> bool: return False -def summarize( - lightning_module: "pl.LightningModule", mode: Optional[str] = None, max_depth: Optional[int] = None -) -> ModelSummary: +def summarize(lightning_module: "pl.LightningModule", max_depth: int = 1) -> ModelSummary: """Summarize the LightningModule specified by `lightning_module`. Args: lightning_module: `LightningModule` to summarize. - mode: Can be either ``'top'`` (summarize only direct submodules) or ``'full'`` (summarize all layers). - - .. deprecated:: v1.4 - This parameter was deprecated in v1.4 in favor of `max_depth` and will be removed in v1.6. max_depth: The maximum depth of layer nesting that the summary will include. A value of 0 turns the layer summary off. Default: 1. @@ -454,22 +427,4 @@ def summarize( Return: The model summary object """ - - # temporary mapping from mode to max_depth - if max_depth is None: - if mode is None: - model_summary = ModelSummary(lightning_module, max_depth=1) - elif mode in ModelSummaryMode.supported_types(): - max_depth = ModelSummaryMode.get_max_depth(mode) - rank_zero_deprecation( - "Argument `mode` in `LightningModule.summarize` is deprecated in v1.4" - f" and will be removed in v1.6. Use `max_depth={max_depth}` to replicate `mode={mode}` behavior." 
- ) - model_summary = ModelSummary(lightning_module, max_depth=max_depth) - else: - raise MisconfigurationException( - f"`mode` can be None, {', '.join(ModelSummaryMode.supported_types())}, got {mode}" - ) - else: - model_summary = ModelSummary(lightning_module, max_depth=max_depth) - return model_summary + return ModelSummary(lightning_module, max_depth=max_depth) diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 066922c8f4d16..0e450caa18b62 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -18,7 +18,6 @@ from pytorch_lightning import Trainer from pytorch_lightning.utilities.model_helpers import is_overridden -from pytorch_lightning.utilities.model_summary import ModelSummary from tests.helpers import BoringModel @@ -69,15 +68,6 @@ def test_v1_6_0_is_overridden_model(): assert not is_overridden("foo", model=model) -def test_v1_6_0_deprecated_model_summary_mode(tmpdir): - model = BoringModel() - with pytest.deprecated_call(match="Argument `mode` in `ModelSummary` is deprecated in v1.4"): - ModelSummary(model, mode="top") - - with pytest.deprecated_call(match="Argument `mode` in `LightningModule.summarize` is deprecated in v1.4"): - model.summarize(mode="top") - - def test_v1_6_0_deprecated_disable_validation(): trainer = Trainer() with pytest.deprecated_call(match="disable_validation` is deprecated in v1.4"): diff --git a/tests/utilities/test_model_summary.py b/tests/utilities/test_model_summary.py index 669892984f0cf..bc465e78fee38 100644 --- a/tests/utilities/test_model_summary.py +++ b/tests/utilities/test_model_summary.py @@ -143,17 +143,11 @@ def test_invalid_weights_summmary(): """Test that invalid value for weights_summary raises an error.""" model = LightningModule() - with pytest.raises(MisconfigurationException, match="`mode` can be None, .* got temp"): - summarize(model, mode="temp") - with pytest.raises( MisconfigurationException, match="`weights_summary` can be None, .* got temp" ), pytest.deprecated_call(match="weights_summary=temp)` is deprecated"): Trainer(weights_summary="temp") - with pytest.raises(MisconfigurationException, match="mode` can be .* got temp"): - ModelSummary(model, mode="temp") - with pytest.raises(ValueError, match="max_depth` can be .* got temp"): ModelSummary(model, max_depth="temp") @@ -334,21 +328,6 @@ def test_lazy_model_summary(): assert summary.trainable_parameters == 7 -def test_max_depth_equals_mode_interface(): - """Test summarize(model, full/top) interface mapping matches max_depth.""" - model = DeepNestedModel() - - with pytest.deprecated_call(match="mode` in `LightningModule.summarize` is deprecated"): - summary_top = summarize(model, mode="top") - summary_0 = summarize(model, max_depth=1) - assert str(summary_top) == str(summary_0) - - with pytest.deprecated_call(match="mode` in `LightningModule.summarize` is deprecated"): - summary_full = summarize(model, mode="full") - summary_minus1 = summarize(model, max_depth=-1) - assert str(summary_full) == str(summary_minus1) - - @pytest.mark.parametrize("max_depth", [-1, 0, 1, 3, 999]) def test_max_depth_param(max_depth): """Test that only the modules up to the desired depth are shown.""" From a8c2725ff8230a450f76f396fc372b7d5cb00076 Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Sun, 14 Nov 2021 01:02:30 +0530 Subject: [PATCH 08/18] remove deprecated signature for `transfer_batch_to_device` (#10480) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Co-authored-by: Adrian Wälchli --- CHANGELOG.md | 4 ++++ pytorch_lightning/core/hooks.py | 2 +- pytorch_lightning/core/lightning.py | 12 +----------- tests/accelerators/test_dp.py | 2 +- tests/deprecated_api/test_remove_1-6.py | 10 ---------- 5 files changed, 7 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3795f99684f47..bbad7fb1d4be2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -109,6 +109,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). * ([#10403](https://github.com/PyTorchLightning/pytorch-lightning/pull/10403)) * ([#10448](https://github.com/PyTorchLightning/pytorch-lightning/pull/10448)) +- Removed deprecated signature for `transfer_batch_to_device` hook. The new argument `dataloader_idx` is now required ([#10480](https://github.com/PyTorchLightning/pytorch-lightning/pull/10480)) + - Removed deprecated `utilities.distributed.rank_zero_{warn/deprecation}` ([#10451](https://github.com/PyTorchLightning/pytorch-lightning/pull/10451)) @@ -119,6 +121,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) +- Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) + ### Fixed - Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/PyTorchLightning/pytorch-lightning/issues/9702)) diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py index 0c47b1ec97557..376a6919ca43f 100644 --- a/pytorch_lightning/core/hooks.py +++ b/pytorch_lightning/core/hooks.py @@ -693,7 +693,7 @@ def transfer_batch_to_device(self, batch, device, dataloader_idx): # skip device transfer for the first dataloader or anything you wish pass else: - batch = super().transfer_batch_to_device(data, device) + batch = super().transfer_batch_to_device(data, device, dataloader_idx) return batch Raises: diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index a2fc9d1a21d4b..b6f064d7d9802 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -262,17 +262,7 @@ def _apply_batch_transfer_handler( ) -> Any: device = device or self.device batch = self.on_before_batch_transfer(batch, dataloader_idx) - - if is_param_in_hook_signature(self.transfer_batch_to_device, "dataloader_idx"): - batch = self.transfer_batch_to_device(batch, device, dataloader_idx) - else: - warning_cache.deprecation( - "`transfer_batch_to_device` hook signature has changed in v1.4." - " `dataloader_idx` parameter has been added to it. 
Support for" - " the old signature will be removed in v1.6" - ) - batch = self.transfer_batch_to_device(batch, device) - + batch = self.transfer_batch_to_device(batch, device, dataloader_idx) batch = self.on_after_batch_transfer(batch, dataloader_idx) return batch diff --git a/tests/accelerators/test_dp.py b/tests/accelerators/test_dp.py index 38a2caceed859..7313728256b4e 100644 --- a/tests/accelerators/test_dp.py +++ b/tests/accelerators/test_dp.py @@ -143,7 +143,7 @@ def test_dp_raise_exception_with_batch_transfer_hooks(tmpdir, monkeypatch): monkeypatch.setattr("torch.cuda.device_count", lambda: 2) class CustomModel(BoringModel): - def transfer_batch_to_device(self, batch, device): + def transfer_batch_to_device(self, batch, device, dataloader_idx): batch = batch.to(device) return batch diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 0e450caa18b62..d2f3cec5cba4f 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -21,16 +21,6 @@ from tests.helpers import BoringModel -def test_old_transfer_batch_to_device_hook(tmpdir): - class OldModel(BoringModel): - def transfer_batch_to_device(self, batch, device): - return super().transfer_batch_to_device(batch, device, None) - - trainer = Trainer(default_root_dir=tmpdir, limit_train_batches=1, limit_val_batches=0, max_epochs=1) - with pytest.deprecated_call(match="old signature will be removed in v1.6"): - trainer.fit(OldModel()) - - def test_v1_6_0_reload_dataloaders_every_epoch(tmpdir): model = BoringModel() From 7a9a08c5d3ca4699fb439f691c40e1320b37507a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Sat, 13 Nov 2021 21:35:03 +0100 Subject: [PATCH 09/18] Drop torch 1.6 testing (#10390) * Drop torch 1.6 support * Drop 1.6 support * Update CHANGELOG * Fixes * Split change * Undo change * 1.7 -> 1.7.1 https://github.com/pytorch/pytorch/issues/47354 * Force trigger nightly * Update .github/workflows/events-nightly.yml Co-authored-by: Aki Nitta * Revert 1.7.1 change - try wildcard * Update adjust versions and test it * Undo test changes * Revert "Undo test changes" This reverts commit 3a6acadd115e86f02d83a788f1978372ab6764f3. 
* Update CHANGELOG.md Co-authored-by: Aki Nitta --- pytorch_lightning/callbacks/quantization.py | 2 +- pytorch_lightning/distributed/dist.py | 5 +- .../overrides/torch_distributed.py | 99 ------------------- .../plugins/training_type/ddp.py | 16 ++- .../plugins/training_type/ddp_spawn.py | 17 ++-- .../connectors/accelerator_connector.py | 5 +- pytorch_lightning/utilities/__init__.py | 1 - pytorch_lightning/utilities/auto_restart.py | 35 ++----- pytorch_lightning/utilities/cloud_io.py | 9 +- pytorch_lightning/utilities/imports.py | 3 +- pytorch_lightning/utilities/seed.py | 6 +- tests/callbacks/test_quantization.py | 2 +- tests/conftest.py | 6 +- tests/core/test_metric_result_integration.py | 5 +- tests/helpers/datamodules.py | 1 - .../loops/optimization/test_optimizer_loop.py | 2 - tests/loops/test_loops.py | 5 - tests/plugins/test_double_plugin.py | 6 +- tests/profiler/test_profiler.py | 4 +- .../connectors/test_checkpoint_connector.py | 2 - .../connectors/test_signal_connector.py | 2 +- tests/trainer/test_data_loading.py | 2 +- tests/trainer/test_supporters.py | 2 - tests/utilities/test_auto_restart.py | 6 +- 24 files changed, 40 insertions(+), 203 deletions(-) delete mode 100644 pytorch_lightning/overrides/torch_distributed.py diff --git a/pytorch_lightning/callbacks/quantization.py b/pytorch_lightning/callbacks/quantization.py index ca82a574f71d1..42f0d575ffb6f 100644 --- a/pytorch_lightning/callbacks/quantization.py +++ b/pytorch_lightning/callbacks/quantization.py @@ -28,7 +28,7 @@ if _TORCH_GREATER_EQUAL_1_8: from torch.quantization import FakeQuantizeBase else: - # For torch 1.6 and 1.7. + # For torch 1.7. from torch.quantization import FakeQuantize as FakeQuantizeBase import pytorch_lightning as pl diff --git a/pytorch_lightning/distributed/dist.py b/pytorch_lightning/distributed/dist.py index 082e0c617a5f7..a0054d17936b0 100644 --- a/pytorch_lightning/distributed/dist.py +++ b/pytorch_lightning/distributed/dist.py @@ -13,7 +13,8 @@ # limitations under the License. from typing import Any -from pytorch_lightning.overrides.torch_distributed import broadcast_object_list +import torch.distributed + from pytorch_lightning.utilities import rank_zero_deprecation from pytorch_lightning.utilities.distributed import group as _group @@ -40,6 +41,6 @@ def broadcast(self, obj: Any, group=_group.WORLD): if self.rank != 0: obj = [None] * len(obj) - broadcast_object_list(obj, 0, group=group or _group.WORLD) + torch.distributed.broadcast_object_list(obj, 0, group=group or _group.WORLD) return obj[0] diff --git a/pytorch_lightning/overrides/torch_distributed.py b/pytorch_lightning/overrides/torch_distributed.py deleted file mode 100644 index 3cbbe5ea760ff..0000000000000 --- a/pytorch_lightning/overrides/torch_distributed.py +++ /dev/null @@ -1,99 +0,0 @@ -import logging -import pickle - -import torch - -from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8 - -log = logging.getLogger(__name__) - -if torch.distributed.is_available(): - from torch.distributed import Backend, broadcast, get_backend, get_rank, GroupMember - -# The code underneath is taken from PyTorch `torch/distributed/distributed_c10d.py` -# and enable broadcasting for PyTorch 1.6 and lower. 
- - -# https://github.com/pytorch/pytorch/blob/1.7/torch/distributed/distributed_c10d.py#L160 -def _rank_not_in_group(group): - """Helper that checks if the current process's rank is not in a given group.""" - if group is None: - return False - return group == GroupMember.NON_GROUP_MEMBER - - -# Taken from https://github.com/pytorch/pytorch/blob/1.7/torch/distributed/distributed_c10d.py#L1164 -def _object_to_tensor(obj): - buffer = pickle.dumps(obj) - byte_storage = torch.ByteStorage.from_buffer(buffer) # type: ignore[attr-defined] - byte_tensor = torch.ByteTensor(byte_storage) - local_size = torch.LongTensor([byte_tensor.numel()]) - return byte_tensor, local_size - - -# Taken from https://github.com/pytorch/pytorch/blob/1.7/torch/distributed/distributed_c10d.py -def _tensor_to_object(tensor, tensor_size): - buf = tensor.numpy().tobytes()[:tensor_size] - out = pickle.loads(buf) - return out - - -# Taken from https://github.com/pytorch/pytorch/blob/1.7/torch/distributed/distributed_c10d.py#L1327 -def _broadcast_object_list(object_list, src=0, group=None): - if _rank_not_in_group(group): - return - - my_rank = get_rank() - # Serialize object_list elements to tensors on src rank. - if my_rank == src: - tensor_list, size_list = zip(*(_object_to_tensor(obj) for obj in object_list)) - object_sizes_tensor = torch.cat(size_list) - else: - object_sizes_tensor = torch.LongTensor(len(object_list)) - - group_backend = get_backend(group) - is_nccl_backend = group_backend == Backend.NCCL - current_device = torch.device("cpu") - if is_nccl_backend: - # See note about using torch.cuda.current_device() here in docstring. - # We cannot simply use my_rank since rank == device is not necessarily - # true. - current_device = torch.device("cuda", torch.cuda.current_device()) - object_sizes_tensor = object_sizes_tensor.to(current_device) - object_sizes_tensor = object_sizes_tensor.to(current_device) - - # Broadcast object sizes - broadcast(object_sizes_tensor, src=src, group=group) - - # Concatenate and broadcast serialized object tensors - if my_rank == src: - object_tensor = torch.cat(tensor_list) - else: - object_tensor = torch.ByteTensor(torch.sum(object_sizes_tensor).item()) - - if is_nccl_backend: - object_tensor = object_tensor.to(current_device) - - broadcast(object_tensor, src=src, group=group) - - # Deserialize objects using their stored sizes. - offset = 0 - if my_rank != src: - for i, obj_size in enumerate(object_sizes_tensor): - obj_view = object_tensor[offset : offset + obj_size] - obj_view = obj_view.type(torch.ByteTensor) # type: ignore[call-overload] - offset += obj_size - object_list[i] = _tensor_to_object(obj_view, obj_size) - - -if not torch.distributed.is_available(): - # avoid failures on early PyTorch versions for Windows where - # not all functions used in `broadcast_object_list` are available. 
- def _broadcast_noop(obj, *_, **__): - return obj - - broadcast_object_list = _broadcast_noop -elif _TORCH_GREATER_EQUAL_1_8: - from torch.distributed.distributed_c10d import broadcast_object_list -else: - broadcast_object_list = _broadcast_object_list diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index c528be4c8bfef..84e9b55b9ee08 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -34,7 +34,6 @@ from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.overrides import LightningDistributedModule from pytorch_lightning.overrides.distributed import prepare_for_backward -from pytorch_lightning.overrides.torch_distributed import broadcast_object_list from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin @@ -43,7 +42,6 @@ _FAIRSCALE_AVAILABLE, _HYDRA_AVAILABLE, _IS_WINDOWS, - _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8, _TORCH_GREATER_EQUAL_1_9, _TORCH_GREATER_EQUAL_1_10, @@ -255,15 +253,13 @@ def pre_configure_ddp(self): # when not all parameter backward hooks are fired by the autograd engine even if require_grad is set to True. # This flag does come with a performance hit, so it is suggested to disable in cases where it is possible. self._ddp_kwargs["find_unused_parameters"] = self._ddp_kwargs.get("find_unused_parameters", True) - # todo: PyTorch 1.7.0 DDP introduces `self.reducer._rebuild_buckets()` breaking manual_optimization - if ( - _TORCH_GREATER_EQUAL_1_7 - and not self.lightning_module.automatic_optimization - and not self._ddp_kwargs.get("find_unused_parameters", False) + if not self.lightning_module.automatic_optimization and not self._ddp_kwargs.get( + "find_unused_parameters", False ): + # TODO: PyTorch 1.7.0 DDP introduces `self.reducer._rebuild_buckets()` breaking manual_optimization rank_zero_warn( - "From PyTorch 1.7.0, Lightning ``manual_optimization`` needs to set ``find_unused_parameters=True`` " - "to properly work with DDP." + "From PyTorch 1.7.0, Lightning `manual_optimization` needs to set `find_unused_parameters=True` to" + " properly work with DDP. Using `find_unused_parameters=True`." 
) self._ddp_kwargs["find_unused_parameters"] = True @@ -371,7 +367,7 @@ def broadcast(self, obj: object, src: int = 0) -> object: obj = [obj] if self.global_rank != src: obj = [None] - broadcast_object_list(obj, src, group=_group.WORLD) + torch.distributed.broadcast_object_list(obj, src, group=_group.WORLD) return obj[0] def pre_backward(self, closure_loss: torch.Tensor) -> None: diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index 926409925b9c7..677e031cd04af 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -27,12 +27,11 @@ import pytorch_lightning as pl from pytorch_lightning.overrides import LightningDistributedModule from pytorch_lightning.overrides.distributed import prepare_for_backward -from pytorch_lightning.overrides.torch_distributed import broadcast_object_list from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin from pytorch_lightning.trainer.states import TrainerFn -from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8, rank_zero_warn +from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_8, rank_zero_warn from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device from pytorch_lightning.utilities.cloud_io import atomic_save from pytorch_lightning.utilities.cloud_io import load as pl_load @@ -238,15 +237,13 @@ def pre_configure_ddp(self): # when not all parameter backward hooks are fired by the autograd engine even if require_grad is set to True. # This flag does come with a performance hit, so it is suggested to disable in cases where it is possible. self._ddp_kwargs["find_unused_parameters"] = self._ddp_kwargs.get("find_unused_parameters", True) - # todo: PyTorch 1.7.0 DDP introduces `self.reducer._rebuild_buckets()` breaking manual_optimization - if ( - _TORCH_GREATER_EQUAL_1_7 - and not self.lightning_module.automatic_optimization - and not self._ddp_kwargs.get("find_unused_parameters", False) + if not self.lightning_module.automatic_optimization and not self._ddp_kwargs.get( + "find_unused_parameters", False ): + # TODO: PyTorch 1.7.0 DDP introduces `self.reducer._rebuild_buckets()` breaking manual_optimization rank_zero_warn( - "From PyTorch 1.7.0, Lightning ``manual_optimization`` needs to set ``find_unused_parameters=True`` " - "to properly work with DDP." + "From PyTorch 1.7.0, Lightning `manual_optimization` needs to set `find_unused_parameters=True` to" + " properly work with DDP. Using `find_unused_parameters=True`." 
) self._ddp_kwargs["find_unused_parameters"] = True @@ -323,7 +320,7 @@ def broadcast(self, obj: object, src: int = 0) -> object: obj = [obj] if self.global_rank != src: obj = [None] - broadcast_object_list(obj, src, group=_group.WORLD) + torch.distributed.broadcast_object_list(obj, src, group=_group.WORLD) return obj[0] def model_to_device(self): diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index e15f7bb853db8..43eb65ce21a22 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -74,7 +74,6 @@ from pytorch_lightning.utilities.imports import ( _HOROVOD_AVAILABLE, _IPU_AVAILABLE, - _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8, _TPU_AVAILABLE, ) @@ -190,10 +189,8 @@ def _init_deterministic(self, deterministic: bool) -> None: self.deterministic = deterministic if _TORCH_GREATER_EQUAL_1_8: torch.use_deterministic_algorithms(deterministic) - elif _TORCH_GREATER_EQUAL_1_7: + else: torch.set_deterministic(deterministic) - else: # the minimum version Lightning supports is PyTorch 1.6 - torch._set_deterministic(deterministic) if deterministic: # fixing non-deterministic part of horovod # https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383 diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index 158d7356c91ce..7343e28d6d811 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -44,7 +44,6 @@ _OMEGACONF_AVAILABLE, _POPTORCH_AVAILABLE, _RICH_AVAILABLE, - _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8, _TORCH_GREATER_EQUAL_1_9, _TORCH_GREATER_EQUAL_1_10, diff --git a/pytorch_lightning/utilities/auto_restart.py b/pytorch_lightning/utilities/auto_restart.py index f0b50103cf2f2..ef52717636d90 100644 --- a/pytorch_lightning/utilities/auto_restart.py +++ b/pytorch_lightning/utilities/auto_restart.py @@ -305,9 +305,6 @@ def _wrap_generator_samplers(self) -> None: # access wrapped dataset attributes dataset_dict = self.dataset.__dict__ - # create a tuple of sampler names - samplers_names = tuple(v.__class__.__name__ for k, v in dataset_dict.items() if isinstance(v, Sampler)) - # create a dictionary of generator present within the dataset attributes dataset_sampler_generators = {k: v for k, v in dataset_dict.items() if isinstance(v, (Generator, Iterator))} @@ -318,31 +315,17 @@ def _wrap_generator_samplers(self) -> None: if isinstance(generator, Sampler): continue - # used to handle a weird behaviour from PyTorch 1.6 - # where the sampler is converted to a list_iterator - is_legacy = False - - if isinstance(generator, Generator): - # Generator name have the the form `SamplerName.__iter__` - generator_name = generator.__qualname__.split(".")[0] - else: - # assume the retrieved iterator is coming from sampler. - is_legacy = True - - # validate the base generator name matches a sampler name. - if is_legacy or any(sampler_name == generator_name for sampler_name in samplers_names): - - # wrap the generator into a `FastForwardSampler` - sampler = FastForwardSampler(generator, attr_name=generator_attr_name) + # wrap the generator into a `FastForwardSampler` + sampler = FastForwardSampler(generator, attr_name=generator_attr_name) - # if `CaptureIterableDataset` was available, the sampler should reload its own state. 
- if self._state_dict is not None: - sampler.load_state_dict(self._state_dict[generator_attr_name]) - # store the samplers - self.samplers[generator_attr_name] = sampler + # if `CaptureIterableDataset` was available, the sampler should reload its own state. + if self._state_dict is not None: + sampler.load_state_dict(self._state_dict[generator_attr_name]) + # store the samplers + self.samplers[generator_attr_name] = sampler - # replace generator with the generator from the `FastForwardSampler`. - dataset_dict[generator_attr_name] = iter(sampler) + # replace generator with the generator from the `FastForwardSampler`. + dataset_dict[generator_attr_name] = iter(sampler) self.reset_on_epoch() diff --git a/pytorch_lightning/utilities/cloud_io.py b/pytorch_lightning/utilities/cloud_io.py index 9b40f6d69cfad..2c9eb1f768d3c 100644 --- a/pytorch_lightning/utilities/cloud_io.py +++ b/pytorch_lightning/utilities/cloud_io.py @@ -19,7 +19,6 @@ import fsspec import torch from fsspec.implementations.local import AbstractFileSystem, LocalFileSystem -from packaging.version import Version def load( @@ -59,12 +58,6 @@ def atomic_save(checkpoint: Dict[str, Any], filepath: Union[str, Path]) -> None: """ bytesbuffer = io.BytesIO() - # Can't use the new zipfile serialization for 1.6.0 because there's a bug in - # torch.hub.load_state_dict_from_url() that prevents it from loading the new files. - # More details can be found here: https://github.com/pytorch/pytorch/issues/42239 - if Version(torch.__version__).release[:3] == (1, 6, 0): - torch.save(checkpoint, bytesbuffer, _use_new_zipfile_serialization=False) - else: - torch.save(checkpoint, bytesbuffer) + torch.save(checkpoint, bytesbuffer) with fsspec.open(filepath, "wb") as f: f.write(bytesbuffer.getvalue()) diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index edf5f75aee6a9..5db24fe0f5cff 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -70,7 +70,6 @@ def _compare_version(package: str, op: Callable, version: str, use_base_version: _IS_WINDOWS = platform.system() == "Windows" _IS_INTERACTIVE = hasattr(sys, "ps1") # https://stackoverflow.com/a/64523765 -_TORCH_GREATER_EQUAL_1_7 = _compare_version("torch", operator.ge, "1.7.0") _TORCH_GREATER_EQUAL_1_8 = _compare_version("torch", operator.ge, "1.8.0") _TORCH_GREATER_EQUAL_1_8_1 = _compare_version("torch", operator.ge, "1.8.1") _TORCH_GREATER_EQUAL_1_9 = _compare_version("torch", operator.ge, "1.9.0") @@ -112,4 +111,4 @@ def _compare_version(package: str, op: Callable, version: str, use_base_version: # experimental feature within PyTorch Lightning. 
def _fault_tolerant_training() -> bool: - return _TORCH_GREATER_EQUAL_1_7 and int(os.getenv("PL_FAULT_TOLERANT_TRAINING", 0)) + return bool(int(os.getenv("PL_FAULT_TOLERANT_TRAINING", 0))) diff --git a/pytorch_lightning/utilities/seed.py b/pytorch_lightning/utilities/seed.py index 3b20c53353411..e8fc243f484f8 100644 --- a/pytorch_lightning/utilities/seed.py +++ b/pytorch_lightning/utilities/seed.py @@ -21,7 +21,7 @@ import numpy as np import torch -from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7, rank_zero_warn +from pytorch_lightning.utilities import rank_zero_warn from pytorch_lightning.utilities.distributed import rank_zero_only log = logging.getLogger(__name__) @@ -113,9 +113,7 @@ def pl_worker_init_function(worker_id: int, rank: Optional[int] = None) -> None: np.random.seed(ss.generate_state(4)) # Spawn distinct SeedSequences for the PyTorch PRNG and the stdlib random module torch_ss, stdlib_ss = ss.spawn(2) - # PyTorch 1.7 and above takes a 64-bit seed - dtype = np.uint64 if _TORCH_GREATER_EQUAL_1_7 else np.uint32 - torch.manual_seed(torch_ss.generate_state(1, dtype=dtype)[0]) + torch.manual_seed(torch_ss.generate_state(1, dtype=np.uint64)[0]) # use 128 bits expressed as an integer stdlib_seed = (stdlib_ss.generate_state(2, dtype=np.uint64).astype(object) * [1 << 64, 1]).sum() random.seed(stdlib_seed) diff --git a/tests/callbacks/test_quantization.py b/tests/callbacks/test_quantization.py index fa2ee767bdc8c..e3dfb9b6a7edf 100644 --- a/tests/callbacks/test_quantization.py +++ b/tests/callbacks/test_quantization.py @@ -31,7 +31,7 @@ if _TORCH_GREATER_EQUAL_1_8: from torch.quantization import FakeQuantizeBase else: - # For torch 1.6 and 1.7. + # For torch 1.7. from torch.quantization import FakeQuantize as FakeQuantizeBase diff --git a/tests/conftest.py b/tests/conftest.py index 860f9357e4636..3d5548b7bd0ae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,7 +22,7 @@ import torch.distributed from pytorch_lightning.plugins.environments.lightning_environment import find_free_network_port -from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8 +from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8 from tests import _PATH_DATASETS @@ -95,10 +95,8 @@ def reset_deterministic_algorithm(): yield if _TORCH_GREATER_EQUAL_1_8: torch.use_deterministic_algorithms(False) - elif _TORCH_GREATER_EQUAL_1_7: + else: torch.set_deterministic(False) - else: # the minimum version Lightning supports is PyTorch 1.6 - torch._set_deterministic(False) @pytest.fixture diff --git a/tests/core/test_metric_result_integration.py b/tests/core/test_metric_result_integration.py index 12fe7f2fb4652..9ec2f150ac5d4 100644 --- a/tests/core/test_metric_result_integration.py +++ b/tests/core/test_metric_result_integration.py @@ -33,7 +33,7 @@ ResultCollection, ResultMetric, ) -from pytorch_lightning.utilities.imports import _fault_tolerant_training, _TORCH_GREATER_EQUAL_1_7 +from pytorch_lightning.utilities.imports import _fault_tolerant_training from tests.helpers import BoringModel from tests.helpers.runif import RunIf @@ -470,21 +470,18 @@ def on_epoch_end(self) -> None: @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="Requires at least PyTorch 1.7") def test_result_collection_reload(tmpdir): result_collection_reload(default_root_dir=tmpdir) @RunIf(min_gpus=1) @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@pytest.mark.skipif(not 
_TORCH_GREATER_EQUAL_1_7, reason="Requires at least PyTorch 1.7") def test_result_collection_reload_1_gpu_ddp(tmpdir): result_collection_reload(default_root_dir=tmpdir, strategy="ddp", gpus=1) @RunIf(min_gpus=2, special=True) @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="Requires at least PyTorch 1.7") def test_result_collection_reload_2_gpus(tmpdir): result_collection_reload(default_root_dir=tmpdir, strategy="ddp", gpus=2) diff --git a/tests/helpers/datamodules.py b/tests/helpers/datamodules.py index 0cb178a749a09..78e806b37937e 100644 --- a/tests/helpers/datamodules.py +++ b/tests/helpers/datamodules.py @@ -46,7 +46,6 @@ def prepare_data(self): self.dataset_cls(self.data_dir, train=False, download=True) def setup(self, stage: Optional[str] = None): - # TODO: need to split using random_split once updated to torch >= 1.6 if stage == "fit" or stage is None: self.mnist_train = self.dataset_cls(self.data_dir, train=True) if stage == "test" or stage is None: diff --git a/tests/loops/optimization/test_optimizer_loop.py b/tests/loops/optimization/test_optimizer_loop.py index 7e17cbbd56645..ae77c4387a398 100644 --- a/tests/loops/optimization/test_optimizer_loop.py +++ b/tests/loops/optimization/test_optimizer_loop.py @@ -24,7 +24,6 @@ from pytorch_lightning.loops.optimization.optimizer_loop import ClosureResult from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers import BoringModel -from tests.helpers.runif import RunIf def test_closure_result_deepcopy(): @@ -140,7 +139,6 @@ class CustomException(Exception): pass -@RunIf(min_torch="1.7.0") @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) @pytest.mark.parametrize("stop_epoch", (0, 1)) @pytest.mark.parametrize("stop_batch", (0, 1, 2)) diff --git a/tests/loops/test_loops.py b/tests/loops/test_loops.py index bad9a717d1629..8166f14754a51 100644 --- a/tests/loops/test_loops.py +++ b/tests/loops/test_loops.py @@ -253,7 +253,6 @@ def on_load_checkpoint(self, state_dict: Dict) -> None: assert state_dict == {"state_dict": {"a": 1}, "progress": {"increment": 1}} -@RunIf(min_torch="1.7.0") @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) @pytest.mark.parametrize("stop_epoch", (1, 2)) @pytest.mark.parametrize("stop_batch", (1, 2)) @@ -323,7 +322,6 @@ def val_dataloader(self): assert trainer.fit_loop.epoch_loop.val_loop.epoch_loop.batch_progress.state_dict() == expected -@RunIf(min_torch="1.7.0") @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) @pytest.mark.parametrize("accumulate_grad_batches", (1, 2, 3)) @pytest.mark.parametrize("n_optimizers", (1, 3, 5)) @@ -526,7 +524,6 @@ def configure_optimizers_multiple(self): assert state_dict["epoch_progress"]["current"]["started"] == stop_epoch -@RunIf(min_torch="1.7.0") @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) @pytest.mark.parametrize("n_optimizers", (1, 3, 5)) def test_loop_state_on_complete_run(n_optimizers, tmpdir): @@ -662,7 +659,6 @@ def train_dataloader(self): assert checkpoint["loops"]["fit_loop"] == expected -@RunIf(min_torch="1.7.0") @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) def test_fit_loop_reset(tmpdir): """Test that the reset logic in fit- and epoch loop is aware of whether the loop is restarting from a completed @@ -752,7 +748,6 @@ def test_fit_loop_reset(tmpdir): @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@RunIf(min_torch="1.7.0") @pytest.mark.parametrize( 
["train_datasets", "val_datasets"], [([RandomDataset], [RandomDataset]), ([RandomDataset], [RandomDataset, RandomDataset])], diff --git a/tests/plugins/test_double_plugin.py b/tests/plugins/test_double_plugin.py index cadd02c692af5..b3fdf87428522 100644 --- a/tests/plugins/test_double_plugin.py +++ b/tests/plugins/test_double_plugin.py @@ -20,7 +20,6 @@ from pytorch_lightning import Trainer from pytorch_lightning.plugins import DoublePrecisionPlugin -from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7 from tests.helpers.boring_model import BoringModel, RandomDataset from tests.helpers.runif import RunIf @@ -137,10 +136,7 @@ def on_fit_start(self): [ DoublePrecisionBoringModel, DoublePrecisionBoringModelNoForward, - pytest.param( - DoublePrecisionBoringModelComplexBuffer, - marks=pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="torch.complex not available"), - ), + DoublePrecisionBoringModelComplexBuffer, ], ) def test_double_precision(tmpdir, boring_model): diff --git a/tests/profiler/test_profiler.py b/tests/profiler/test_profiler.py index c4bcb56ca87bb..37756fcc62351 100644 --- a/tests/profiler/test_profiler.py +++ b/tests/profiler/test_profiler.py @@ -26,7 +26,6 @@ from pytorch_lightning.loggers.tensorboard import TensorBoardLogger from pytorch_lightning.profiler import AdvancedProfiler, PassThroughProfiler, PyTorchProfiler, SimpleProfiler from pytorch_lightning.profiler.pytorch import RegisterRecordFunction -from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7 from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _KINETO_AVAILABLE from tests.helpers import BoringModel, ManualOptimBoringModel @@ -394,8 +393,7 @@ def test_pytorch_profiler_nested(tmpdir): names = {"a", "b", "c"} ops = {"add", "empty", "fill_", "ones", "zero_", "zeros"} - if _TORCH_GREATER_EQUAL_1_7: - ops = {"aten::" + op for op in ops} + ops = {"aten::" + op for op in ops} expected = names.union(ops) assert events_name == expected, (events_name, torch.__version__, platform.system()) diff --git a/tests/trainer/connectors/test_checkpoint_connector.py b/tests/trainer/connectors/test_checkpoint_connector.py index 6b408845ed879..4a42265eb21b0 100644 --- a/tests/trainer/connectors/test_checkpoint_connector.py +++ b/tests/trainer/connectors/test_checkpoint_connector.py @@ -21,7 +21,6 @@ from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.trainer.states import TrainerFn from tests.helpers import BoringModel -from tests.helpers.runif import RunIf class HPCHookdedModel(BoringModel): @@ -133,7 +132,6 @@ def test_hpc_max_ckpt_version(tmpdir): @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@RunIf(min_torch="1.7.0") def test_loops_restore(tmpdir): """Test that required loop state_dict is loaded correctly by checkpoint connector.""" model = BoringModel() diff --git a/tests/trainer/connectors/test_signal_connector.py b/tests/trainer/connectors/test_signal_connector.py index 3da8c100fe40c..aa5407e2f1228 100644 --- a/tests/trainer/connectors/test_signal_connector.py +++ b/tests/trainer/connectors/test_signal_connector.py @@ -26,7 +26,7 @@ @pytest.mark.parametrize("register_handler", [False, True]) @pytest.mark.parametrize("terminate_gracefully", [False, True]) -@RunIf(min_torch="1.7.0", skip_windows=True) +@RunIf(skip_windows=True) def test_fault_tolerant_sig_handler(register_handler, terminate_gracefully, tmpdir): # hack to reset the signal diff --git a/tests/trainer/test_data_loading.py 
b/tests/trainer/test_data_loading.py index 0f6abd38e6836..97097b2074ca1 100644 --- a/tests/trainer/test_data_loading.py +++ b/tests/trainer/test_data_loading.py @@ -26,7 +26,7 @@ from tests.helpers.runif import RunIf -@RunIf(skip_windows=True, min_torch="1.7.0") +@RunIf(skip_windows=True) @pytest.mark.parametrize("mode", (1, 2, 3)) def test_replace_distributed_sampler(tmpdir, mode): class IndexedRandomDataset(RandomDataset): diff --git a/tests/trainer/test_supporters.py b/tests/trainer/test_supporters.py index e4598550c24fb..694d473155439 100644 --- a/tests/trainer/test_supporters.py +++ b/tests/trainer/test_supporters.py @@ -35,7 +35,6 @@ from pytorch_lightning.utilities.auto_restart import CaptureMapDataset, FastForwardSampler from pytorch_lightning.utilities.data import get_len from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_7 from tests.helpers.boring_model import RandomDataset @@ -312,7 +311,6 @@ def test_nested_calc_num_data(input_data, compute_func, expected_length): assert calculated_length == expected_length -@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="Requires at least PyTorch 1.7") @mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1", "PL_TRAINER_GPUS": "2"}) @mock.patch("torch.cuda.device_count", return_value=2) @mock.patch("torch.cuda.is_available", return_value=True) diff --git a/tests/utilities/test_auto_restart.py b/tests/utilities/test_auto_restart.py index 4e3385cebecbc..b36a9d1d76941 100644 --- a/tests/utilities/test_auto_restart.py +++ b/tests/utilities/test_auto_restart.py @@ -690,7 +690,6 @@ def create_dataloader(): } -@RunIf(min_torch="1.7.0") @pytest.mark.parametrize("use_fault_tolerant", ["0", "1"]) def test_data_loading_wraps_dataset_and_samplers(use_fault_tolerant, tmpdir): """This test ensures the dataset and sampler are properly wrapped when fault tolerant is enabled.""" @@ -785,7 +784,6 @@ def __len__(self): # TODO: test with `RandomGeneratorGetItemDataset` @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@RunIf(min_torch="1.7.0") @pytest.mark.parametrize( "dataset_class", [ @@ -921,7 +919,6 @@ def _run_training(trainer_kwargs, dataset_classes, fail_on_step: int = -1, ckpt_ @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@RunIf(min_torch="1.7.0") @pytest.mark.parametrize( "dataset_classes", [ @@ -975,7 +972,6 @@ def test_dataset_rng_states_restart_with_lightning(tmpdir, dataset_classes, mult @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@RunIf(min_torch="1.7.0") @pytest.mark.parametrize( ["train_datasets", "val_datasets"], [ @@ -1139,7 +1135,7 @@ def _fit_model( @pytest.mark.parametrize("failure_on_training", [False, True]) @pytest.mark.parametrize("failure_on_step", [False, True]) @mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"}) -@RunIf(min_torch="1.7.0", skip_windows=True) +@RunIf(skip_windows=True) def test_auto_restart_under_signal(on_last_batch, val_check_interval, failure_on_training, failure_on_step, tmpdir): """This test asserts that if a signal is being sent during the training / validation phase, the model should restart in a reproducible way.""" From ffb40060c099f08f1f5b3e94ea58284e6503e556 Mon Sep 17 00:00:00 2001 From: thomas chaton Date: Mon, 15 Nov 2021 10:03:46 +0000 Subject: [PATCH 10/18] shutdown workers on failure (#10463) --- CHANGELOG.md | 3 +++ pytorch_lightning/trainer/trainer.py | 2 ++ tests/loops/test_loops.py | 34 
++++++++++++++++++++++------ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbad7fb1d4be2..0ff6ebbbc6512 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -131,6 +131,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `CombinedLoader` and `max_size_cycle` didn't receive a `DistributedSampler` ([#10374](https://github.com/PyTorchLightning/pytorch-lightning/issues/10374)) +- Fixed an issue that prevented the Trainer to shutdown workers when execution is interrupted due to failure([#10463](https://github.com/PyTorchLightning/pytorch-lightning/issues/10463)) + + - Squeeze the early stopping monitor to remove empty tensor dimensions ([#10461](https://github.com/PyTorchLightning/pytorch-lightning/issues/10461)) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 396289000251d..b84f03393309b 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -694,6 +694,8 @@ def _call_and_handle_interrupt(self, trainer_fn: Callable, *args: Any, **kwargs: # reset bookkeeping self.state.stage = None self.on_exception(exception) + # shutdown workers + self._data_connector.teardown() raise def fit( diff --git a/tests/loops/test_loops.py b/tests/loops/test_loops.py index 8166f14754a51..63a2211934ece 100644 --- a/tests/loops/test_loops.py +++ b/tests/loops/test_loops.py @@ -24,7 +24,7 @@ from pl_examples.bug_report_model import RandomDataset from pytorch_lightning import LightningModule, Trainer -from pytorch_lightning.callbacks import ModelCheckpoint +from pytorch_lightning.callbacks import Callback, ModelCheckpoint from pytorch_lightning.loops import Loop, TrainingBatchLoop from pytorch_lightning.trainer.progress import BaseProgress from tests.helpers import BoringModel @@ -907,8 +907,10 @@ def val_dataloader(self): @RunIf(min_torch="1.8.0") -@pytest.mark.parametrize("persistent_workers", (False, True)) -def test_workers_are_shutdown(tmpdir, persistent_workers): +@pytest.mark.parametrize("should_fail", [False, True]) +# False is de-activated due to slowness +@pytest.mark.parametrize("persistent_workers", [True]) +def test_workers_are_shutdown(tmpdir, should_fail, persistent_workers): # `num_workers == 1` uses `_MultiProcessingDataLoaderIter` # `persistent_workers` makes sure `self._iterator` gets set on the `DataLoader` instance @@ -936,12 +938,30 @@ def _get_iterator(self): train_dataloader = TestDataLoader(RandomDataset(32, 64), num_workers=1, persistent_workers=persistent_workers) val_dataloader = TestDataLoader(RandomDataset(32, 64), num_workers=1, persistent_workers=persistent_workers) + class TestCallback(Callback): + def on_train_epoch_end(self, trainer, *_): + if trainer.current_epoch == 1: + raise CustomException + max_epochs = 3 + model = BoringModel() - trainer = Trainer(default_root_dir=tmpdir, limit_train_batches=2, limit_val_batches=2, max_epochs=max_epochs) - trainer.fit(model, train_dataloader, val_dataloader) - assert train_dataloader.count_shutdown_workers == (2 if persistent_workers else max_epochs) + trainer = Trainer( + default_root_dir=tmpdir, + limit_train_batches=2, + limit_val_batches=2, + max_epochs=max_epochs, + callbacks=TestCallback() if should_fail else None, + ) + + if should_fail: + with pytest.raises(CustomException): + trainer.fit(model, train_dataloader, val_dataloader) + else: + trainer.fit(model, train_dataloader, val_dataloader) + + assert train_dataloader.count_shutdown_workers == 2 if should_fail else 
(2 if persistent_workers else max_epochs) # on sanity checking end, the workers are being deleted too. - assert val_dataloader.count_shutdown_workers == (2 if persistent_workers else max_epochs + 1) + assert val_dataloader.count_shutdown_workers == 2 if persistent_workers else (3 if should_fail else max_epochs + 1) assert train_dataloader._iterator is None assert val_dataloader._iterator is None From 8b0cb47cc03bc29a69e84a5637f8caf23ef364a7 Mon Sep 17 00:00:00 2001 From: puhuk Date: Mon, 15 Nov 2021 20:54:47 +0900 Subject: [PATCH 11/18] Remove deprecated `hpc_load` in `CheckpointConnector` (#10525) Co-authored-by: Aki Nitta --- CHANGELOG.md | 3 +++ .../trainer/connectors/checkpoint_connector.py | 11 ----------- tests/deprecated_api/test_remove_1-6.py | 10 ---------- 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ff6ebbbc6512..4aef0b569c7b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -123,6 +123,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) + +- Removed deprecated `CheckpointConnector.hpc_load` property in favor of `CheckpointConnector.restore` ([#10525](https://github.com/PyTorchLightning/pytorch-lightning/pull/10525)) + ### Fixed - Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/PyTorchLightning/pytorch-lightning/issues/9702)) diff --git a/pytorch_lightning/trainer/connectors/checkpoint_connector.py b/pytorch_lightning/trainer/connectors/checkpoint_connector.py index 921c2e0a7e160..ab0d3aa4288fa 100644 --- a/pytorch_lightning/trainer/connectors/checkpoint_connector.py +++ b/pytorch_lightning/trainer/connectors/checkpoint_connector.py @@ -413,17 +413,6 @@ def dump_checkpoint(self, weights_only: bool = False) -> dict: return checkpoint - def hpc_load(self, checkpoint_path: _PATH) -> None: - """Attempts to restore the full training and model state from a HPC checkpoint file. - - .. deprecated:: v1.4 Will be removed in v1.6. Use :meth:`restore` instead. - """ - rank_zero_deprecation( - "`CheckpointConnector.hpc_load()` was deprecated in v1.4 and will be removed in v1.6." - " Use `CheckpointConnector.restore()` instead." - ) - self.restore(checkpoint_path) - def max_ckpt_version_in_folder(self, dir_path: _PATH, name_key: str = "ckpt_") -> Optional[int]: """List up files in `dir_path` with `name_key`, then yield maximum suffix number. 
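A short migration sketch for the removal above (illustrative, not part of the patch): the deleted `hpc_load` was a thin deprecated wrapper that forwarded to `restore`, so call sites can substitute it directly. The helper name `restore_from_hpc` is a made-up example.

    import pytorch_lightning as pl

    def restore_from_hpc(trainer: "pl.Trainer", checkpoint_path: str) -> None:
        # Before this patch (deprecated since v1.4):
        #     trainer.checkpoint_connector.hpc_load(checkpoint_path)
        # After: `restore` performs the same full restoration of the model
        # and training state from the given checkpoint file.
        trainer.checkpoint_connector.restore(checkpoint_path)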
diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index d2f3cec5cba4f..4270b5d278a2d 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -62,13 +62,3 @@ def test_v1_6_0_deprecated_disable_validation(): trainer = Trainer() with pytest.deprecated_call(match="disable_validation` is deprecated in v1.4"): _ = trainer.disable_validation - - -def test_v1_6_0_deprecated_hpc_load(tmpdir): - model = BoringModel() - trainer = Trainer(default_root_dir=tmpdir, max_steps=1) - trainer.fit(model) - trainer.checkpoint_connector.hpc_save(tmpdir, trainer.logger) - checkpoint_path = trainer.checkpoint_connector.get_max_ckpt_path_from_folder(str(tmpdir)) - with pytest.deprecated_call(match=r"`CheckpointConnector.hpc_load\(\)` was deprecated in v1.4"): - trainer.checkpoint_connector.hpc_load(checkpoint_path) From 794c4b08c0dacd89e38d5cb393db3fbd14c358b1 Mon Sep 17 00:00:00 2001 From: Shivam Mehta Date: Mon, 15 Nov 2021 13:56:30 +0100 Subject: [PATCH 12/18] Remove deprecated `is_overridden(model=...)` (#10507) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Carlos Mocholí --- CHANGELOG.md | 3 +++ pytorch_lightning/utilities/model_helpers.py | 17 ++--------------- tests/deprecated_api/test_remove_1-6.py | 9 --------- 3 files changed, 5 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4aef0b569c7b6..02283180b19f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Removed +- Removed deprecated parameter `method` in `pytorch_lightning.utilities.model_helpers.is_overridden` ([#10507](https://github.com/PyTorchLightning/pytorch-lightning/pull/10507)) + + - Remove deprecated method `ClusterEnvironment.creates_children` ([#10339](https://github.com/PyTorchLightning/pytorch-lightning/issues/10339)) diff --git a/pytorch_lightning/utilities/model_helpers.py b/pytorch_lightning/utilities/model_helpers.py index 3146b33fe153d..bb48b481e625f 100644 --- a/pytorch_lightning/utilities/model_helpers.py +++ b/pytorch_lightning/utilities/model_helpers.py @@ -12,26 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. from functools import partial -from typing import Optional, Type, Union +from typing import Optional, Type from unittest.mock import Mock import pytorch_lightning as pl -from pytorch_lightning.utilities import rank_zero_deprecation -def is_overridden( - method_name: str, - instance: Optional[object] = None, - parent: Optional[Type[object]] = None, - model: Optional[Union["pl.LightningModule", "pl.LightningDataModule"]] = None, -) -> bool: - if model is not None and instance is None: - rank_zero_deprecation( - "`is_overriden(model=...)` has been deprecated and will be removed in v1.6." 
- "Please use `is_overriden(instance=...)`" - ) - instance = model - +def is_overridden(method_name: str, instance: Optional[object] = None, parent: Optional[Type[object]] = None) -> bool: if instance is None: # if `self.lightning_module` was passed as instance, it can be `None` return False diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 4270b5d278a2d..efb288a623d6a 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -17,7 +17,6 @@ import pytest from pytorch_lightning import Trainer -from pytorch_lightning.utilities.model_helpers import is_overridden from tests.helpers import BoringModel @@ -50,14 +49,6 @@ def test_v1_6_0_reload_dataloaders_every_epoch(tmpdir): assert tracker.mock_calls == expected_sequence -def test_v1_6_0_is_overridden_model(): - model = BoringModel() - with pytest.deprecated_call(match="and will be removed in v1.6"): - assert is_overridden("validation_step", model=model) - with pytest.deprecated_call(match="and will be removed in v1.6"): - assert not is_overridden("foo", model=model) - - def test_v1_6_0_deprecated_disable_validation(): trainer = Trainer() with pytest.deprecated_call(match="disable_validation` is deprecated in v1.4"): From 0c2cdbf88f0f72e1cbad0e73c00f2618d33e978d Mon Sep 17 00:00:00 2001 From: Danielle Pintz <38207072+daniellepintz@users.noreply.github.com> Date: Mon, 15 Nov 2021 08:44:20 -0500 Subject: [PATCH 13/18] Update issues templates (#10537) --- .github/ISSUE_TEMPLATE/bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/documentation.md | 4 ++-- .github/ISSUE_TEMPLATE/feature_request.md | 6 +++--- .../{code_improvement.md => refactor.md} | 12 ++++++------ 4 files changed, 14 insertions(+), 14 deletions(-) rename .github/ISSUE_TEMPLATE/{code_improvement.md => refactor.md} (83%) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 3a94ef6758910..729d258cfcd63 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,14 +1,14 @@ --- name: Bug report -about: Create a report to help us improve +about: Create a bug report to help us improve title: '' -labels: bug / fix, help wanted +labels: bug assignees: '' --- ## 🐛 Bug - + ### To Reproduce diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md index 75919587387a9..f5ff43d6f093a 100644 --- a/.github/ISSUE_TEMPLATE/documentation.md +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -1,8 +1,8 @@ --- name: Typos and doc fixes -about: Typos and doc fixes +about: Tell us about how we can improve our documentation title: '' -labels: documentation +labels: docs assignees: '' --- diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index ab95a714e6dd7..11da695decfe0 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,8 +1,8 @@ --- name: Feature request -about: Suggest an idea for this project +about: Propose a feature for this project title: '' -labels: enhancement +labels: feature assignees: '' --- @@ -12,7 +12,7 @@ assignees: '' ### Motivation - + ### Pitch diff --git a/.github/ISSUE_TEMPLATE/code_improvement.md b/.github/ISSUE_TEMPLATE/refactor.md similarity index 83% rename from .github/ISSUE_TEMPLATE/code_improvement.md rename to .github/ISSUE_TEMPLATE/refactor.md index 7608b604e611b..5e07b0aae2df1 100644 --- a/.github/ISSUE_TEMPLATE/code_improvement.md +++ 
b/.github/ISSUE_TEMPLATE/refactor.md @@ -1,18 +1,18 @@ --- -name: Code improvement -about: Suggest a code improvement, i.e. refactoring, deprecation, etc. +name: Refactor +about: Suggest a code refactor or deprecation title: '' -labels: refactors / code health +labels: refactor assignees: '' --- -## Proposed refactoring or deprecation +## Proposed refactor - + ### Motivation - + ### Pitch From ade44653737f8df203118e3f64759cbb54666e24 Mon Sep 17 00:00:00 2001 From: Aki Nitta Date: Mon, 15 Nov 2021 23:15:17 +0900 Subject: [PATCH 14/18] Update configs with new GitHub labels (#10532) Co-authored-by: thomas chaton --- .github/mergify.yml | 6 +++--- .github/stale.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/mergify.yml b/.github/mergify.yml index a2b1e8aede6de..53ec106873dfe 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -45,7 +45,7 @@ pull_request_rules: - "#changes-requested-reviews-by=0" # no requested changes actions: label: - add: [ "0:] Ready-To-Go" ] + add: [ "ready" ] - name: Not ready yet conditions: @@ -54,13 +54,13 @@ pull_request_rules: - "#changes-requested-reviews-by>=1" # no requested changes actions: label: - remove: [ "0:] Ready-To-Go" ] + remove: [ "ready" ] - name: add core reviewer conditions: - -conflict # skip if conflict - -draft # filter-out GH draft PRs - - label="0:] Ready-To-Go" + - label="ready" - "#approved-reviews-by<3" # number of review approvals - "#review-requested<3" # number of requested reviews actions: diff --git a/.github/stale.yml b/.github/stale.yml index 84049394d3aab..1ac5e7448c9ff 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -8,8 +8,8 @@ issues: daysUntilClose: 7 # Issues with these labels will never be considered stale exemptLabels: - - Important - - Priority + - p0 + - p1 # Comment to post when marking an issue as stale. Set to `false` to disable markComment: > This issue has been automatically marked as stale because it hasn't had any recent activity. 
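Illustrative usage of the new `is_overridden` signature from PATCH 12 above: the removed `model=` keyword maps one-to-one onto `instance=`. A minimal sketch, assuming any plain `LightningModule` subclass (`TinyModel` is a hypothetical name):

    from pytorch_lightning import LightningModule
    from pytorch_lightning.utilities.model_helpers import is_overridden

    class TinyModel(LightningModule):
        def validation_step(self, batch, batch_idx):
            return None  # overrides the default hook

    model = TinyModel()
    # was: is_overridden("validation_step", model=model)  -- keyword removed
    assert is_overridden("validation_step", instance=model)
    assert not is_overridden("test_step", instance=model)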
From 3077886a4ff31a023a6f2296dae1021090942a97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 15 Nov 2021 15:27:42 +0100 Subject: [PATCH 15/18] Enable the auto-cc bot (#10531) --- .github/lightning-probot.yml | 1 + .github/workflows/probot-auto-cc.yml | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 .github/lightning-probot.yml create mode 100644 .github/workflows/probot-auto-cc.yml diff --git a/.github/lightning-probot.yml b/.github/lightning-probot.yml new file mode 100644 index 0000000000000..bd6a330a448a4 --- /dev/null +++ b/.github/lightning-probot.yml @@ -0,0 +1 @@ +tracking_issue: 10530 diff --git a/.github/workflows/probot-auto-cc.yml b/.github/workflows/probot-auto-cc.yml new file mode 100644 index 0000000000000..0595c4eee65f7 --- /dev/null +++ b/.github/workflows/probot-auto-cc.yml @@ -0,0 +1,18 @@ +name: Probot + +on: + issues: + types: + - labeled + pull_request: + types: + - labeled + +jobs: + auto-cc: + if: ${{ github.repository_owner == 'PyTorchLightning' }} + runs-on: ubuntu-latest + steps: + - uses: carmocca/probot@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 40945a51765e72400dd3be8f1a84a52a69effa73 Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Mon, 15 Nov 2021 20:36:56 +0530 Subject: [PATCH 16/18] Remove deprecated `stochastic_weight_avg` example from the docs (#10502) --- docs/source/advanced/training_tricks.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/source/advanced/training_tricks.rst b/docs/source/advanced/training_tricks.rst index 28f81d98dcbd3..a389b0db69a2e 100644 --- a/docs/source/advanced/training_tricks.rst +++ b/docs/source/advanced/training_tricks.rst @@ -64,10 +64,7 @@ read `this post Date: Mon, 15 Nov 2021 22:40:08 +0530 Subject: [PATCH 17/18] Deprecate `DistributedType` in favor of `StrategyType` (#10505) --- CHANGELOG.md | 2 +- pytorch_lightning/lite/lite.py | 16 +-- .../plugins/training_type/ddp.py | 4 +- .../plugins/training_type/ddp2.py | 4 +- .../plugins/training_type/ddp_spawn.py | 4 +- .../plugins/training_type/deepspeed.py | 4 +- pytorch_lightning/plugins/training_type/dp.py | 4 +- .../plugins/training_type/fully_sharded.py | 4 +- .../plugins/training_type/horovod.py | 4 +- .../plugins/training_type/sharded.py | 4 +- .../plugins/training_type/sharded_spawn.py | 4 +- .../connectors/accelerator_connector.py | 80 ++++++------- pytorch_lightning/trainer/data_loading.py | 4 +- pytorch_lightning/trainer/trainer.py | 12 +- pytorch_lightning/utilities/__init__.py | 1 + pytorch_lightning/utilities/enums.py | 105 +++++++++++++++--- .../test_accelerator_connector.py | 4 +- tests/base/model_test_epoch_ends.py | 10 +- tests/deprecated_api/test_remove_1-8.py | 23 ++++ tests/helpers/pipelines.py | 4 +- tests/lite/test_lite.py | 26 ++--- tests/trainer/test_data_loading.py | 4 +- tests/trainer/test_trainer.py | 74 ++++++------ 23 files changed, 250 insertions(+), 151 deletions(-) create mode 100644 tests/deprecated_api/test_remove_1-8.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 02283180b19f4..3281a07ff689a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,7 +44,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/issues/10103)) -- +- Deprecated `DistributedType` in favor of `_StrategyType` ([#10505](https://github.com/PyTorchLightning/pytorch-lightning/pull/10505)) - diff --git a/pytorch_lightning/lite/lite.py b/pytorch_lightning/lite/lite.py index d36e874cbae7b..2a2ed9586b420 100644 --- a/pytorch_lightning/lite/lite.py +++ b/pytorch_lightning/lite/lite.py @@ -41,7 +41,7 @@ ) from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin -from pytorch_lightning.utilities import DeviceType, DistributedType, move_data_to_device +from pytorch_lightning.utilities import _StrategyType, DeviceType, move_data_to_device from pytorch_lightning.utilities.apply_func import apply_to_collection, convert_to_tensors from pytorch_lightning.utilities.data import has_iterable_dataset from pytorch_lightning.utilities.device_parser import _parse_devices @@ -477,14 +477,14 @@ def _supported_device_types() -> Sequence[DeviceType]: ) @staticmethod - def _supported_strategy_types() -> Sequence[DistributedType]: + def _supported_strategy_types() -> Sequence[_StrategyType]: return ( - DistributedType.DP, - DistributedType.DDP, - DistributedType.DDP_SPAWN, - DistributedType.DEEPSPEED, - DistributedType.DDP_SHARDED, - DistributedType.DDP_SHARDED_SPAWN, + _StrategyType.DP, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + _StrategyType.DEEPSPEED, + _StrategyType.DDP_SHARDED, + _StrategyType.DDP_SHARDED_SPAWN, ) @staticmethod diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py index 84e9b55b9ee08..0285859a6714a 100644 --- a/pytorch_lightning/plugins/training_type/ddp.py +++ b/pytorch_lightning/plugins/training_type/ddp.py @@ -55,7 +55,7 @@ ReduceOp, sync_ddp_if_available, ) -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import DeadlockDetectedException, MisconfigurationException from pytorch_lightning.utilities.seed import reset_seed from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -79,7 +79,7 @@ class DDPPlugin(ParallelPlugin): devices (e.g. GPU) per node. It is very similar to how :mod:`torch.distributed.launch` launches processes. 
""" - distributed_backend = DistributedType.DDP + distributed_backend = _StrategyType.DDP def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/ddp2.py b/pytorch_lightning/plugins/training_type/ddp2.py index ef623a794da42..a142d518a0f2f 100644 --- a/pytorch_lightning/plugins/training_type/ddp2.py +++ b/pytorch_lightning/plugins/training_type/ddp2.py @@ -15,14 +15,14 @@ from pytorch_lightning.plugins.training_type.ddp import DDPPlugin from pytorch_lightning.utilities.apply_func import apply_to_collection -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.types import _METRIC_COLLECTION class DDP2Plugin(DDPPlugin): """DDP2 behaves like DP in one node, but synchronization across nodes behaves like in DDP.""" - distributed_backend = DistributedType.DDP2 + distributed_backend = _StrategyType.DDP2 @property def global_rank(self) -> int: diff --git a/pytorch_lightning/plugins/training_type/ddp_spawn.py b/pytorch_lightning/plugins/training_type/ddp_spawn.py index 677e031cd04af..a77027adb6dcf 100644 --- a/pytorch_lightning/plugins/training_type/ddp_spawn.py +++ b/pytorch_lightning/plugins/training_type/ddp_spawn.py @@ -43,7 +43,7 @@ ReduceOp, sync_ddp_if_available, ) -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.seed import reset_seed from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -58,7 +58,7 @@ class DDPSpawnPlugin(ParallelPlugin): """Spawns processes using the :func:`torch.multiprocessing.spawn` method and joins processes after training finishes.""" - distributed_backend = DistributedType.DDP_SPAWN + distributed_backend = _StrategyType.DDP_SPAWN def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py index 2464a8ba4eeca..94235f361d945 100644 --- a/pytorch_lightning/plugins/training_type/deepspeed.py +++ b/pytorch_lightning/plugins/training_type/deepspeed.py @@ -36,7 +36,7 @@ from pytorch_lightning.utilities import AMPType, GradClipAlgorithmType from pytorch_lightning.utilities.apply_func import apply_to_collection from pytorch_lightning.utilities.distributed import log, rank_zero_info, rank_zero_only -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _DEEPSPEED_AVAILABLE from pytorch_lightning.utilities.model_helpers import is_overridden @@ -82,7 +82,7 @@ def _move_float_tensors_to_half(self, batch: Any): class DeepSpeedPlugin(DDPPlugin): - distributed_backend = DistributedType.DEEPSPEED + distributed_backend = _StrategyType.DEEPSPEED DEEPSPEED_ENV_VAR = "PL_DEEPSPEED_CONFIG_PATH" def __init__( diff --git a/pytorch_lightning/plugins/training_type/dp.py b/pytorch_lightning/plugins/training_type/dp.py index a0f53791bc373..83328e8c47271 100644 --- a/pytorch_lightning/plugins/training_type/dp.py +++ b/pytorch_lightning/plugins/training_type/dp.py @@ -20,7 +20,7 @@ from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin from pytorch_lightning.utilities.apply_func import apply_to_collection -from 
pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.types import _METRIC_COLLECTION @@ -29,7 +29,7 @@ class DataParallelPlugin(ParallelPlugin): """Implements data-parallel training in a single process, i.e., the model gets replicated to each device and each gets a split of the data.""" - distributed_backend = DistributedType.DP + distributed_backend = _StrategyType.DP def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/fully_sharded.py b/pytorch_lightning/plugins/training_type/fully_sharded.py index 704afa1a91aaa..c9601a905df1c 100644 --- a/pytorch_lightning/plugins/training_type/fully_sharded.py +++ b/pytorch_lightning/plugins/training_type/fully_sharded.py @@ -20,7 +20,7 @@ from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO from pytorch_lightning.plugins.training_type.ddp import DDPPlugin from pytorch_lightning.utilities import _FAIRSCALE_FULLY_SHARDED_AVAILABLE -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException if _FAIRSCALE_FULLY_SHARDED_AVAILABLE: @@ -30,7 +30,7 @@ class DDPFullyShardedPlugin(DDPPlugin): - distributed_backend = DistributedType.DDP_FULLY_SHARDED + distributed_backend = _StrategyType.DDP_FULLY_SHARDED def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/horovod.py b/pytorch_lightning/plugins/training_type/horovod.py index 30360e1ab458f..51558189a3d35 100644 --- a/pytorch_lightning/plugins/training_type/horovod.py +++ b/pytorch_lightning/plugins/training_type/horovod.py @@ -26,7 +26,7 @@ from pytorch_lightning.utilities.distributed import distributed_available from pytorch_lightning.utilities.distributed import group as dist_group from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType if _HOROVOD_AVAILABLE: import horovod.torch as hvd @@ -35,7 +35,7 @@ class HorovodPlugin(ParallelPlugin): """Plugin for Horovod distributed training integration.""" - distributed_backend = DistributedType.HOROVOD + distributed_backend = _StrategyType.HOROVOD def __init__( self, diff --git a/pytorch_lightning/plugins/training_type/sharded.py b/pytorch_lightning/plugins/training_type/sharded.py index 5955f3a46f38e..d7563437bd16b 100644 --- a/pytorch_lightning/plugins/training_type/sharded.py +++ b/pytorch_lightning/plugins/training_type/sharded.py @@ -23,7 +23,7 @@ from pytorch_lightning.plugins.training_type.ddp import DDPPlugin from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE, _FAIRSCALE_OSS_FP16_BROADCAST_AVAILABLE, rank_zero_only -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException if _FAIRSCALE_AVAILABLE: @@ -36,7 +36,7 @@ class DDPShardedPlugin(DDPPlugin): """Optimizer and gradient sharded training provided by FairScale.""" - distributed_backend = DistributedType.DDP_SHARDED + distributed_backend = _StrategyType.DDP_SHARDED _REDUCE_BUFFER_SIZE_DEFAULT: int = 2 ** 23 # 8M def __init__(self, *args, **kwargs): diff --git 
a/pytorch_lightning/plugins/training_type/sharded_spawn.py b/pytorch_lightning/plugins/training_type/sharded_spawn.py index e0ae5c7bba187..12e627edbe5cb 100644 --- a/pytorch_lightning/plugins/training_type/sharded_spawn.py +++ b/pytorch_lightning/plugins/training_type/sharded_spawn.py @@ -24,7 +24,7 @@ from pytorch_lightning.plugins.training_type.ddp_spawn import DDPSpawnPlugin from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE, rank_zero_only -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException if _FAIRSCALE_AVAILABLE: @@ -38,7 +38,7 @@ class DDPSpawnShardedPlugin(DDPSpawnPlugin): """Optimizer sharded training provided by FairScale.""" - distributed_backend = DistributedType.DDP_SHARDED_SPAWN + distributed_backend = _StrategyType.DDP_SHARDED_SPAWN def configure_ddp(self) -> None: trainer = self.lightning_module.trainer diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 43eb65ce21a22..47deeed2dca1d 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -61,10 +61,10 @@ TorchElasticEnvironment, ) from pytorch_lightning.utilities import ( + _StrategyType, AMPType, device_parser, DeviceType, - DistributedType, rank_zero_deprecation, rank_zero_info, rank_zero_warn, @@ -278,7 +278,7 @@ def _set_devices_if_none(self) -> None: self.devices = self.num_processes def _handle_accelerator_and_strategy(self) -> None: - deprecated_types = [t for t in DistributedType if t not in (DistributedType.TPU_SPAWN, DistributedType.DDP_CPU)] + deprecated_types = [t for t in _StrategyType if t not in (_StrategyType.TPU_SPAWN, _StrategyType.DDP_CPU)] if self.distributed_backend is not None and self.distributed_backend in deprecated_types: rank_zero_deprecation( f"Passing `Trainer(accelerator={self.distributed_backend!r})` has been deprecated" @@ -290,12 +290,12 @@ def _handle_accelerator_and_strategy(self) -> None: f" also passed `Trainer(accelerator={self.distributed_backend!r})`." f" HINT: Use just `Trainer(strategy={self.strategy!r})` instead." ) - if self.strategy == DistributedType.TPU_SPAWN: + if self.strategy == _StrategyType.TPU_SPAWN: raise MisconfigurationException( "`Trainer(strategy='tpu_spawn')` is not a valid strategy," " you can use `Trainer(strategy='ddp_spawn', accelerator='tpu')` instead." ) - if self.strategy == DistributedType.DDP_CPU: + if self.strategy == _StrategyType.DDP_CPU: raise MisconfigurationException( "`Trainer(strategy='ddp_cpu')` is not a valid strategy," " you can use `Trainer(strategy='ddp'|'ddp_spawn', accelerator='cpu')` instead." 
@@ -505,31 +505,31 @@ def _map_devices_to_accelerator(self, accelerator: str) -> bool: @property def use_dp(self) -> bool: - return self._distrib_type == DistributedType.DP + return self._distrib_type == _StrategyType.DP @property def use_ddp(self) -> bool: return self._distrib_type in ( - DistributedType.DDP, - DistributedType.DDP_SPAWN, - DistributedType.DDP_SHARDED, - DistributedType.DDP_SHARDED_SPAWN, - DistributedType.DDP_FULLY_SHARDED, - DistributedType.DEEPSPEED, - DistributedType.TPU_SPAWN, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + _StrategyType.DDP_SHARDED, + _StrategyType.DDP_SHARDED_SPAWN, + _StrategyType.DDP_FULLY_SHARDED, + _StrategyType.DEEPSPEED, + _StrategyType.TPU_SPAWN, ) @property def use_ddp2(self) -> bool: - return self._distrib_type == DistributedType.DDP2 + return self._distrib_type == _StrategyType.DDP2 @property def use_horovod(self) -> bool: - return self._distrib_type == DistributedType.HOROVOD + return self._distrib_type == _StrategyType.HOROVOD @property def use_deepspeed(self) -> bool: - return self._distrib_type == DistributedType.DEEPSPEED + return self._distrib_type == _StrategyType.DEEPSPEED @property def _is_sharded_training_type(self) -> bool: @@ -590,7 +590,7 @@ def root_gpu(self) -> Optional[int]: @staticmethod def _is_plugin_training_type(plugin: Union[str, TrainingTypePlugin]) -> bool: - if isinstance(plugin, str) and (plugin in TrainingTypePluginsRegistry or plugin in list(DistributedType)): + if isinstance(plugin, str) and (plugin in TrainingTypePluginsRegistry or plugin in list(_StrategyType)): return True return isinstance(plugin, TrainingTypePlugin) @@ -635,7 +635,7 @@ def select_precision_plugin(self) -> PrecisionPlugin: ) return TPUBf16PrecisionPlugin() - if self._distrib_type == DistributedType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): + if self._distrib_type == _StrategyType.DEEPSPEED or isinstance(self._training_type_plugin, DeepSpeedPlugin): return DeepSpeedPrecisionPlugin(self.precision) if self.precision == 32: @@ -706,15 +706,15 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: use_slurm_ddp = self.use_ddp and self._is_slurm_managing_tasks use_torchelastic_ddp = self.use_ddp and TorchElasticEnvironment.is_using_torchelastic() use_kubeflow_ddp = self.use_ddp and KubeflowEnvironment.is_using_kubeflow() - use_ddp_spawn = self._distrib_type == DistributedType.DDP_SPAWN + use_ddp_spawn = self._distrib_type == _StrategyType.DDP_SPAWN use_ddp_cpu_spawn = use_ddp_spawn and self.use_cpu - use_tpu_spawn = self.use_tpu and self._distrib_type == DistributedType.TPU_SPAWN + use_tpu_spawn = self.use_tpu and self._distrib_type == _StrategyType.TPU_SPAWN use_ddp_cpu_torch_elastic = use_ddp_cpu_spawn and TorchElasticEnvironment.is_using_torchelastic() use_ddp_cpu_kubeflow = use_ddp_cpu_spawn and KubeflowEnvironment.is_using_kubeflow() use_ddp_cpu_slurm = use_ddp_cpu_spawn and self._is_slurm_managing_tasks - use_ddp_sharded = self._distrib_type == DistributedType.DDP_SHARDED - use_ddp_sharded_spawn = self._distrib_type == DistributedType.DDP_SHARDED_SPAWN - use_ddp_fully_sharded = self._distrib_type == DistributedType.DDP_FULLY_SHARDED + use_ddp_sharded = self._distrib_type == _StrategyType.DDP_SHARDED + use_ddp_sharded_spawn = self._distrib_type == _StrategyType.DDP_SHARDED_SPAWN + use_ddp_fully_sharded = self._distrib_type == _StrategyType.DDP_FULLY_SHARDED if use_tpu_spawn: ddp_plugin_cls = TPUSpawnPlugin @@ -839,27 +839,27 @@ def set_distributed_mode(self, strategy: Optional[str] = None): if 
self.has_horovodrun(): self._set_horovod_backend() elif self.num_gpus == 0 and self.num_nodes > 1: - self._distrib_type = DistributedType.DDP + self._distrib_type = _StrategyType.DDP elif self.num_gpus == 0 and self.num_processes > 1: - self.distributed_backend = DistributedType.DDP_SPAWN + self.distributed_backend = _StrategyType.DDP_SPAWN elif self.num_gpus > 1 and not _use_cpu: rank_zero_warn( "You requested multiple GPUs but did not specify a backend, e.g." ' `Trainer(strategy="dp"|"ddp"|"ddp2")`. Setting `strategy="ddp_spawn"` for you.' ) - self.distributed_backend = DistributedType.DDP_SPAWN + self.distributed_backend = _StrategyType.DDP_SPAWN # special case with DDP on CPUs - if self.distributed_backend == DistributedType.DDP_CPU: + if self.distributed_backend == _StrategyType.DDP_CPU: if _TPU_AVAILABLE: raise MisconfigurationException( "`accelerator='ddp_cpu'` is not supported on TPU machines. " "Learn more: https://github.com/PyTorchLightning/pytorch-lightning/issues/7810" ) if self.num_processes == 1 and self.num_nodes > 1: - self._distrib_type = DistributedType.DDP + self._distrib_type = _StrategyType.DDP else: - self._distrib_type = DistributedType.DDP_SPAWN + self._distrib_type = _StrategyType.DDP_SPAWN if self.num_gpus > 0: rank_zero_warn( "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs." @@ -872,25 +872,25 @@ def set_distributed_mode(self, strategy: Optional[str] = None): elif self.has_tpu and not _use_cpu: self._device_type = DeviceType.TPU if isinstance(self.tpu_cores, int): - self._distrib_type = DistributedType.TPU_SPAWN + self._distrib_type = _StrategyType.TPU_SPAWN elif self.has_ipu and not _use_cpu: self._device_type = DeviceType.IPU elif self.distributed_backend and self._distrib_type is None: - self._distrib_type = DistributedType(self.distributed_backend) + self._distrib_type = _StrategyType(self.distributed_backend) if self.num_gpus > 0 and not _use_cpu: self._device_type = DeviceType.GPU - _gpu_distrib_types = (DistributedType.DP, DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP2) + _gpu_distrib_types = (_StrategyType.DP, _StrategyType.DDP, _StrategyType.DDP_SPAWN, _StrategyType.DDP2) # DP and DDP2 cannot run without GPU if self.num_gpus == 0 and self._distrib_type in _gpu_distrib_types and not _use_cpu: if (self.num_nodes and self.num_nodes > 1) or (self.num_processes and self.num_processes > 1): - if self._distrib_type in (DistributedType.DP, DistributedType.DDP2): + if self._distrib_type in (_StrategyType.DP, _StrategyType.DDP2): rank_zero_warn( f"{self._distrib_type.value!r} is not supported on CPUs, hence setting `strategy='ddp'`." ) - self._distrib_type = DistributedType.DDP + self._distrib_type = _StrategyType.DDP else: rank_zero_warn("You are running on single node with no parallelization, so distributed has no effect.") self._distrib_type = None @@ -900,28 +900,28 @@ def set_distributed_mode(self, strategy: Optional[str] = None): # for DDP overwrite nb processes by requested GPUs if self._device_type == DeviceType.GPU and self._distrib_type in ( - DistributedType.DDP, - DistributedType.DDP_SPAWN, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, ): self.num_processes = self.num_gpus - if self._device_type == DeviceType.GPU and self._distrib_type == DistributedType.DDP2: + if self._device_type == DeviceType.GPU and self._distrib_type == _StrategyType.DDP2: self.num_processes = self.num_nodes # Horovod is an extra case... 
- if self.distributed_backend == DistributedType.HOROVOD: + if self.distributed_backend == _StrategyType.HOROVOD: self._set_horovod_backend() using_valid_distributed = self.use_ddp or self.use_ddp2 if self.num_nodes > 1 and not using_valid_distributed: - # throw error to force user to choose a supported distributed type such as ddp or ddp2 + # throw error to force user to choose a supported strategy type such as ddp or ddp2 raise MisconfigurationException( "Your chosen strategy does not support `num_nodes > 1`. Please set `strategy=('ddp'|'ddp2')`." ) def _set_horovod_backend(self): self.check_horovod() - self._distrib_type = DistributedType.HOROVOD + self._distrib_type = _StrategyType.HOROVOD # Initialize Horovod to get rank / size info hvd.init() @@ -941,7 +941,7 @@ def check_interactive_compatibility(self): f"`Trainer(strategy={self._distrib_type.value!r})` or" f" `Trainer(accelerator={self._distrib_type.value!r})` is not compatible with an interactive" " environment. Run your code as a script, or choose one of the compatible backends:" - f" {', '.join(DistributedType.interactive_compatible_types())}." + f" {', '.join(_StrategyType.interactive_compatible_types())}." " In case you are spawning processes yourself, make sure to include the Trainer" " creation inside the worker function." ) diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py index 37a234f32f711..931f6a92958ee 100644 --- a/pytorch_lightning/trainer/data_loading.py +++ b/pytorch_lightning/trainer/data_loading.py @@ -38,7 +38,7 @@ FastForwardSampler, ) from pytorch_lightning.utilities.data import get_len, has_iterable_dataset, has_len_all_ranks -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _fault_tolerant_training from pytorch_lightning.utilities.model_helpers import is_overridden @@ -70,7 +70,7 @@ def _worker_check(self, dataloader: DataLoader, name: str) -> None: if not isinstance(dataloader, DataLoader): return - using_spawn = self._accelerator_connector._distrib_type == DistributedType.DDP_SPAWN + using_spawn = self._accelerator_connector._distrib_type == _StrategyType.DDP_SPAWN num_cpus = multiprocessing.cpu_count() # ddp_spawn + num_workers > 0 don't mix! 
tell the user diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b84f03393309b..5007927aa93e2 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -64,10 +64,10 @@ from pytorch_lightning.tuner.tuning import Tuner from pytorch_lightning.utilities import ( _IPU_AVAILABLE, + _StrategyType, _TPU_AVAILABLE, device_parser, DeviceType, - DistributedType, GradClipAlgorithmType, parsing, rank_zero_deprecation, @@ -1591,7 +1591,7 @@ def should_rank_save_checkpoint(self) -> bool: return self.training_type_plugin.should_rank_save_checkpoint @property - def _distrib_type(self) -> DistributedType: + def _distrib_type(self) -> _StrategyType: return self._accelerator_connector._distrib_type @property @@ -1754,10 +1754,10 @@ def distributed_sampler_kwargs(self) -> Optional[dict]: @property def data_parallel(self) -> bool: return self._distrib_type in ( - DistributedType.DP, - DistributedType.DDP, - DistributedType.DDP_SPAWN, - DistributedType.DDP2, + _StrategyType.DP, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + _StrategyType.DDP2, ) @property diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index 7343e28d6d811..22164908a3e3f 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -18,6 +18,7 @@ from pytorch_lightning.utilities.apply_func import move_data_to_device # noqa: F401 from pytorch_lightning.utilities.distributed import AllGatherGrad, rank_zero_info, rank_zero_only # noqa: F401 from pytorch_lightning.utilities.enums import ( # noqa: F401 + _StrategyType, AMPType, DeviceType, DistributedType, diff --git a/pytorch_lightning/utilities/enums.py b/pytorch_lightning/utilities/enums.py index 436c675c382c2..18b0336b82d5f 100644 --- a/pytorch_lightning/utilities/enums.py +++ b/pytorch_lightning/utilities/enums.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. """Enumerated utilities.""" -from enum import Enum -from typing import List, Optional, Union +from enum import Enum, EnumMeta +from typing import Any, List, Optional, Union + +from pytorch_lightning.utilities.warnings import rank_zero_deprecation class LightningEnum(str, Enum): @@ -37,6 +39,31 @@ def __hash__(self) -> int: return hash(self.value.lower()) +class _OnAccessEnumMeta(EnumMeta): + """Enum with a hook to run a function whenever a member is accessed. + + Adapted from: + https://www.buzzphp.com/posts/how-do-i-detect-and-invoke-a-function-when-a-python-enum-member-is-accessed + """ + + def __getattribute__(cls, name: str) -> Any: + obj = super().__getattribute__(name) + if isinstance(obj, Enum): + obj.deprecate() + return obj + + def __getitem__(cls, name: str) -> Any: + member = super().__getitem__(name) + member.deprecate() + return member + + def __call__(cls, value: str, *args: Any, **kwargs: Any) -> Any: + obj = super().__call__(value, *args, **kwargs) + if isinstance(obj, Enum): + obj.deprecate() + return obj + + class AMPType(LightningEnum): """Type of Automatic Mixed Precission used for training. @@ -73,8 +100,8 @@ def supported_types() -> List[str]: return [x.value for x in PrecisionType] -class DistributedType(LightningEnum): - """Define type of distributed computing. +class DistributedType(LightningEnum, metaclass=_OnAccessEnumMeta): + """Define type of training strategy. 
>>> # you can match the type with string >>> DistributedType.DDP == 'ddp' @@ -82,8 +109,24 @@ class DistributedType(LightningEnum): >>> # which is case invariant >>> DistributedType.DDP2 in ('ddp2', ) True + + Deprecated since v1.6.0 and will be removed in v1.8.0. + + Use `_StrategyType` instead. """ + DP = "dp" + DDP = "ddp" + DDP2 = "ddp2" + DDP_CPU = "ddp_cpu" + DDP_SPAWN = "ddp_spawn" + TPU_SPAWN = "tpu_spawn" + DEEPSPEED = "deepspeed" + HOROVOD = "horovod" + DDP_SHARDED = "ddp_sharded" + DDP_SHARDED_SPAWN = "ddp_sharded_spawn" + DDP_FULLY_SHARDED = "ddp_fully_sharded" + @staticmethod def interactive_compatible_types() -> List["DistributedType"]: """Returns a list containing interactive compatible DistributeTypes.""" @@ -98,17 +141,11 @@ def is_interactive_compatible(self) -> bool: """Returns whether self is interactive compatible.""" return self in DistributedType.interactive_compatible_types() - DP = "dp" - DDP = "ddp" - DDP2 = "ddp2" - DDP_CPU = "ddp_cpu" - DDP_SPAWN = "ddp_spawn" - TPU_SPAWN = "tpu_spawn" - DEEPSPEED = "deepspeed" - HOROVOD = "horovod" - DDP_SHARDED = "ddp_sharded" - DDP_SHARDED_SPAWN = "ddp_sharded_spawn" - DDP_FULLY_SHARDED = "ddp_fully_sharded" + def deprecate(self) -> None: + rank_zero_deprecation( + "`DistributedType` Enum has been deprecated in v1.6 and will be removed in v1.8." + f" Use the string value `{self.value!r}` instead." + ) class DeviceType(LightningEnum): @@ -188,3 +225,41 @@ get_max_depth(mode: str) -> int: @staticmethod def supported_types() -> List[str]: return [x.value for x in ModelSummaryMode] + + +class _StrategyType(LightningEnum): + """Define type of training strategy. + + >>> # you can match the type with string + >>> _StrategyType.DDP == 'ddp' + True + >>> # which is case invariant + >>> _StrategyType.DDP2 in ('ddp2', ) + True + """ + + DP = "dp" + DDP = "ddp" + DDP2 = "ddp2" + DDP_CPU = "ddp_cpu" + DDP_SPAWN = "ddp_spawn" + TPU_SPAWN = "tpu_spawn" + DEEPSPEED = "deepspeed" + HOROVOD = "horovod" + DDP_SHARDED = "ddp_sharded" + DDP_SHARDED_SPAWN = "ddp_sharded_spawn" + DDP_FULLY_SHARDED = "ddp_fully_sharded" + + @staticmethod + def interactive_compatible_types() -> List["_StrategyType"]: + """Returns a list containing interactive compatible _StrategyTypes.""" + return [ + _StrategyType.DP, + _StrategyType.DDP_SPAWN, + _StrategyType.DDP_SHARDED_SPAWN, + _StrategyType.TPU_SPAWN, + ] + + def is_interactive_compatible(self) -> bool: + """Returns whether self is interactive compatible.""" + return self in _StrategyType.interactive_compatible_types() diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index d95f5c8e6f9ea..e70d862b048e0 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -43,7 +43,7 @@ SLURMEnvironment, TorchElasticEnvironment, ) -from pytorch_lightning.utilities import DeviceType, DistributedType +from pytorch_lightning.utilities import _StrategyType, DeviceType from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers.boring_model import BoringModel from tests.helpers.runif import RunIf @@ -636,7 +636,7 @@ def test_unsupported_distrib_types_on_cpu(training_type): with pytest.warns(UserWarning, match="is not supported on CPUs, hence setting `strategy='ddp"): trainer = Trainer(accelerator=training_type, num_processes=2) - assert trainer._distrib_type == DistributedType.DDP + assert trainer._distrib_type == _StrategyType.DDP def
test_accelerator_ddp_for_cpu(tmpdir): diff --git a/tests/base/model_test_epoch_ends.py b/tests/base/model_test_epoch_ends.py index 746ceb94a5de0..b001298e93dd0 100644 --- a/tests/base/model_test_epoch_ends.py +++ b/tests/base/model_test_epoch_ends.py @@ -15,7 +15,7 @@ import torch -from pytorch_lightning.utilities import DistributedType +from pytorch_lightning.utilities import _StrategyType class TestEpochEndVariations(ABC): @@ -34,13 +34,13 @@ def test_epoch_end(self, outputs): test_loss = self.get_output_metric(output, "test_loss") # reduce manually when using dp - if self.trainer._distrib_type == DistributedType.DP: + if self.trainer._distrib_type == _StrategyType.DP: test_loss = torch.mean(test_loss) test_loss_mean += test_loss # reduce manually when using dp test_acc = self.get_output_metric(output, "test_acc") - if self.trainer._distrib_type == DistributedType.DP: + if self.trainer._distrib_type == _StrategyType.DP: test_acc = torch.mean(test_acc) test_acc_mean += test_acc @@ -69,13 +69,13 @@ def test_epoch_end__multiple_dataloaders(self, outputs): test_loss = output["test_loss"] # reduce manually when using dp - if self.trainer._distrib_type == DistributedType.DP: + if self.trainer._distrib_type == _StrategyType.DP: test_loss = torch.mean(test_loss) test_loss_mean += test_loss # reduce manually when using dp test_acc = output["test_acc"] - if self.trainer._distrib_type == DistributedType.DP: + if self.trainer._distrib_type == _StrategyType.DP: test_acc = torch.mean(test_acc) test_acc_mean += test_acc diff --git a/tests/deprecated_api/test_remove_1-8.py b/tests/deprecated_api/test_remove_1-8.py new file mode 100644 index 0000000000000..f668f63b9f450 --- /dev/null +++ b/tests/deprecated_api/test_remove_1-8.py @@ -0,0 +1,23 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Test deprecated functionality which will be removed in v1.8.0.""" +import pytest + +from pytorch_lightning.utilities.enums import DistributedType + + +def test_v1_8_0_deprecated_distributed_type_enum(): + + with pytest.deprecated_call(match="has been deprecated in v1.6 and will be removed in v1.8."): + _ = DistributedType.DDP diff --git a/tests/helpers/pipelines.py b/tests/helpers/pipelines.py index 643d3e50cb894..6fa3bbb5dc943 100644 --- a/tests/helpers/pipelines.py +++ b/tests/helpers/pipelines.py @@ -15,7 +15,7 @@ from torchmetrics.functional import accuracy from pytorch_lightning import LightningDataModule, LightningModule, Trainer -from pytorch_lightning.utilities import DistributedType +from pytorch_lightning.utilities import _StrategyType from tests.helpers import BoringModel from tests.helpers.utils import get_default_logger, load_model_from_checkpoint, reset_seed @@ -82,7 +82,7 @@ def run_model_test( run_prediction_eval_model_template(model, dataloader, min_acc=min_acc) if with_hpc: - if trainer._distrib_type in (DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP2): + if trainer._distrib_type in (_StrategyType.DDP, _StrategyType.DDP_SPAWN, _StrategyType.DDP2): # on hpc this would work fine... but need to hack it for the purpose of the test trainer.optimizers, trainer.lr_schedulers, trainer.optimizer_frequencies = trainer.init_optimizers( pretrained_model diff --git a/tests/lite/test_lite.py b/tests/lite/test_lite.py index bd69cf359473e..7c79cb7f2e709 100644 --- a/tests/lite/test_lite.py +++ b/tests/lite/test_lite.py @@ -31,7 +31,7 @@ _replace_dataloader_init_method, ) from pytorch_lightning.plugins import DeepSpeedPlugin, PrecisionPlugin, TrainingTypePlugin -from pytorch_lightning.utilities import DistributedType +from pytorch_lightning.utilities import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.seed import pl_worker_init_function from tests.helpers.runif import RunIf @@ -251,12 +251,12 @@ def test_seed_everything(): @pytest.mark.parametrize( "strategy", [ - DistributedType.DP, - DistributedType.DDP, - DistributedType.DDP_SPAWN, - pytest.param(DistributedType.DEEPSPEED, marks=RunIf(deepspeed=True)), - pytest.param(DistributedType.DDP_SHARDED, marks=RunIf(fairscale=True)), - pytest.param(DistributedType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)), + _StrategyType.DP, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + pytest.param(_StrategyType.DEEPSPEED, marks=RunIf(deepspeed=True)), + pytest.param(_StrategyType.DDP_SHARDED, marks=RunIf(fairscale=True)), + pytest.param(_StrategyType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)), ], ) def test_setup_dataloaders_replace_custom_sampler(strategy): @@ -279,12 +279,12 @@ def test_setup_dataloaders_replace_custom_sampler(strategy): @pytest.mark.parametrize( "strategy", [ - DistributedType.DP, - DistributedType.DDP, - DistributedType.DDP_SPAWN, - pytest.param(DistributedType.DEEPSPEED, marks=RunIf(deepspeed=True)), - pytest.param(DistributedType.DDP_SHARDED, marks=RunIf(fairscale=True)), - pytest.param(DistributedType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)), + _StrategyType.DP, + _StrategyType.DDP, + _StrategyType.DDP_SPAWN, + pytest.param(_StrategyType.DEEPSPEED, marks=RunIf(deepspeed=True)), + pytest.param(_StrategyType.DDP_SHARDED, marks=RunIf(fairscale=True)), + pytest.param(_StrategyType.DDP_SHARDED_SPAWN, marks=RunIf(fairscale=True)), ], ) @pytest.mark.parametrize("shuffle", [True, False]) diff --git 
a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py index 97097b2074ca1..4f3a482e37ac4 100644 --- a/tests/trainer/test_data_loading.py +++ b/tests/trainer/test_data_loading.py @@ -20,7 +20,7 @@ from torch.utils.data.sampler import BatchSampler, Sampler, SequentialSampler from pytorch_lightning import Trainer -from pytorch_lightning.utilities.enums import DistributedType +from pytorch_lightning.utilities.enums import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers import BoringModel, RandomDataset from tests.helpers.runif import RunIf @@ -137,7 +137,7 @@ def _get_warning_msg(): @pytest.mark.parametrize("num_workers", [0, 1]) def test_dataloader_warnings(tmpdir, num_workers): trainer = Trainer(default_root_dir=tmpdir, strategy="ddp_spawn", num_processes=2, fast_dev_run=4) - assert trainer._accelerator_connector._distrib_type == DistributedType.DDP_SPAWN + assert trainer._accelerator_connector._distrib_type == _StrategyType.DDP_SPAWN trainer.fit(TestSpawnBoringModel(num_workers)) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index d2e5f771a9c40..dc0ce2b68452c 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -48,7 +48,7 @@ DDPSpawnShardedPlugin, ) from pytorch_lightning.trainer.states import TrainerFn -from pytorch_lightning.utilities import DeviceType, DistributedType +from pytorch_lightning.utilities import _StrategyType, DeviceType from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.exceptions import DeadlockDetectedException, MisconfigurationException from pytorch_lightning.utilities.seed import seed_everything @@ -1154,15 +1154,15 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): ), ( dict(accelerator="ddp", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(accelerator="ddp", num_nodes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( dict(accelerator="ddp_cpu", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(accelerator="ddp2", gpus=None), @@ -1174,43 +1174,43 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches): ), ( dict(accelerator="dp", gpus=1), - dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(accelerator="ddp", gpus=1), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(accelerator="ddp_cpu", num_processes=2, gpus=1), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(accelerator="ddp2", gpus=1), - 
dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(accelerator=None, gpus=2), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), ), ( dict(accelerator="dp", gpus=2), - dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(accelerator="ddp", gpus=2), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), ), ( dict(accelerator="ddp2", gpus=2), - dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(accelerator="ddp2", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(accelerator="dp", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ], ) @@ -2096,11 +2096,11 @@ def training_step(self, batch, batch_idx): ), ( dict(strategy="ddp", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(strategy="ddp", num_nodes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=1), ), ( dict(strategy="ddp2", gpus=None), @@ -2112,47 +2112,47 @@ def training_step(self, batch, batch_idx): ), ( dict(strategy="dp", gpus=1), - dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(strategy="ddp", gpus=1), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(strategy="ddp_spawn", gpus=1), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(strategy="ddp2", gpus=1), - dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), + dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1), ), ( dict(strategy=None, gpus=2), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, 
num_processes=2), ), ( dict(strategy="dp", gpus=2), - dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(strategy="ddp", gpus=2), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=2), ), ( dict(strategy="ddp2", gpus=2), - dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(strategy="ddp2", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(strategy="dp", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(strategy="ddp_spawn", num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(strategy="ddp_spawn", num_processes=1, gpus=None), @@ -2161,7 +2161,7 @@ def training_step(self, batch, batch_idx): ( dict(strategy="ddp_fully_sharded", gpus=1), dict( - _distrib_type=DistributedType.DDP_FULLY_SHARDED, + _distrib_type=_StrategyType.DDP_FULLY_SHARDED, _device_type=DeviceType.GPU, num_gpus=1, num_processes=1, @@ -2169,32 +2169,32 @@ def training_step(self, batch, batch_idx): ), ( dict(strategy=DDPSpawnPlugin(), num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(strategy=DDPSpawnPlugin(), gpus=2), - dict(_distrib_type=DistributedType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(strategy=DDPPlugin(), num_processes=2, gpus=None), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.CPU, num_gpus=0, num_processes=2), ), ( dict(strategy=DDPPlugin(), gpus=2), - dict(_distrib_type=DistributedType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(strategy=DDP2Plugin(), gpus=2), - dict(_distrib_type=DistributedType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP2, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(strategy=DataParallelPlugin(), gpus=2), - dict(_distrib_type=DistributedType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DP, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ( dict(strategy=DDPFullyShardedPlugin(), gpus=2), dict( - _distrib_type=DistributedType.DDP_FULLY_SHARDED, + 
_distrib_type=_StrategyType.DDP_FULLY_SHARDED, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1, @@ -2203,7 +2203,7 @@ def training_step(self, batch, batch_idx): ( dict(strategy=DDPSpawnShardedPlugin(), gpus=2), dict( - _distrib_type=DistributedType.DDP_SHARDED_SPAWN, + _distrib_type=_StrategyType.DDP_SHARDED_SPAWN, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1, @@ -2211,7 +2211,7 @@ def training_step(self, batch, batch_idx): ), ( dict(strategy=DDPShardedPlugin(), gpus=2), - dict(_distrib_type=DistributedType.DDP_SHARDED, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), + dict(_distrib_type=_StrategyType.DDP_SHARDED, _device_type=DeviceType.GPU, num_gpus=2, num_processes=1), ), ], ) From ae71284627793f92027a6515bf816cb855282593 Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Tue, 16 Nov 2021 00:12:00 +0530 Subject: [PATCH 18/18] Remove deprecated `disable_validation` property from Trainer (#10450) --- CHANGELOG.md | 5 +++++ pytorch_lightning/trainer/trainer.py | 9 --------- tests/deprecated_api/test_remove_1-6.py | 6 ------ 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3281a07ff689a..7002d1680856c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -127,8 +127,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) +- Removed deprecated `disable_validation` property from Trainer ([#10450](https://github.com/PyTorchLightning/pytorch-lightning/pull/10450)) + + - Removed deprecated `CheckpointConnector.hpc_load` property in favor of `CheckpointConnector.restore` ([#10525](https://github.com/PyTorchLightning/pytorch-lightning/pull/10525)) + + ### Fixed - Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/PyTorchLightning/pytorch-lightning/issues/9702)) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 5007927aa93e2..4cbb33c9b4766 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1783,15 +1783,6 @@ def _should_reload_dl_epoch(self) -> bool: n_epochs = self.reload_dataloaders_every_n_epochs return n_epochs and (not self.current_epoch % n_epochs) - @property - def disable_validation(self) -> bool: - """Check if validation is disabled during training.""" - rank_zero_deprecation( - "`trainer.disable_validation` is deprecated in v1.4 and will be removed in v1.6." - " Use `not trainer.enable_validation` instead." 
- ) - return not self.enable_validation - @property def enable_validation(self) -> bool: """Check if we should run validation during training.""" diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index efb288a623d6a..1ded07734a7de 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -47,9 +47,3 @@ def test_v1_6_0_reload_dataloaders_every_epoch(tmpdir): [call.val_dataloader()] + [call.train_dataloader(), call.val_dataloader()] * 3 + [call.test_dataloader()] ) assert tracker.mock_calls == expected_sequence - - -def test_v1_6_0_deprecated_disable_validation(): - trainer = Trainer() - with pytest.deprecated_call(match="disable_validation` is deprecated in v1.4"): - _ = trainer.disable_validation
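Stepping back from PATCH 17, the interesting piece is how the deprecation stays invisible until use: `DistributedType` members still compare and hash as before, but `_OnAccessEnumMeta` hooks every route to a member, whether attribute access, subscripting, or value lookup, and fires `deprecate()` on each. A self-contained sketch of the same pattern, with a hypothetical `OldStrategy` enum and plain `warnings` standing in for `rank_zero_deprecation`:

    import warnings
    from enum import Enum, EnumMeta
    from typing import Any

    warnings.simplefilter("always", DeprecationWarning)  # surface each hit in this demo

    class _OnAccessEnumMeta(EnumMeta):
        """Invoke `deprecate()` on a member however it is reached."""

        def __getattribute__(cls, name: str) -> Any:
            obj = super().__getattribute__(name)
            if isinstance(obj, Enum):  # plain methods and attributes pass through silently
                obj.deprecate()
            return obj

        def __getitem__(cls, name: str) -> Any:
            member = super().__getitem__(name)
            member.deprecate()
            return member

        def __call__(cls, value: str, *args: Any, **kwargs: Any) -> Any:
            obj = super().__call__(value, *args, **kwargs)
            if isinstance(obj, Enum):
                obj.deprecate()
            return obj

    class OldStrategy(Enum, metaclass=_OnAccessEnumMeta):
        DDP = "ddp"

        def deprecate(self) -> None:
            warnings.warn(
                f"`OldStrategy` is deprecated; use the string value {self.value!r} instead.",
                DeprecationWarning,
            )

    OldStrategy.DDP     # attribute access warns
    OldStrategy["DDP"]  # subscript access warns
    OldStrategy("ddp")  # value lookup warns

This is also why `_StrategyType` had to be a separate enum rather than an alias: every internal call site in the patch switches to `_StrategyType`, so only user code still touching `DistributedType` trips the hook. PATCH 18's removal is simpler to migrate, as the deleted property returned `not self.enable_validation` internally: `not trainer.enable_validation` is a drop-in replacement for `trainer.disable_validation`.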