From 73c793a9ee2b2edb5ffe4b7f42caecf2f71d4716 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Mon, 25 Jul 2022 22:51:13 +0900 Subject: [PATCH 01/35] append cuda version to tags --- .github/workflows/cicd-pytorch_dockers.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index 4742f3579c274..beb4b4fb92c42 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -109,7 +109,7 @@ jobs: UBUNTU_VERSION=${{ matrix.ubuntu_version }} file: dockers/base-cuda/Dockerfile push: ${{ env.PUSH_TO_HUB }} - tags: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} + tags: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} timeout-minutes: 95 - uses: ravsamhq/notify-slack-action@v1 if: failure() && env.PUSH_TO_HUB == 'true' @@ -148,7 +148,7 @@ jobs: CUDA_VERSION=${{ matrix.cuda_version }} file: dockers/base-conda/Dockerfile push: ${{ env.PUSH_TO_HUB }} - tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} + tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} timeout-minutes: 95 - uses: ravsamhq/notify-slack-action@v1 if: failure() && env.PUSH_TO_HUB == 'true' From 46a05fccbb9b596aa98d5d68424917b5811c5b4f Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 02:33:48 +0900 Subject: [PATCH 02/35] revertme: push to hub --- .github/workflows/cicd-pytorch_dockers.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index beb4b4fb92c42..a0931c54245a7 100644 --- 
a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -108,7 +108,7 @@ jobs: CUDA_VERSION=${{ matrix.cuda_version }} UBUNTU_VERSION=${{ matrix.ubuntu_version }} file: dockers/base-cuda/Dockerfile - push: ${{ env.PUSH_TO_HUB }} + push: true tags: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} timeout-minutes: 95 - uses: ravsamhq/notify-slack-action@v1 @@ -147,7 +147,7 @@ jobs: PYTORCH_VERSION=${{ matrix.pytorch_version }} CUDA_VERSION=${{ matrix.cuda_version }} file: dockers/base-conda/Dockerfile - push: ${{ env.PUSH_TO_HUB }} + push: true tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} timeout-minutes: 95 - uses: ravsamhq/notify-slack-action@v1 From 1f0e5a462141c9ce99a60a0be09db45b9314c4c2 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 03:03:09 +0900 Subject: [PATCH 03/35] Update docker readme --- dockers/README.md | 45 +++++++++++---------------------------------- 1 file changed, 11 insertions(+), 34 deletions(-) diff --git a/dockers/README.md b/dockers/README.md index 533c85739f528..b1ff9826b6c1f 100644 --- a/dockers/README.md +++ b/dockers/README.md @@ -1,36 +1,17 @@ # Docker images -## Builds images form attached Dockerfiles +## Build images from Dockerfiles You can build it on your own, note it takes lots of time, be prepared. ```bash -git clone -docker image build -t pytorch-lightning:latest -f dockers/conda/Dockerfile . -``` - -or with specific arguments - -```bash -git clone -docker image build \ - -t pytorch-lightning:base-cuda-py3.9-pt1.10 \ - -f dockers/base-cuda/Dockerfile \ - --build-arg PYTHON_VERSION=3.9 \ - --build-arg PYTORCH_VERSION=1.10 \ - . 
-``` +git clone https://github.com/Lightning-AI/lightning.git -or nightly version from Conda +# build with the default arguments +docker image build -t pytorch-lightning:latest -f dockers/base-cuda/Dockerfile . -```bash -git clone -docker image build \ - -t pytorch-lightning:base-conda-py3.9-pt1.11 \ - -f dockers/base-conda/Dockerfile \ - --build-arg PYTHON_VERSION=3.9 \ - --build-arg PYTORCH_VERSION=1.11 \ - . +# build with specific arguments +docker image build -t pytorch-lightning:base-cuda-py3.9-torch1.11-cuda11.3.1 -f dockers/base-cuda/Dockerfile --build-arg PYTHON_VERSION=3.9 --build-arg PYTORCH_VERSION=1.11 --build-arg CUDA_VERSION=11.3.1 . ``` To run your docker use @@ -49,7 +30,7 @@ docker image rm pytorch-lightning:latest ## Run docker image with GPUs -To run docker image with access to you GPUs you need to install +To run docker image with access to your GPUs, you need to install ```bash # Add the package repositories @@ -61,10 +42,10 @@ sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit sudo systemctl restart docker ``` -and later run the docker image with `--gpus all` so for example +and later run the docker image with `--gpus all`. For example, ``` -docker run --rm -it --gpus all pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.10 +docker run --rm -it --gpus all pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11-cuda11.3.1 ``` ## Run Jupyter server @@ -73,15 +54,11 @@ Inspiration comes from https://u.group/thinking/how-to-put-jupyter-notebooks-in- 1. Build the docker image: ```bash - docker image build \ - -t pytorch-lightning:v1.3.1 \ - -f dockers/nvidia/Dockerfile \ - --build-arg LIGHTNING_VERSION=1.3.1 \ - . + docker image build -t pytorch-lightning:v1.6.5 -f dockers/nvidia/Dockerfile --build-arg LIGHTNING_VERSION=1.6.5 . ``` 1. 
start the server and map ports: ```bash - docker run --rm -it --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all -p 8888:8888 pytorch-lightning:v1.3.1 + docker run --rm -it --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all -p 8888:8888 pytorch-lightning:v1.6.5 ``` 1. Connect in local browser: - copy the generated path e.g. `http://hostname:8888/?token=0719fa7e1729778b0cec363541a608d5003e26d4910983c6` From 6b2377167ce35f0d0dbef548323471ac75a0d303 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 03:19:04 +0900 Subject: [PATCH 04/35] Build base-conda-py3.9-torch1.12-cuda11.3.1 --- .azure/gpu-benchmark.yml | 2 +- .azure/gpu-tests.yml | 2 +- .github/workflows/cicd-pytorch_dockers.yml | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.azure/gpu-benchmark.yml b/.azure/gpu-benchmark.yml index 3108f9e78c3c3..96b8107aa4598 100644 --- a/.azure/gpu-benchmark.yml +++ b/.azure/gpu-benchmark.yml @@ -28,7 +28,7 @@ jobs: cancelTimeoutInMinutes: "2" pool: azure-jirka-spot container: - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11-cuda11.3.1" options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g" workspace: clean: all diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml index b5dbd9e3340c7..5ebe5765cd3e3 100644 --- a/.azure/gpu-tests.yml +++ b/.azure/gpu-tests.yml @@ -26,7 +26,7 @@ jobs: strategy: matrix: 'PyTorch - stable': - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11-cuda11.3.1" # how long to run the job before automatically cancelling timeoutInMinutes: "80" # how much time to give 'run always even if cancelled tasks' before stopping them diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index a0931c54245a7..87d9325e0a005 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ 
b/.github/workflows/cicd-pytorch_dockers.yml @@ -130,8 +130,7 @@ jobs: - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - # nightly: add when there's a release candidate - # - {python_version: "3.9", pytorch_version: "1.12"} + - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} steps: - uses: actions/checkout@v2 - uses: docker/setup-buildx-action@v2 From 8a531add6278537e3a34b42d79cfed18dbd409a3 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 03:19:31 +0900 Subject: [PATCH 05/35] Use new images in conda tests --- .github/workflows/ci-pytorch_test-conda.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-pytorch_test-conda.yml b/.github/workflows/ci-pytorch_test-conda.yml index 65bee898a6345..91384776b9797 100644 --- a/.github/workflows/ci-pytorch_test-conda.yml +++ b/.github/workflows/ci-pytorch_test-conda.yml @@ -18,16 +18,16 @@ defaults: jobs: conda: runs-on: ubuntu-20.04 - container: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }} + container: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-cuda${{ matrix.cuda-version }} strategy: fail-fast: false matrix: # nightly: add when there's a release candidate include: - - {python-version: "3.8", pytorch-version: "1.9"} - - {python-version: "3.8", pytorch-version: "1.10"} - - {python-version: "3.9", pytorch-version: "1.11"} - - {python-version: "3.9", pytorch-version: "1.12"} + - {python-version: "3.8", pytorch-version: "1.9", cuda-version: "11.1"} + - {python-version: "3.8", pytorch-version: "1.10", cuda-version: "11.1"} + - {python-version: "3.9", pytorch-version: "1.11", cuda-version: "11.3.1"} + - {python-version: "3.9", 
pytorch-version: "1.12", cuda-version: "11.3.1"} timeout-minutes: 30 From 0f7d534b2ae41e4bd227961a929c333c88e35f59 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 03:21:47 +0900 Subject: [PATCH 06/35] revertme: push to hub --- .github/workflows/cicd-pytorch_dockers.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index 87d9325e0a005..640c632f0fd0e 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -96,7 +96,7 @@ jobs: - uses: actions/checkout@v2 - uses: docker/setup-buildx-action@v2 - uses: docker/login-action@v1 - if: env.PUSH_TO_HUB == 'true' + if: true with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} @@ -135,7 +135,7 @@ jobs: - uses: actions/checkout@v2 - uses: docker/setup-buildx-action@v2 - uses: docker/login-action@v1 - if: env.PUSH_TO_HUB == 'true' + if: true with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} From 62c3d3da1b41eacdfb2d67fe255543f8908c140a Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 04:36:44 +0900 Subject: [PATCH 07/35] Revert "revertme: push to hub" This reverts commit 0f7d534b2ae41e4bd227961a929c333c88e35f59. 
--- .github/workflows/cicd-pytorch_dockers.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index 640c632f0fd0e..87d9325e0a005 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -96,7 +96,7 @@ jobs: - uses: actions/checkout@v2 - uses: docker/setup-buildx-action@v2 - uses: docker/login-action@v1 - if: true + if: env.PUSH_TO_HUB == 'true' with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} @@ -135,7 +135,7 @@ jobs: - uses: actions/checkout@v2 - uses: docker/setup-buildx-action@v2 - uses: docker/login-action@v1 - if: true + if: env.PUSH_TO_HUB == 'true' with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} From e0b4fb858af8f031014edac0a492c0fa7d8b909a Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 04:36:51 +0900 Subject: [PATCH 08/35] Revert "revertme: push to hub" This reverts commit 46a05fccbb9b596aa98d5d68424917b5811c5b4f. 
--- .github/workflows/cicd-pytorch_dockers.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index 87d9325e0a005..30bf195361752 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -108,7 +108,7 @@ jobs: CUDA_VERSION=${{ matrix.cuda_version }} UBUNTU_VERSION=${{ matrix.ubuntu_version }} file: dockers/base-cuda/Dockerfile - push: true + push: ${{ env.PUSH_TO_HUB }} tags: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} timeout-minutes: 95 - uses: ravsamhq/notify-slack-action@v1 @@ -146,7 +146,7 @@ jobs: PYTORCH_VERSION=${{ matrix.pytorch_version }} CUDA_VERSION=${{ matrix.cuda_version }} file: dockers/base-conda/Dockerfile - push: true + push: ${{ env.PUSH_TO_HUB }} tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} timeout-minutes: 95 - uses: ravsamhq/notify-slack-action@v1 From e08f694f734669e19d06d0f351b7bcadc98107f8 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 04:40:48 +0900 Subject: [PATCH 09/35] Run conda if workflow edited --- .github/workflows/ci-pytorch_test-conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-pytorch_test-conda.yml b/.github/workflows/ci-pytorch_test-conda.yml index 91384776b9797..46fcdf5316224 100644 --- a/.github/workflows/ci-pytorch_test-conda.yml +++ b/.github/workflows/ci-pytorch_test-conda.yml @@ -45,7 +45,7 @@ jobs: id: skip shell: bash -l {0} run: | - FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*' + FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*|.github/workflows/ci-pytorch_test-conda.yml' echo "${{ 
steps.changed-files.outputs.all_changed_files }}" | tr " " "\n" > changed_files.txt MATCHES=$(cat changed_files.txt | grep -E $FILTER) echo $MATCHES From 72a849242e61d9c6a27cf75f5860c355c87202c2 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 04:45:06 +0900 Subject: [PATCH 10/35] Run gpu testing if workflow edited --- .azure/gpu-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml index 5ebe5765cd3e3..d48d3677f42e8 100644 --- a/.azure/gpu-tests.yml +++ b/.azure/gpu-tests.yml @@ -44,7 +44,7 @@ jobs: - bash: | CHANGED_FILES=$(git diff --name-status origin/master -- . | awk '{print $2}') - FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*' + FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*|.azure/gpu-*.yml' echo $CHANGED_FILES > changed_files.txt MATCHES=$(cat changed_files.txt | grep -E $FILTER) echo $MATCHES From 3c35befe51990a013085ff0953198791748c28df Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 05:28:12 +0900 Subject: [PATCH 11/35] Use new tags in release/Dockerfile --- dockers/release/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dockers/release/Dockerfile b/dockers/release/Dockerfile index cb393c91dfbe0..c39e66509188c 100644 --- a/dockers/release/Dockerfile +++ b/dockers/release/Dockerfile @@ -14,8 +14,9 @@ ARG PYTHON_VERSION=3.9 ARG PYTORCH_VERSION=1.11 +ARG CUDA_VERSION=11.3.1 -FROM pytorchlightning/pytorch_lightning:base-cuda-py${PYTHON_VERSION}-torch${PYTORCH_VERSION} +FROM pytorchlightning/pytorch_lightning:base-cuda-py${PYTHON_VERSION}-torch${PYTORCH_VERSION}-cuda${CUDA_VERSION} LABEL maintainer="Lightning-AI " From cfd45f7a28fce16b18372d26ba648c3f89d0cf5b Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 05:38:17 +0900 Subject: [PATCH 12/35] Build base-cuda and PL release images with all combinations --- 
.github/workflows/cicd-pytorch_dockers.yml | 42 ++++++++++++++++------ 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index 30bf195361752..f66dc1194712c 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -29,9 +29,22 @@ jobs: strategy: fail-fast: false matrix: - # the config used in '.azure-pipelines/gpu-tests.yml' since the Dockerfile uses the cuda image - python_version: ["3.9"] - pytorch_version: ["1.10", "1.11"] + include: + # Include all Python and PyTorch versions that PL supports. + - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.8", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} steps: - uses: actions/checkout@v2 - uses: docker/setup-buildx-action@v2 @@ -85,13 +98,22 @@ jobs: fail-fast: false matrix: include: - # the config used in '.azure-pipelines/gpu-tests.yml' - - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.1", 
ubuntu_version: "20.04"} - - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} - # latest (used in Tutorials) - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1", ubuntu_version: "20.04"} - - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"} - - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} + # These are the base images for PL release docker image distributions, + # so include all Python and PyTorch versions that PL supports. + - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.8", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} steps: - uses: actions/checkout@v2 - uses: docker/setup-buildx-action@v2 From 69de92f39c62d844ccfe0aa731ad2c12fcf210e9 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 05:43:06 +0900 Subject: [PATCH 13/35] Update release docker --- .github/workflows/release-docker.yml | 41 ++++++++++++++++++++++------ 1 
file changed, 32 insertions(+), 9 deletions(-) diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index 9d87f1a582fb1..ece010122ed7f 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -1,6 +1,5 @@ name: Docker -# https://www.docker.com/blog/first-docker-github-action-is-here -# https://github.com/docker/build-push-action + on: push: branches: [master, "release/*"] @@ -15,8 +14,22 @@ jobs: strategy: fail-fast: false matrix: - python_version: ["3.7", "3.8", "3.9"] - pytorch_version: ["1.9", "1.10"] + include: + # Include all Python and PyTorch versions that PL supports. + - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.8", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} steps: - name: Checkout uses: actions/checkout@v2 @@ -32,19 +45,29 @@ jobs: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} dockerfile: dockers/release/Dockerfile - build_args: PYTHON_VERSION=${{ matrix.python_version 
}},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }} - tags: "${{ steps.get_version.outputs.RELEASE_VERSION }}-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }},latest-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}" + build_args: | + PYTHON_VERSION=${{ matrix.python_version }} + PYTORCH_VERSION=${{ matrix.pytorch_version }} + CUDA_VERSION=${{ matrix.cuda_version }} + LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }} + tags: | + ${{ steps.get_version.outputs.RELEASE_VERSION }}-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} + latest-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} timeout-minutes: 55 - name: Publish Latest to Docker uses: docker/build-push-action@v1.1.0 - # only on releases and latest Python and PyTorch - if: matrix.python_version == '3.9' && matrix.pytorch_version == '1.10' + # Only latest Python and PyTorch + if: matrix.python_version == '3.10' && matrix.pytorch_version == '1.11' with: repository: pytorchlightning/pytorch_lightning username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} dockerfile: dockers/release/Dockerfile - build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }} + build_args: | + PYTHON_VERSION=${{ matrix.python_version }} + PYTORCH_VERSION=${{ matrix.pytorch_version }} + CUDA_VERSION=${{ matrix.cuda_version }} + LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }} tags: "latest" timeout-minutes: 55 From 111245003da1760415923eed9e683f7ef6008edd Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 05:58:05 +0900 Subject: [PATCH 14/35] Update conda from py3.9-torch1.12 to py3.10-torch.1.12 --- .github/workflows/ci-pytorch_test-conda.yml | 2 +- 
.github/workflows/cicd-pytorch_dockers.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-pytorch_test-conda.yml b/.github/workflows/ci-pytorch_test-conda.yml index 46fcdf5316224..3dfdb365f1201 100644 --- a/.github/workflows/ci-pytorch_test-conda.yml +++ b/.github/workflows/ci-pytorch_test-conda.yml @@ -27,7 +27,7 @@ jobs: - {python-version: "3.8", pytorch-version: "1.9", cuda-version: "11.1"} - {python-version: "3.8", pytorch-version: "1.10", cuda-version: "11.1"} - {python-version: "3.9", pytorch-version: "1.11", cuda-version: "11.3.1"} - - {python-version: "3.9", pytorch-version: "1.12", cuda-version: "11.3.1"} + - {python-version: "3.10", pytorch-version: "1.12", cuda-version: "11.6.0"} timeout-minutes: 30 diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index f66dc1194712c..a6bf12f1fd5a3 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -152,7 +152,7 @@ jobs: - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} + - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} steps: - uses: actions/checkout@v2 - uses: docker/setup-buildx-action@v2 From e1901f0667ff8feec499a9ef2f7d1d72df476fa7 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 06:00:06 +0900 Subject: [PATCH 15/35] Fix ubuntu version --- .github/workflows/cicd-pytorch_dockers.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index a6bf12f1fd5a3..ef70d0282acc7 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -128,7 +128,6 @@ jobs: 
PYTHON_VERSION=${{ matrix.python_version }} PYTORCH_VERSION=${{ matrix.pytorch_version }} CUDA_VERSION=${{ matrix.cuda_version }} - UBUNTU_VERSION=${{ matrix.ubuntu_version }} file: dockers/base-cuda/Dockerfile push: ${{ env.PUSH_TO_HUB }} tags: pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} From 07a335cadd454348f3ed53b493f690e1a0a32b5a Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 06:03:36 +0900 Subject: [PATCH 16/35] Revert conda --- .github/workflows/ci-pytorch_test-conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-pytorch_test-conda.yml b/.github/workflows/ci-pytorch_test-conda.yml index 3dfdb365f1201..46fcdf5316224 100644 --- a/.github/workflows/ci-pytorch_test-conda.yml +++ b/.github/workflows/ci-pytorch_test-conda.yml @@ -27,7 +27,7 @@ jobs: - {python-version: "3.8", pytorch-version: "1.9", cuda-version: "11.1"} - {python-version: "3.8", pytorch-version: "1.10", cuda-version: "11.1"} - {python-version: "3.9", pytorch-version: "1.11", cuda-version: "11.3.1"} - - {python-version: "3.10", pytorch-version: "1.12", cuda-version: "11.6.0"} + - {python-version: "3.9", pytorch-version: "1.12", cuda-version: "11.3.1"} timeout-minutes: 30 From 58fe92604ef80b904dfd8ce61e827f31d7a4d442 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 06:07:54 +0900 Subject: [PATCH 17/35] revertme: push to hub --- .github/workflows/cicd-pytorch_dockers.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index ef70d0282acc7..1d8721f38e6be 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -21,7 +21,7 @@ concurrency: cancel-in-progress: ${{ ! 
(github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }} env: - PUSH_TO_HUB: ${{ github.event_name == 'schedule' }} + PUSH_TO_HUB: true jobs: build-pl: From 139e9ea5d7ef85e249b6224856cfc1c077725341 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 06:30:32 +0900 Subject: [PATCH 18/35] Don't build Python 3.10 for now... --- .github/workflows/cicd-pytorch_dockers.yml | 7 ++++--- .github/workflows/release-docker.yml | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index 1d8721f38e6be..32c95e62df392 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -43,7 +43,7 @@ jobs: - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} # - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.0"} - - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} steps: - uses: actions/checkout@v2 @@ -112,7 +112,7 @@ jobs: - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} # - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.0"} - - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} steps: - uses: actions/checkout@v2 @@ -151,7 +151,8 @@ jobs: - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1"} - 
{python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} + # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} steps: - uses: actions/checkout@v2 - uses: docker/setup-buildx-action@v2 diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index ece010122ed7f..79ceef881cfb0 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -28,7 +28,7 @@ jobs: - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} # - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.0"} - - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} + # - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} steps: - name: Checkout @@ -58,7 +58,7 @@ jobs: - name: Publish Latest to Docker uses: docker/build-push-action@v1.1.0 # Only latest Python and PyTorch - if: matrix.python_version == '3.10' && matrix.pytorch_version == '1.11' + if: matrix.python_version == '3.9' && matrix.pytorch_version == '1.11' with: repository: pytorchlightning/pytorch_lightning username: ${{ secrets.DOCKER_USERNAME }} From 7f3385ebb681330f580e9f933a30c6ff7e9bc9fe Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 13:40:04 +0900 Subject: [PATCH 19/35] Fix pl release builder --- .github/workflows/cicd-pytorch_dockers.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index 32c95e62df392..8289c26dc9940 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -53,6 +53,7 @@ 
jobs: build-args: | PYTHON_VERSION=${{ matrix.python_version }} PYTORCH_VERSION=${{ matrix.pytorch_version }} + CUDA_VERSION=${{ matrix.cuda_version }} file: dockers/release/Dockerfile push: false # pushed in release-docker.yml only when PL is released timeout-minutes: 50 From 109bc2fac6b93a320e30116c325d7eb07952575f Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 13:49:55 +0900 Subject: [PATCH 20/35] updating version contribute to the error? https://github.com/docker/buildx/issues/456 --- .github/workflows/cicd-pytorch_dockers.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index 8289c26dc9940..4f1444b8608a1 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -48,7 +48,7 @@ jobs: steps: - uses: actions/checkout@v2 - uses: docker/setup-buildx-action@v2 - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | PYTHON_VERSION=${{ matrix.python_version }} @@ -74,7 +74,7 @@ jobs: with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | PYTHON_VERSION=${{ matrix.python_version }} @@ -123,7 +123,7 @@ jobs: with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | PYTHON_VERSION=${{ matrix.python_version }} @@ -162,7 +162,7 @@ jobs: with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | PYTHON_VERSION=${{ matrix.python_version }} @@ -198,7 +198,7 @@ jobs: with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - uses: 
docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | PYTHON_VERSION=${{ matrix.python_version }} @@ -207,7 +207,7 @@ jobs: push: ${{ env.PUSH_TO_HUB }} tags: pytorchlightning/pytorch_lightning:base-ipu-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} timeout-minutes: 100 - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | PYTHON_VERSION=${{ matrix.python_version }} @@ -242,7 +242,7 @@ jobs: with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - uses: docker/build-push-action@v2 + - uses: docker/build-push-action@v3 with: build-args: | DIST=latest @@ -269,7 +269,7 @@ jobs: uses: actions/checkout@v2 - name: Build Conda Docker # publish master/release - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v3 with: file: dockers/nvidia/Dockerfile push: false From 01f0d0658ad548c745bfb3ee4f3302b8cfad2964 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 26 Jul 2022 13:52:37 +0900 Subject: [PATCH 21/35] Update actions' versions --- .github/workflows/cicd-pytorch_dockers.yml | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index 4f1444b8608a1..762ed002328bf 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -46,7 +46,7 @@ jobs: # - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 - uses: docker/build-push-action@v3 with: @@ -67,9 +67,9 @@ jobs: python_version: ["3.7"] xla_version: ["1.11"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 - - uses: docker/login-action@v1 + - uses: 
docker/login-action@v2 if: env.PUSH_TO_HUB == 'true' with: username: ${{ secrets.DOCKER_USERNAME }} @@ -116,9 +116,9 @@ jobs: # - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 - - uses: docker/login-action@v1 + - uses: docker/login-action@v2 if: env.PUSH_TO_HUB == 'true' with: username: ${{ secrets.DOCKER_USERNAME }} @@ -155,9 +155,9 @@ jobs: - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 - - uses: docker/login-action@v1 + - uses: docker/login-action@v2 if: env.PUSH_TO_HUB == 'true' with: username: ${{ secrets.DOCKER_USERNAME }} @@ -191,9 +191,9 @@ jobs: # the config used in 'dockers/ci-runner-ipu/Dockerfile' - {python_version: "3.9", pytorch_version: "1.9"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 - - uses: docker/login-action@v1 + - uses: docker/login-action@v2 if: env.PUSH_TO_HUB == 'true' with: username: ${{ secrets.DOCKER_USERNAME }} @@ -235,9 +235,9 @@ jobs: # the config used in 'dockers/ci-runner-hpu/Dockerfile' - {gaudi_version: "1.5.0", pytorch_version: "1.11.0"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 - - uses: docker/login-action@v1 + - uses: docker/login-action@v2 if: env.PUSH_TO_HUB == 'true' with: username: ${{ secrets.DOCKER_USERNAME }} @@ -266,7 +266,7 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Build Conda Docker # publish master/release uses: docker/build-push-action@v3 From d9a2a4c0343bba5ccba1f9e38372b9a5b88a4b98 Mon Sep 17 
00:00:00 2001 From: Akihiro Nitta Date: Sat, 30 Jul 2022 04:48:43 +0900 Subject: [PATCH 22/35] Update slack user to notify --- .github/workflows/cicd-pytorch_dockers.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index b037c798bc8ee..e642ab89a468f 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -199,7 +199,7 @@ jobs: status: ${{ job.status }} token: ${{ secrets.GITHUB_TOKEN }} notification_title: ${{ format('IPU; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }} - message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01BULUS2BG>' # SeanNaren + message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@@U01A5T7EY9M>' # akihironitta env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} From 6db670ec218b18c38628998474ba7306aab981eb Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Wed, 3 Aug 2022 06:47:12 +0900 Subject: [PATCH 23/35] Don't use 11.6.0 to avoid bagua incompatibility --- .github/workflows/cicd-pytorch_dockers.yml | 16 ++++++++-------- .github/workflows/release-docker.yml | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index 19eab7fb5a4eb..d5d04a347d51f 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -34,17 +34,17 @@ jobs: - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.3.1"} - {python_version: "3.8", 
pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.8", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} # - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} - # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} + # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.3.1"} steps: - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 @@ -104,17 +104,17 @@ jobs: - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.3.1"} - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.8", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", 
pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} # - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} - # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} + # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.3.1"} steps: - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index 79ceef881cfb0..53cc7101fb666 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -19,17 +19,17 @@ jobs: - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"} - # - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.3.1"} - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.8", pytorch_version: "1.11", cuda_version: "11.3.1"} - # - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - # - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.0"} + - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} # - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} - # - {python_version: "3.10", 
pytorch_version: "1.12", cuda_version: "11.6.0"} + # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.3.1"} steps: - name: Checkout uses: actions/checkout@v2 From b5643af5c4b4c2da3ec1a1248ec5d447f6b494cb Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Wed, 3 Aug 2022 13:24:06 +0900 Subject: [PATCH 24/35] Don't use 11.1, and use 11.1.1 --- .github/workflows/cicd-pytorch_dockers.yml | 16 ++++++++-------- .github/workflows/release-docker.yml | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index d5d04a347d51f..4540717c4b965 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -31,15 +31,15 @@ jobs: matrix: include: # Include all Python and PyTorch versions that PL supports. - - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1.1"} - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.8", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.3.1"} - - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1.1"} - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} 
@@ -101,15 +101,15 @@ jobs: include: # These are the base images for PL release docker image distributions, # so include all Python and PyTorch versions that PL supports. - - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1.1"} - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.8", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.3.1"} - - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1.1"} - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} @@ -149,8 +149,8 @@ jobs: fail-fast: false matrix: include: - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} - - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1"} + - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} + - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index 
53cc7101fb666..16a7728d4bd2d 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -16,15 +16,15 @@ jobs: matrix: include: # Include all Python and PyTorch versions that PL supports. - - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1.1"} - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.8", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.3.1"} - - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1"} + - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1.1"} - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} From 72d9b9e47bde4f570f261a26bc285e4feb8b480e Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Fri, 5 Aug 2022 13:18:31 +0900 Subject: [PATCH 25/35] Update .github/workflows/ci-pytorch_test-conda.yml Co-authored-by: Luca Medeiros <67411094+luca-medeiros@users.noreply.github.com> --- .github/workflows/ci-pytorch_test-conda.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-pytorch_test-conda.yml b/.github/workflows/ci-pytorch_test-conda.yml index 59935cb8dfb61..a66c740494b9a 100644 --- a/.github/workflows/ci-pytorch_test-conda.yml +++ b/.github/workflows/ci-pytorch_test-conda.yml @@ -24,8 +24,8 @@ 
jobs: matrix: # nightly: add when there's a release candidate include: - - {python-version: "3.8", pytorch-version: "1.9", cuda-version: "11.1"} - - {python-version: "3.8", pytorch-version: "1.10", cuda-version: "11.1"} + - {python-version: "3.8", pytorch-version: "1.9", cuda-version: "11.1.1"} + - {python-version: "3.8", pytorch-version: "1.10", cuda-version: "11.1.1"} - {python-version: "3.9", pytorch-version: "1.11", cuda-version: "11.3.1"} - {python-version: "3.9", pytorch-version: "1.12", cuda-version: "11.3.1"} From 49dc2fe4a0c146e7f20e458516844cef64856839 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Fri, 5 Aug 2022 13:36:26 +0900 Subject: [PATCH 26/35] Update trigger --- .azure/gpu-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml index f64ed65fcddf7..68ba6974a3527 100644 --- a/.azure/gpu-tests.yml +++ b/.azure/gpu-tests.yml @@ -44,7 +44,7 @@ jobs: - bash: | CHANGED_FILES=$(git diff --name-status origin/master -- . 
| awk '{print $2}') - FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*|.azure/gpu-*.yml' + FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*|.azure/gpu-tests.yml' echo $CHANGED_FILES > changed_files.txt MATCHES=$(cat changed_files.txt | grep -E $FILTER) echo $MATCHES From 7e1372e9cc9d206ab00f8ce219bc8d0d6d8c4952 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Fri, 5 Aug 2022 13:44:54 +0900 Subject: [PATCH 27/35] Ignore artifacts from tutorials --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index 719f291a492ca..259d9f271189c 100644 --- a/.gitignore +++ b/.gitignore @@ -165,3 +165,9 @@ hars* artifacts/* *docs/examples* *docs/source-app/api* + +# tutorials +our_model.tar +test.png +saved_models +data/ From cd4a107815bba9cb3e4b0f03a161e06cb27ce668 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Fri, 5 Aug 2022 14:59:41 +0900 Subject: [PATCH 28/35] Trim docker images to distribute --- .github/workflows/cicd-pytorch_dockers.yml | 27 ++++------------------ .github/workflows/release-docker.yml | 14 ++--------- 2 files changed, 6 insertions(+), 35 deletions(-) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index e30e475baeafb..e9e86ede2e1b0 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -30,21 +30,12 @@ jobs: fail-fast: false matrix: include: - # Include all Python and PyTorch versions that PL supports.
- - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1.1"} - - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.3.1"} - - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} - - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.3.1"} + # We only release one docker image per PyTorch version. + # The matrix here is the same as the one in release-docker.yml. - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1.1"} - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} - # - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} - # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.3.1"} steps: - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 @@ -99,22 +90,12 @@ jobs: fail-fast: false matrix: include: - # These are the base images for PL release docker image distributions, - # so include all Python and PyTorch versions that PL supports. 
- - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1.1"} - - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.3.1"} - - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} - - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.3.1"} + # These are the base images for PL release docker images, + # so include at least all of the combinations in release-dockers.yml. - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1.1"} - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} - # - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} - # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.3.1"} steps: - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index 16a7728d4bd2d..6901a24204683 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -15,21 +15,11 @@ jobs: fail-fast: false matrix: include: - # Include all Python and PyTorch versions that PL supports. 
- - {python_version: "3.7", pytorch_version: "1.9", cuda_version: "11.1.1"} - - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.3.1"} - - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.7", pytorch_version: "1.12", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} - - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.8", pytorch_version: "1.12", cuda_version: "11.3.1"} + # We only release one docker image per PyTorch version. - {python_version: "3.9", pytorch_version: "1.9", cuda_version: "11.1.1"} - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} - # - {python_version: "3.10", pytorch_version: "1.11", cuda_version: "11.3.1"} - # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.3.1"} steps: - name: Checkout uses: actions/checkout@v2 @@ -58,7 +48,7 @@ jobs: - name: Publish Latest to Docker uses: docker/build-push-action@v1.1.0 # Only latest Python and PyTorch - if: matrix.python_version == '3.9' && matrix.pytorch_version == '1.11' + if: matrix.python_version == '3.9' && matrix.pytorch_version == '1.12' with: repository: pytorchlightning/pytorch_lightning username: ${{ secrets.DOCKER_USERNAME }} From 4b814e794a4783869939bf48839d523c0dddd335 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Fri, 5 Aug 2022 15:23:20 +0900 Subject: [PATCH 29/35] Add an image for tutorials --- .github/workflows/cicd-pytorch_dockers.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index e9e86ede2e1b0..100c7f13ab07b 100644 --- 
a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -96,6 +96,8 @@ jobs: - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} + # Used in Lightning-AI/tutorials + - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} steps: - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 From d134b690e04ff1cd3fa7084b2ed639f7d92ca6d3 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 6 Aug 2022 11:03:31 +0900 Subject: [PATCH 30/35] Update conda image 3.8x1.10 --- .github/workflows/ci-pytorch_test-conda.yml | 2 +- .github/workflows/cicd-pytorch_dockers.yml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-pytorch_test-conda.yml b/.github/workflows/ci-pytorch_test-conda.yml index a66c740494b9a..213add66dfdc6 100644 --- a/.github/workflows/ci-pytorch_test-conda.yml +++ b/.github/workflows/ci-pytorch_test-conda.yml @@ -25,7 +25,7 @@ jobs: # nightly: add when there's a release candidate include: - {python-version: "3.8", pytorch-version: "1.9", cuda-version: "11.1.1"} - - {python-version: "3.8", pytorch-version: "1.10", cuda-version: "11.1.1"} + - {python-version: "3.8", pytorch-version: "1.10", cuda-version: "11.3.1"} - {python-version: "3.9", pytorch-version: "1.11", cuda-version: "11.3.1"} - {python-version: "3.9", pytorch-version: "1.12", cuda-version: "11.3.1"} diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index 100c7f13ab07b..231a6fce03b4f 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -133,10 +133,9 @@ jobs: matrix: include: - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} - - {python_version: "3.8", pytorch_version: "1.10", cuda_version: 
"11.1.1"} + - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} - # - {python_version: "3.10", pytorch_version: "1.12", cuda_version: "11.6.0"} steps: - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 From 3a10724807ee75890af8bb5c8a3359875ba60452 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 6 Aug 2022 13:34:37 +0900 Subject: [PATCH 31/35] Try different conda variants --- .github/workflows/cicd-pytorch_dockers.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/cicd-pytorch_dockers.yml b/.github/workflows/cicd-pytorch_dockers.yml index 231a6fce03b4f..26826959cca07 100644 --- a/.github/workflows/cicd-pytorch_dockers.yml +++ b/.github/workflows/cicd-pytorch_dockers.yml @@ -133,7 +133,10 @@ jobs: matrix: include: - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} + - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1.1"} + - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.1.1"} - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} + - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} steps: From b7188dd79c1a7e66f3d5558434ebac2c8f413b63 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Wed, 10 Aug 2022 11:39:24 +0900 Subject: [PATCH 32/35] No need to set cuda for conda jobs --- .github/workflows/ci-pytorch-test-conda.yml | 12 +++++------- .github/workflows/cicd-pytorch-dockers.yml | 14 +++++--------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci-pytorch-test-conda.yml b/.github/workflows/ci-pytorch-test-conda.yml index 213add66dfdc6..ac16920947df0 100644 --- 
a/.github/workflows/ci-pytorch-test-conda.yml +++ b/.github/workflows/ci-pytorch-test-conda.yml @@ -18,17 +18,15 @@ defaults: jobs: conda: runs-on: ubuntu-20.04 - container: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-cuda${{ matrix.cuda-version }} + container: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }} strategy: fail-fast: false matrix: - # nightly: add when there's a release candidate include: - - {python-version: "3.8", pytorch-version: "1.9", cuda-version: "11.1.1"} - - {python-version: "3.8", pytorch-version: "1.10", cuda-version: "11.3.1"} - - {python-version: "3.9", pytorch-version: "1.11", cuda-version: "11.3.1"} - - {python-version: "3.9", pytorch-version: "1.12", cuda-version: "11.3.1"} - + - {python-version: "3.8", pytorch-version: "1.9"} + - {python-version: "3.8", pytorch-version: "1.10"} + - {python-version: "3.9", pytorch-version: "1.11"} + - {python-version: "3.9", pytorch-version: "1.12"} timeout-minutes: 30 steps: diff --git a/.github/workflows/cicd-pytorch-dockers.yml b/.github/workflows/cicd-pytorch-dockers.yml index f503106741dfb..2267db8f6747f 100644 --- a/.github/workflows/cicd-pytorch-dockers.yml +++ b/.github/workflows/cicd-pytorch-dockers.yml @@ -132,13 +132,10 @@ jobs: fail-fast: false matrix: include: - - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1.1"} - - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1.1"} - - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.1.1"} - - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.3.1"} - - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.3.1"} + - {python_version: "3.8", pytorch_version: "1.9"} + - {python_version: 
"3.8", pytorch_version: "1.10"} + - {python_version: "3.9", pytorch_version: "1.11"} + - {python_version: "3.9", pytorch_version: "1.12"} steps: - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 @@ -152,10 +149,9 @@ jobs: build-args: | PYTHON_VERSION=${{ matrix.python_version }} PYTORCH_VERSION=${{ matrix.pytorch_version }} - CUDA_VERSION=${{ matrix.cuda_version }} file: dockers/base-conda/Dockerfile push: ${{ env.PUSH_TO_HUB }} - tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }} + tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} timeout-minutes: 95 - uses: ravsamhq/notify-slack-action@v1 if: failure() && env.PUSH_TO_HUB == 'true' From 7f61fcdfe1dd90d2ff5e3697d563ac64a57711cf Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Wed, 10 Aug 2022 12:10:58 +0900 Subject: [PATCH 33/35] Update who to notify ipu failure --- .github/workflows/cicd-pytorch-dockers.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cicd-pytorch-dockers.yml b/.github/workflows/cicd-pytorch-dockers.yml index 2267db8f6747f..b246868a63bd9 100644 --- a/.github/workflows/cicd-pytorch-dockers.yml +++ b/.github/workflows/cicd-pytorch-dockers.yml @@ -203,7 +203,7 @@ jobs: status: ${{ job.status }} token: ${{ secrets.GITHUB_TOKEN }} notification_title: ${{ format('IPU; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }} - message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@@U01A5T7EY9M>' # akihironitta + message_format: '{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01GD29QCAV>' # kaushikb11 env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} From 6431d0e5861f72cd20fbe590c5b8eec10d702544 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Wed, 10 Aug 2022 12:11:36 +0900 Subject: [PATCH 34/35] 
Don't push --- .github/workflows/cicd-pytorch-dockers.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cicd-pytorch-dockers.yml b/.github/workflows/cicd-pytorch-dockers.yml index b246868a63bd9..84051cafd82d8 100644 --- a/.github/workflows/cicd-pytorch-dockers.yml +++ b/.github/workflows/cicd-pytorch-dockers.yml @@ -21,7 +21,7 @@ concurrency: cancel-in-progress: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }} env: - PUSH_TO_HUB: true + PUSH_TO_HUB: ${{ github.event_name == 'schedule' }} jobs: build-pl: From 5a918444f472aa5e688f623b494c7ae42eb00054 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Wed, 10 Aug 2022 12:12:23 +0900 Subject: [PATCH 35/35] Update filename --- .github/workflows/ci-pytorch-test-conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-pytorch-test-conda.yml b/.github/workflows/ci-pytorch-test-conda.yml index ac16920947df0..2bbdb699c2c1e 100644 --- a/.github/workflows/ci-pytorch-test-conda.yml +++ b/.github/workflows/ci-pytorch-test-conda.yml @@ -43,7 +43,7 @@ jobs: id: skip shell: bash -l {0} run: | - FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*|.github/workflows/ci-pytorch_test-conda.yml' + FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*|.github/workflows/ci-pytorch-test-conda.yml' echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr " " "\n" > changed_files.txt MATCHES=$(cat changed_files.txt | grep -E $FILTER) echo $MATCHES