From 5c2b7cadcdce49dc2d85f75089a254f20ba2510d Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Thu, 26 Aug 2021 15:10:59 +0530 Subject: [PATCH] Remove ci tpu test from github workflows (#8965) Co-authored-by: Jirka --- .github/workflows/ci_test-tpu.yml | 144 ------------------------------ README.md | 16 ++-- 2 files changed, 8 insertions(+), 152 deletions(-) delete mode 100644 .github/workflows/ci_test-tpu.yml diff --git a/.github/workflows/ci_test-tpu.yml b/.github/workflows/ci_test-tpu.yml deleted file mode 100644 index 22bb7bd7cd4e5..0000000000000 --- a/.github/workflows/ci_test-tpu.yml +++ /dev/null @@ -1,144 +0,0 @@ -name: TPU tests - -on: - push: - branches: [master, "release/*"] -# TODO: temporal disable TPU testing until we find way how to pass credentials to forked PRs -# pull_request: -# branches: -# - master - -env: - GKE_CLUSTER: lightning-cluster - GKE_ZONE: us-central1-a - IMAGE: gcr.io/${{ secrets.GKE_PROJECT }}/tpu-testing-image - MAX_CHECKS: 360 - CHECK_SPEEP: 5 - -jobs: - setup-build-publish-deploy: - name: tpu-testing-job - runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - python-version: [3.7] - xla-version: [1.6, 1.8] - # Timeout: https://stackoverflow.com/a/59076067/4521646 - timeout-minutes: 50 - - steps: - - name: Set IMAGETAG - run: echo "IMAGETAG=$(date +%s)_${{ matrix.python-version }}" >> $GITHUB_ENV - - name: Install Go - uses: actions/setup-go@v2 - with: - go-version: 1.14.x - - name: Set up Python 3.7 - uses: actions/setup-python@v2 - with: - python-version: 3.7 - - - name: Checkout Pytorch Lightning - uses: actions/checkout@v2 - with: - repository: PyTorchLightning/pytorch-lightning - ref: ${{ github.event.pull_request.head.sha }} - - - name: Checkout ml-testing-accelerators - uses: actions/checkout@v2 - with: - repository: GoogleCloudPlatform/ml-testing-accelerators - path: ml-testing-accelerators - ref: 5e88ac24f631c27045e62f0e8d5dfcf34e425e25 - - - name: Setup gcloud CLI - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master - with: - version: '290.0.1' - service_account_key: ${{ secrets.GKE_SA_KEY_BASE64 }} - project_id: ${{ secrets.GKE_PROJECT }} - export_default_credentials: true - - # Configure Docker to use the gcloud command-line tool as a credential helper for authentication. - - name: Configure Docker - run: |- - gcloud --quiet auth configure-docker - shell: bash - - name: Build and Push Docker Image - env: - PYTHON_VER: ${{ matrix.python-version }} - XLA_VER: ${{ matrix.xla-version }} - run: | - #cd dockers/tpu-tests - docker build --tag "$IMAGE:$IMAGETAG" -f ./dockers/tpu-tests/Dockerfile --build-arg "PYTHON_VERSION=$PYTHON_VER" --build-arg "PYTORCH_VERSION=$XLA_VER" . - docker push "$IMAGE:$IMAGETAG" - shell: bash - - - name: Install jsonnet - run: |- - go get github.com/google/go-jsonnet/cmd/jsonnet - shell: bash - # Get the GKE credentials so we can deploy to the cluster - # Use either zone or region depending on cluster setup. - - run: |- - gcloud container clusters get-credentials "$GKE_CLUSTER" --zone "$GKE_ZONE" - shell: bash - - - name: Deploy the job on the kubernetes cluster - env: - XLA_VER: ${{ matrix.xla-version }} - run: |- - python -c "fname = 'dockers/tpu-tests/tpu_test_cases.jsonnet' ; ttt = open(fname).read().replace('pytorch-VERSION', 'pytorch-$XLA_VER') ; open(fname, 'w').write(ttt)" - job_name=$(jsonnet -J ml-testing-accelerators/ dockers/tpu-tests/tpu_test_cases.jsonnet --ext-str image=$IMAGE --ext-str image-tag=$IMAGETAG | kubectl create -f -) && \ - job_name=${job_name#job.batch/} && \ - job_name=${job_name% created} && \ - echo "Waiting on kubernetes job: $job_name in cluster: $GKE_CLUSTER" && \ - i=0 && \ - # 60 checks spaced 30s apart = 900s total. - status_code=2 && \ - # Check on the job periodically. Set the status code depending on what - # happened to the job in Kubernetes. If we try MAX_CHECKS times and - # still the job hasn't finished, give up and return the starting - # non-zero status code. - printf "Waiting for job to finish: " && \ - while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else printf "." ; fi; sleep $CHECK_SPEEP; done && \ - echo "Done waiting. Job status code: $status_code" && \ - pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \ - echo "GKE pod name: $pod_name" && \ - kubectl logs -f $pod_name --container=train > /tmp/full_output.txt - if grep -q '' /tmp/full_output.txt ; then csplit /tmp/full_output.txt '//'; else mv /tmp/full_output.txt xx00; fi && \ - # First portion is the test logs. Print these to Github Action stdout. - cat xx00 && \ - echo "Done with log retrieval attempt." && \ - gcloud container images delete "$IMAGE:$IMAGETAG" --force-delete-tags && \ - echo "Status code: $status_code" - exit $status_code - shell: bash - - - name: Statistics - if: success() - run: | - mv ./xx01 coverage - # TODO: add human readable report - cat coverage - # sudo pip install pycobertura - # pycobertura show coverage.xml - - - name: Upload coverage results - uses: actions/upload-artifact@v2 - with: - name: coverage-TPU - path: coverage - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 - # see: https://github.com/actions/toolkit/issues/399 - continue-on-error: true - if: always() - with: - token: ${{ secrets.CODECOV_TOKEN }} - file: coverage - flags: tpu,pytest - name: TPU-coverage - fail_ci_if_error: true diff --git a/README.md b/README.md index d31d652850458..c80995293ae9d 100644 --- a/README.md +++ b/README.md @@ -78,14 +78,14 @@ Lightning is rigorously tested across multiple GPUs, TPUs CPUs and against major
-| System / PyTorch ver. | 1.6 (min. req.) | 1.7 | 1.8 (LTS) | 1.9 (latest) | 1.10 (nightly) | -| :----------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| Conda py3.7 \[linux\] | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | -| Linux py3.7 \[GPUs\*\*\] | - | - | [![Build Status]()](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - | -| Linux py3.{6,7} \[TPUs\*\*\*\] | [![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22TPU+tests%22+branch%3Amaster) | - | [![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22TPU+tests%22+branch%3Amaster) | - | - | -| Linux py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | -| OSX py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | -| Windows py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | +| System / PyTorch ver. | 1.6 (min. req.) | 1.7 | 1.8 (LTS) | 1.9 (latest) | 1.10 (nightly) | +| :------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Conda py3.7 \[linux\] | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | +| Linux py3.7 \[GPUs\*\*\] | - | - | [![Build Status]()](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - | +| Linux py3.7 \[TPUs\*\*\*\] | - | - | [![CircleCI](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master.svg?style=svg)](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master) | - | - | +| Linux py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | +| OSX py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | +| Windows py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - _\*\* tests run on two NVIDIA P100_ - _\*\*\* tests run on Google GKE TPUv2/3_