From e210f3fe60090a8a77fa261b5b2bef73493e2022 Mon Sep 17 00:00:00 2001 From: Hao Zhang Date: Sun, 3 Jul 2022 19:03:40 -0700 Subject: [PATCH] [CI/CD] New PyPI Infra [skip ci] (#577) --- .github/workflows/build_jaxlib.yml | 2 +- .github/workflows/docs.yml | 1 + .github/workflows/release_jaxlib.yml | 84 +++++++++++---- build_jaxlib/release/README.md | 14 +++ build_jaxlib/release/generate_pypi_index.py | 101 ++++++++++++++++++ build_jaxlib/release/wheel_upload.py | 48 +++++++++ .../scripts/build_jaxlib_docker_entrypoint.sh | 15 ++- 7 files changed, 243 insertions(+), 22 deletions(-) create mode 100644 build_jaxlib/release/README.md create mode 100644 build_jaxlib/release/generate_pypi_index.py create mode 100644 build_jaxlib/release/wheel_upload.py diff --git a/.github/workflows/build_jaxlib.yml b/.github/workflows/build_jaxlib.yml index 33c19445f..1a63ed183 100644 --- a/.github/workflows/build_jaxlib.yml +++ b/.github/workflows/build_jaxlib.yml @@ -45,7 +45,7 @@ jobs: mkdir -p dist docker run --gpus all --tmpfs /build:exec \ --rm -v $(pwd)/dist:/dist build-jaxlib-image \ - 3.8 cuda 11.1 ${TF_BRANCH##*/} + 3.8 cuda 11.1 main ${TF_BRANCH##*/} # change this to publishing to pypi - name: Publish to local diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index a192f198a..bdb5ce3af 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -35,3 +35,4 @@ jobs: external_repository: alpa-projects/alpa-projects.github.io publish_branch: master publish_dir: /data/alpa-dist/docs + keep_files: true diff --git a/.github/workflows/release_jaxlib.yml b/.github/workflows/release_jaxlib.yml index 26ff3c177..49e3a41ed 100644 --- a/.github/workflows/release_jaxlib.yml +++ b/.github/workflows/release_jaxlib.yml @@ -4,10 +4,11 @@ on: release: types: [created] workflow_dispatch: - -env: - TWINE_USERNAME: "__token__" - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + inputs: + tensorflow: + description: 'TensorFlow-alpa branch to build' + required: true + 
default: 'master' jobs: @@ -41,26 +42,71 @@ jobs: - name: Compile Jaxlib run: | mkdir -p /data/alpa-dist/jaxlib-alpa/cuda${CUDA_VERSION//.} - - docker run --gpus all --tmpfs /build:exec \ - --rm -v /data/alpa-dist/jaxlib-alpa/cuda${CUDA_VERSION//.}:/dist \ - build-jaxlib-image-cuda${CUDA_VERSION} ${PYTHON_VERSION} \ - cuda ${CUDA_VERSION} + echo "Compile Python ${PYTHON_VERSION}, CUDA ${CUDA_VERSION}, ALPA BRANCH: ${ALPA_BRANCH}, TF_BRANCH: ${TF_BRANCH}" + if [[ ${{ github.event_name }} == "release" ]]; then + docker run --gpus all --tmpfs /build:exec \ + --rm -v /data/alpa-dist/jaxlib-alpa/cuda${CUDA_VERSION//.}:/dist \ + build-jaxlib-image-cuda${CUDA_VERSION} ${PYTHON_VERSION} \ + cuda ${CUDA_VERSION} ${ALPA_BRANCH} + else + docker run --gpus all --tmpfs /build:exec \ + --rm -v /data/alpa-dist/jaxlib-alpa/cuda${CUDA_VERSION//.}:/dist \ + build-jaxlib-image-cuda${CUDA_VERSION} ${PYTHON_VERSION} \ + cuda ${CUDA_VERSION} ${ALPA_BRANCH} ${TF_BRANCH} + fi env: CUDA_VERSION: ${{ matrix.cuda }} PYTHON_VERSION: ${{ matrix.python }} + ALPA_BRANCH: ${{ github.ref }} + TF_BRANCH: ${{ github.event.inputs.tensorflow }} + + - name: Move CUDA${{ matrix.cuda }} + run: | + echo "Move to one single folder" + ls /data/alpa-dist/jaxlib-alpa/cuda${CUDA_VERSION//.} + mv /data/alpa-dist/jaxlib-alpa/cuda${CUDA_VERSION//.}/*.whl /data/alpa-pypi/packages/ + env: + CUDA_VERSION: ${{ matrix.cuda }} publish: runs-on: [self-hosted] needs: [build-jaxlib] - strategy: - matrix: - cuda: ["11.1", "11.2", "11.3"] steps: - - name: Publish CUDA${{ matrix.cuda }} - run: | - echo "Move to self-hosted pypi" - ls /data/alpa-dist/jaxlib-alpa/cuda${CUDA_VERSION//.} - mv /data/alpa-dist/jaxlib-alpa/cuda${CUDA_VERSION//.}/*.whl /data/alpa-pypi/packages/ - env: - CUDA_VERSION: ${{ matrix.cuda }} + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install github3.py requests + + 
- uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Get latest tag + id: latesttag + uses: "WyriHaximus/github-action-get-previous-tag@v1" + + - name: Upload wheels + run: | + echo "Upload wheels to tag ${TAG}" + ls /data/alpa-pypi/packages/ + python build_jaxlib/release/wheel_upload.py --tag ${TAG} --path /data/alpa-pypi/packages/ + env: + GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }} + TAG: ${{ steps.latesttag.outputs.tag }} + + - name: "Generate and update PyPI index" + env: + GITHUB_TOKEN: ${{ secrets.PAT_TOKEN }} + TAG: ${{ steps.latesttag.outputs.tag }} + run: | + git clone https://$GITHUB_TOKEN@github.com/alpa-projects/alpa-projects.github.io + cd alpa-projects.github.io + git config user.name github-actions + git config user.email github-actions@github.com + cd .. + python build_jaxlib/release/generate_pypi_index.py --tag ${TAG} diff --git a/build_jaxlib/release/README.md b/build_jaxlib/release/README.md new file mode 100644 index 000000000..12e4fc230 --- /dev/null +++ b/build_jaxlib/release/README.md @@ -0,0 +1,14 @@ +# How to Release JaxLib and Generate a PyPI Index + +1. Upload jaxlib wheels as assets under a release tag. +```shell +GITHUB_TOKEN=[ADMIN_TOKEN] python wheel_upload.py --tag [TAG] --path [PATH_TO_WHEELS] +``` + +2. Generate an HTML index page and commit it to the master branch of the Alpa doc repository. +```shell +GITHUB_TOKEN=[ADMIN_TOKEN] python generate_pypi_index.py --tag [TAG] +``` +All wheel assets under `[TAG]` will be included in the HTML index page that appears in the doc repo. + +Please make sure the same TAG is used in Step 1 and Step 2. 
diff --git a/build_jaxlib/release/generate_pypi_index.py b/build_jaxlib/release/generate_pypi_index.py new file mode 100644 index 000000000..c749e7891 --- /dev/null +++ b/build_jaxlib/release/generate_pypi_index.py @@ -0,0 +1,101 @@ +"""Generate and upload a PyPI index page given a tag.""" +import os +import logging +import argparse +import subprocess +from datetime import datetime + +import github3 +import github3.session as session +import requests + + +def py_str(cstr): + return cstr.decode("utf-8") + + +def url_is_valid(url): + """Check if a given URL is valid, i.e. it returns 200 OK when requested.""" + r = requests.get(url) + + if r.status_code != 200: + print("Warning: HTTP code %s for url %s" % (r.status_code, url)) + + return r.status_code == 200 + + +def list_wheels(repo, tag): + gh = github3.GitHub(token=os.environ["GITHUB_TOKEN"], + session=session.GitHubSession(default_connect_timeout=100, default_read_timeout=100)) + repo = gh.repository(*repo.split("/")) + wheels = [] + all_tags = [release.tag_name for release in repo.releases()] + if tag not in all_tags: + raise RuntimeError("The tag provided does not exist.") + release = repo.release_from_tag(tag) + for asset in release.assets(): + print(f"Validating {asset.name} with url: {asset.browser_download_url}") + if asset.name.endswith(".whl") and url_is_valid(asset.browser_download_url): + wheels.append(asset) + return wheels + + +def update_wheel_page(keep_list, site_repo, tag, dry_run=False): + """Update the wheel page""" + new_html = "" + for asset in keep_list: + new_html += '<a href="%s">%s</a><br>\n' % (
+ asset.browser_download_url, + asset.name, + ) + + def run_cmd(cmd): + proc = subprocess.Popen( + cmd, cwd=site_repo, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) + (out, _) = proc.communicate() + if proc.returncode != 0: + msg = "git error: %s" % cmd + msg += py_str(out) + raise RuntimeError(msg) + + run_cmd(["git", "fetch"]) + run_cmd(["git", "checkout", "-B", "master", "origin/master"]) + wheel_html_path = os.path.join(site_repo, "wheels.html") + if not os.path.exists(wheel_html_path) or open(wheel_html_path, "r").read() != new_html: + print(f"Wheel page changed, update {wheel_html_path}..") + if not dry_run: + open(wheel_html_path, "w").write(new_html) + run_cmd(["git", "add", "wheels.html"]) + run_cmd(["git", "commit", "-am", + f"wheel update at {datetime.now()} from tag {tag}"]) + run_cmd(["git", "push", "origin", "master"]) + + +def delete_assets(remove_list, dry_run): + for asset in remove_list: + if not dry_run: + asset.delete() + if remove_list: + print("Finish deleting %d removed assets" % len(remove_list)) + + +def main(): + logging.basicConfig(level=logging.WARNING) + parser = argparse.ArgumentParser( + description="Generate a wheel page given a release tag, assuming the wheels have been uploaded." 
+ ) + parser.add_argument("--dry-run", action="store_true") + parser.add_argument("--site-path", type=str, default="alpa-projects.github.io") + parser.add_argument("--repo", type=str, default="alpa-projects/alpa") + parser.add_argument("--tag", type=str) + + if "GITHUB_TOKEN" not in os.environ: + raise RuntimeError("need GITHUB_TOKEN") + args = parser.parse_args() + wheels = list_wheels(args.repo, args.tag) + update_wheel_page(wheels, args.site_path, args.tag, args.dry_run) + + +if __name__ == "__main__": + main() diff --git a/build_jaxlib/release/wheel_upload.py b/build_jaxlib/release/wheel_upload.py new file mode 100644 index 000000000..b254fafe2 --- /dev/null +++ b/build_jaxlib/release/wheel_upload.py @@ -0,0 +1,48 @@ +"""Upload wheels as assets of a release tag, replacing any same-named asset (source from tlcpack).""" +import github3 +import github3.session as session +import os +import logging +import argparse + + +def upload(args, path): + # gh = github3.login(token=os.environ["GITHUB_TOKEN"]) + gh = github3.GitHub(token=os.environ["GITHUB_TOKEN"], + session=session.GitHubSession(default_connect_timeout=100, default_read_timeout=100)) + repo = gh.repository(*args.repo.split("/")) + release = repo.release_from_tag(args.tag) + name = os.path.basename(path) + content_bytes = open(path, "rb").read() + + for asset in release.assets(): + if asset.name == name: + if not args.dry_run: + asset.delete() + print(f"Remove duplicated file {name}") + print(f"Start to upload {path} to {args.repo}, this can take a while...") + if not args.dry_run: + release.upload_asset("application/octet-stream", name, content_bytes) + print(f"Finish uploading {path}") + + +def main(): + logging.basicConfig(level=logging.WARNING) + parser = argparse.ArgumentParser(description="Upload wheel as an asset of a tag.") + parser.add_argument("--tag", type=str) + parser.add_argument("--repo", type=str, default="alpa-projects/alpa") + parser.add_argument("--dry-run", action="store_true") + 
parser.add_argument("--path", type=str) + + if "GITHUB_TOKEN" not in os.environ: + raise RuntimeError("need GITHUB_TOKEN") + args = parser.parse_args() + if os.path.isdir(args.path): + for name in os.listdir(args.path): + if name.endswith(".whl"): + upload(args, os.path.join(args.path, name)) + else: + upload(args, args.path) + +if __name__ == "__main__": + main() diff --git a/docker/scripts/build_jaxlib_docker_entrypoint.sh b/docker/scripts/build_jaxlib_docker_entrypoint.sh index 317169851..bcf7a38b9 100644 --- a/docker/scripts/build_jaxlib_docker_entrypoint.sh +++ b/docker/scripts/build_jaxlib_docker_entrypoint.sh @@ -13,7 +13,7 @@ export CUDA_PATH=/usr/local/cuda export LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH usage() { - echo "usage: ${0##*/} [3.7|3.8|3.9] [cuda|nocuda] [11.1|11.2|11.3] [tensorflow-alpa branch name]" + echo "usage: ${0##*/} [3.7|3.8|3.9] [cuda|nocuda] [11.1|11.2|11.3] [alpa branch name] [tensorflow-alpa branch name]" exit 1 } @@ -28,9 +28,20 @@ echo "Python version $PY_VERSION" # switch tensorflow-alpa branch if necessary git clone --recursive https://github.com/alpa-projects/alpa.git +# switch alpa branch if [[ $# -eq 4 ]] then - TF_BRANCH="$4" + ALPA_BRANCH="$4" + echo "Switch to alpa branch ALPA_BRANCH" + cd /build/alpa + git fetch origin +${ALPA_BRANCH} + git checkout -qf FETCH_HEAD + git submodule update --recursive + +# switch tensorflow-alpa branch, this will overwrite the above +if [[ $# -eq 5 ]] +then + TF_BRANCH="$5" echo "Switch to tensorflow-alpa branch $TF_BRANCH" cd /build/alpa/third_party/tensorflow-alpa git fetch origin +${TF_BRANCH}