diff --git a/.github/actions/build-vllm/action.yml b/.github/actions/build-vllm/action.yml
index 2ec3f8d..c956382 100644
--- a/.github/actions/build-vllm/action.yml
+++ b/.github/actions/build-vllm/action.yml
@@ -23,6 +23,31 @@ runs:
     - name: Expose cache environment variables
       uses: ./.github/actions/common/expose-cache-vars
 
+    - name: Compute vLLM version
+      uses: actions/github-script@v7
+      env:
+        # Composite-action inputs are not forwarded to nested steps as INPUT_*
+        # variables, so core.getInput('ref') would return an empty string here.
+        REF: ${{ inputs.ref }}
+      with:
+        script: |
+          // ref
+          const ref = process.env.REF || '';
+
+          // export version for setuptools_scm
+          if (ref.startsWith("v")) {
+            core.exportVariable('SETUPTOOLS_SCM_PRETEND_VERSION_FOR_VLLM', ref.slice(1));
+          } else if (ref === "main") {
+            core.exportVariable('SETUPTOOLS_SCM_PRETEND_VERSION_FOR_VLLM', '999.999.999');
+          }
+
+          // export version for docker
+          if (ref === "main") {
+            core.exportVariable('DOCKER_TAGS', `ghcr.io/${context.repo.owner}/vllm:latest`);
+          } else {
+            core.exportVariable('DOCKER_TAGS', `ghcr.io/${context.repo.owner}/vllm:${ref}`);
+          }
+
     - name: Checkout ${{ inputs.repository }} (${{ inputs.ref }})
       uses: actions/checkout@v4
       with:
@@ -56,7 +81,7 @@ runs:
         context: ${{ inputs.repository }}/${{ inputs.ref }}
         outputs: type=tar,dest=build.tar
         push: false
-        tags: ghcr.io/${{ github.repository_owner }}/vllm:${{ inputs.ref }}
+        tags: ${{ env.DOCKER_TAGS }}
         target: build
 
         build-args: |
@@ -68,6 +93,7 @@ runs:
         secrets: |
           ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
           ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
+          SETUPTOOLS_SCM_PRETEND_VERSION_FOR_VLLM=${{ env.SETUPTOOLS_SCM_PRETEND_VERSION_FOR_VLLM }}
 
     - name: Copy wheel files
       shell: bash
@@ -94,7 +120,7 @@ runs:
         cache-from: type=local,src=/var/tmp/buildx-cache
         context: ${{ inputs.repository }}/${{ inputs.ref }}
         push: true
-        tags: ghcr.io/${{ github.repository_owner }}/vllm:${{ inputs.ref }}
+        tags: ${{ env.DOCKER_TAGS }}
         target: vllm-openai
 
         build-args: |
diff --git a/.github/workflows/vllm-pascal-main.yml b/.github/workflows/vllm-pascal-main.yml
index 53c1dd7..57a6f37 100644
--- a/.github/workflows/vllm-pascal-main.yml
+++ b/.github/workflows/vllm-pascal-main.yml
@@ -28,7 +28,7 @@ jobs:
       - name: Publish wheels
         uses: ./.github/actions/common/publish-wheels
         with:
-          prefix: vllm_pascal-0.1.dev
+          prefix: vllm_pascal-999.999.999+
 
   update-index:
     needs: publish
diff --git a/README.md b/README.md
index e32047c..5b21dcb 100644
--- a/README.md
+++ b/README.md
@@ -15,8 +15,8 @@ The main repository for building Pascal-compatible versions of ML applications a
 
 ```sh
 # Pull the vLLM image
-docker pull ghcr.io/sasha0552/vllm:v0.6.3 # note that the tag "latest" is not supported.
-                                          # use the tag "main" for the nightly version.
+docker pull ghcr.io/sasha0552/vllm:v0.6.3 # you can omit the version specifier
+                                          # to install the nightly version
 
 # You can now follow the official vLLM documentation.
 # Replace the official image with this one.
@@ -72,7 +72,8 @@ python -m venv venv
 source venv/bin/activate
 
 # Install vLLM
-pip3 install vllm-pascal
+pip3 install vllm-pascal==0.6.3 # you can omit the version specifier
+                                # to install the nightly version
 
 # Install patched triton
 transient-package install \
diff --git a/patches/vllm-project/vllm/v0.5.5/0000-sccache.patch b/patches/vllm-project/vllm/v0.5.5/0000-environment-variables.patch
similarity index 85%
rename from patches/vllm-project/vllm/v0.5.5/0000-sccache.patch
rename to patches/vllm-project/vllm/v0.5.5/0000-environment-variables.patch
index 20ae1c1..9bf45b4 100644
--- a/patches/vllm-project/vllm/v0.5.5/0000-sccache.patch
+++ b/patches/vllm-project/vllm/v0.5.5/0000-environment-variables.patch
@@ -1,11 +1,12 @@
 --- a/Dockerfile
 +++ b/Dockerfile
-@@ -89,14 +89,15 @@ ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
+@@ -89,14 +89,16 @@ ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
  ARG SCCACHE_REGION_NAME=us-west-2
  # if USE_SCCACHE is set, use sccache to speed up compilation
  RUN --mount=type=cache,target=/root/.cache/pip \
 +    --mount=type=secret,id=ACTIONS_CACHE_URL,env=ACTIONS_CACHE_URL \
 +    --mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,env=ACTIONS_RUNTIME_TOKEN \
++    --mount=type=secret,id=SETUPTOOLS_SCM_PRETEND_VERSION_FOR_VLLM,env=SETUPTOOLS_SCM_PRETEND_VERSION_FOR_VLLM \
      if [ "$USE_SCCACHE" = "1" ]; then \
          echo "Installing sccache..." \
          && curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \
diff --git a/patches/vllm-project/vllm/v0.6.2/0000-sccache.patch b/patches/vllm-project/vllm/v0.6.2/0000-environment-variables.patch
similarity index 87%
rename from patches/vllm-project/vllm/v0.6.2/0000-sccache.patch
rename to patches/vllm-project/vllm/v0.6.2/0000-environment-variables.patch
index 79258a2..82809d1 100644
--- a/patches/vllm-project/vllm/v0.6.2/0000-sccache.patch
+++ b/patches/vllm-project/vllm/v0.6.2/0000-environment-variables.patch
@@ -1,11 +1,12 @@
 --- a/Dockerfile
 +++ b/Dockerfile
-@@ -86,15 +86,15 @@ ARG SCCACHE_S3_NO_CREDENTIALS=0
+@@ -86,15 +86,16 @@ ARG SCCACHE_S3_NO_CREDENTIALS=0
  # if USE_SCCACHE is set, use sccache to speed up compilation
  RUN --mount=type=cache,target=/root/.cache/pip \
      --mount=type=bind,source=.git,target=.git \
 +    --mount=type=secret,id=ACTIONS_CACHE_URL,env=ACTIONS_CACHE_URL \
 +    --mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,env=ACTIONS_RUNTIME_TOKEN \
++    --mount=type=secret,id=SETUPTOOLS_SCM_PRETEND_VERSION_FOR_VLLM,env=SETUPTOOLS_SCM_PRETEND_VERSION_FOR_VLLM \
      if [ "$USE_SCCACHE" = "1" ]; then \
          echo "Installing sccache..." \
          && curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \