diff --git a/patches/vllm-project/vllm/v0.5.5/0001-pascal-support.patch b/patches/vllm-project/vllm/v0.5.5/0001-pascal-support.patch index b758ba3..9f8eee5 100644 --- a/patches/vllm-project/vllm/v0.5.5/0001-pascal-support.patch +++ b/patches/vllm-project/vllm/v0.5.5/0001-pascal-support.patch @@ -9,3 +9,17 @@ # Supported AMD GPU architectures. set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100") +--- a/Dockerfile ++++ b/Dockerfile +@@ -178,7 +178,10 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ + # install vllm wheel first, so that torch etc will be installed + RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ + --mount=type=cache,target=/root/.cache/pip \ +- python3 -m pip install dist/*.whl --verbose ++ python3 -m pip install dist/*.whl --verbose && \ ++ export PIP_EXTRA_INDEX_URL="https://sasha0552.github.io/pascal-pkgs-ci/" && \ ++ python3 -m pip install transient-package && \ ++ transient-package install --source triton --target triton-pascal + + RUN --mount=type=bind,from=mamba-builder,src=/usr/src/mamba,target=/usr/src/mamba \ + --mount=type=cache,target=/root/.cache/pip \ diff --git a/patches/vllm-project/vllm/v0.5.5/0002-docker-install-patched-triton.patch b/patches/vllm-project/vllm/v0.5.5/0002-docker-install-patched-triton.patch deleted file mode 100644 index f869c57..0000000 --- a/patches/vllm-project/vllm/v0.5.5/0002-docker-install-patched-triton.patch +++ /dev/null @@ -1,14 +0,0 @@ ---- a/Dockerfile -+++ b/Dockerfile -@@ -178,7 +178,10 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ - # install vllm wheel first, so that torch etc will be installed - RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ - --mount=type=cache,target=/root/.cache/pip \ -- python3 -m pip install dist/*.whl --verbose -+ python3 -m pip install dist/*.whl --verbose && \ -+ export PIP_EXTRA_INDEX_URL="https://sasha0552.github.io/pascal-pkgs-ci/" && \ -+ python3 -m pip install transient-package && \ -+ transient-package install --source triton --target triton-pascal - - RUN --mount=type=bind,from=mamba-builder,src=/usr/src/mamba,target=/usr/src/mamba \ - --mount=type=cache,target=/root/.cache/pip \ diff --git a/patches/vllm-project/vllm/v0.6.2/0000-sccache.patch b/patches/vllm-project/vllm/v0.6.2/0000-sccache.patch index aad2191..79258a2 100644 --- a/patches/vllm-project/vllm/v0.6.2/0000-sccache.patch +++ b/patches/vllm-project/vllm/v0.6.2/0000-sccache.patch @@ -1,28 +1,21 @@ --- a/Dockerfile +++ b/Dockerfile -@@ -80,9 +80,9 @@ ARG nvcc_threads=8 - ENV NVCC_THREADS=$nvcc_threads - - ARG USE_SCCACHE --ARG SCCACHE_BUCKET_NAME=vllm-build-sccache --ARG SCCACHE_REGION_NAME=us-west-2 --ARG SCCACHE_S3_NO_CREDENTIALS=0 -+ARG ACTIONS_CACHE_URL -+ARG ACTIONS_RUNTIME_TOKEN -+ARG SCCACHE_GHA_ENABLED=0 +@@ -86,15 +86,15 @@ ARG SCCACHE_S3_NO_CREDENTIALS=0 # if USE_SCCACHE is set, use sccache to speed up compilation RUN --mount=type=cache,target=/root/.cache/pip \ --mount=type=bind,source=.git,target=.git \ -@@ -92,9 +92,9 @@ RUN --mount=type=cache,target=/root/.cache/pip \ ++ --mount=type=secret,id=ACTIONS_CACHE_URL,env=ACTIONS_CACHE_URL \ ++ --mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,env=ACTIONS_RUNTIME_TOKEN \ + if [ "$USE_SCCACHE" = "1" ]; then \ + echo "Installing sccache..." \ + && curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \ && tar -xzf sccache.tar.gz \ && sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \ && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \ - && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \ - && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \ - && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \ -+ && export ACTIONS_CACHE_URL=${ACTIONS_CACHE_URL} \ -+ && export ACTIONS_RUNTIME_TOKEN=${ACTIONS_RUNTIME_TOKEN} \ -+ && export SCCACHE_GHA_ENABLED=${SCCACHE_GHA_ENABLED} \ ++ && export SCCACHE_GHA_ENABLED=1 \ && export SCCACHE_IDLE_TIMEOUT=0 \ && export CMAKE_BUILD_TYPE=Release \ && sccache --show-stats \ diff --git a/patches/vllm-project/vllm/v0.6.2/0002-docker-install-patched-triton.patch b/patches/vllm-project/vllm/v0.6.2/0002-docker-install-patched-triton.patch deleted file mode 120000 index dcab592..0000000 --- a/patches/vllm-project/vllm/v0.6.2/0002-docker-install-patched-triton.patch +++ /dev/null @@ -1 +0,0 @@ -../v0.5.5/0002-docker-install-patched-triton.patch \ No newline at end of file