Skip to content

Commit

Permalink
⚙️ Refactor TGI Dockerfile to support Google-Cloud-Containers as a ta…
Browse files Browse the repository at this point in the history
…rget (#127)
  • Loading branch information
baptistecolle authored Dec 19, 2024
1 parent e302950 commit 1d74594
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 15 deletions.
12 changes: 9 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))

.PHONY: build_dist style style_check clean

TGI_VERSION ?= 690702b1ce9a27ce5bdf2a9dd3a80277ecea12cd
TGI_VERSION ?= v2.4.1

# rwildcard: recursive variant of $(wildcard). $1 is a list of glob patterns.
# Each pattern is first expanded as-is; then, for every entry found in the
# pattern's directory, the pattern's file-name part is appended to that entry
# and rwildcard is called again, so matches in nested directories are returned too.
rwildcard=$(wildcard $1) $(foreach d,$1,$(call rwildcard,$(addsuffix /$(notdir $d),$(wildcard $(dir $d)*))))

Expand All @@ -42,8 +42,6 @@ clean:
rm -rf dist deps
make -C text-generation-inference/server/ clean

# ulimit nofile=100000:100000 is required for TPUs
# https://cloud.google.com/kubernetes-engine/docs/how-to/tpus#privileged-mode
tpu-tgi:
docker build --rm -f text-generation-inference/docker/Dockerfile \
--build-arg VERSION=$(VERSION) \
Expand All @@ -61,6 +59,14 @@ tpu-tgi-ie:
-t huggingface/optimum-tpu:$(VERSION)-tgi .
docker tag huggingface/optimum-tpu:$(VERSION)-tgi huggingface/optimum-tpu:latest-ie

# Build the TGI image for the Google-Cloud-Containers target and tag it latest-gcp.
# The build argument must be named ENABLE_GOOGLE_FEATURE: that is the only
# switch the Dockerfile tests (cargo `--features google`, PORT/HF_HOME
# defaults, Google Cloud SDK install). Any other name is silently ignored by
# the build and produces an image without the Google integration.
# The target produces no file of its own name, so it is declared phony.
.PHONY: tpu-tgi-gcp
tpu-tgi-gcp:
	docker build --rm -f text-generation-inference/docker/Dockerfile \
	            --target google-cloud-containers \
	            --build-arg ENABLE_GOOGLE_FEATURE=1 \
	            --ulimit nofile=100000:100000 \
	            -t huggingface/optimum-tpu:$(VERSION)-tgi-gcp .
	docker tag huggingface/optimum-tpu:$(VERSION)-tgi-gcp huggingface/optimum-tpu:latest-gcp

# Run code quality checks
style_check:
ruff check .
Expand Down
75 changes: 63 additions & 12 deletions text-generation-inference/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Fetch and extract the TGI sources (TGI_VERSION is mandatory)
# Fetch and extract the TGI sources
FROM alpine AS tgi
ARG TGI_VERSION
# TGI version 2.4.1 by default
ARG TGI_VERSION=v2.4.1
RUN test -n ${TGI_VERSION:?}
RUN mkdir -p /tgi
ADD https://github.com/huggingface/text-generation-inference/archive/${TGI_VERSION}.tar.gz /tgi/sources.tar.gz
Expand All @@ -25,6 +26,8 @@ COPY --from=tgi /tgi/launcher launcher
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder
ARG ENABLE_GOOGLE_FEATURE
RUN echo "Google Feature Status: ${ENABLE_GOOGLE_FEATURE}"

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
python3.11-dev
Expand All @@ -45,7 +48,11 @@ COPY --from=tgi /tgi/benchmark benchmark
COPY --from=tgi /tgi/router router
COPY --from=tgi /tgi/backends backends
COPY --from=tgi /tgi/launcher launcher
RUN cargo build --profile release-opt
RUN if [ -n "$ENABLE_GOOGLE_FEATURE" ]; then \
cargo build --profile release-opt --features google; \
else \
cargo build --profile release-opt; \
fi

# Python base image
FROM ubuntu:22.04 AS base
Expand All @@ -55,14 +62,32 @@ RUN apt-get update -y \
python3-pip \
python3-setuptools \
python-is-python3 \
git \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
RUN pip3 --no-cache-dir install --upgrade pip

# VERSION is a mandatory parameter
ARG VERSION
ARG ENABLE_GOOGLE_FEATURE
ARG VERSION='0.2.2'
RUN test -n ${VERSION:?}

# Stage that places the optimum-tpu sources at /opt/optimum-tpu so that later
# stages can COPY --from it instead of reading the build context directly.
FROM base AS optimum-tpu-installer

# Stash the build context; whether it is the optimum-tpu repository is checked below.
COPY . /tmp/src

RUN if [ -n "$ENABLE_GOOGLE_FEATURE" ]; then \
# GCP build: the build context is the huggingface/Google-Cloud-Containers repository, not huggingface/optimum-tpu, so clone optimum-tpu and check out the tag matching VERSION
git clone https://github.com/huggingface/optimum-tpu.git /opt/optimum-tpu && \
cd /opt/optimum-tpu && git checkout v${VERSION}; \
fi && \
# Copy the build context: it becomes /opt/optimum-tpu when nothing was cloned, or lands inside the existing clone as /opt/optimum-tpu/src otherwise
cp -a /tmp/src /opt/optimum-tpu && \
# Sanity check: the optimum package directory must exist, whether it came from the clone or from the build context
if [ ! -d "/opt/optimum-tpu/optimum" ]; then \
echo "Error: Building from incorrect repository. This build must be run from optimum-tpu repo. If building from google-cloud-containers repo, set ENABLE_GOOGLE_FEATURE=1 to automatically clone optimum-tpu" && \
exit 1; \
fi


# Python server build image
FROM base AS pyserver

Expand All @@ -75,7 +100,7 @@ RUN apt-get update -y \

RUN install -d /pyserver
WORKDIR /pyserver
COPY text-generation-inference/server server
COPY --from=optimum-tpu-installer /opt/optimum-tpu/text-generation-inference/server server
COPY --from=tgi /tgi/proto proto
RUN pip3 install -r server/build-requirements.txt
RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto VERSION=${VERSION} make -C server gen-server
Expand All @@ -94,6 +119,7 @@ RUN apt-get update -y \
git \
gnupg2 \
wget \
curl \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean

Expand All @@ -105,19 +131,23 @@ ARG TRANSFORMERS_VERSION='4.46.3'
ARG ACCELERATE_VERSION='1.1.1'
ARG SAFETENSORS_VERSION='0.4.5'

# TGI base env
ENV HF_HOME=/data \
HF_HUB_ENABLE_HF_TRANSFER=1 \
PORT=80 \
VERSION=${VERSION}
ARG ENABLE_GOOGLE_FEATURE

ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV VERSION=${VERSION}

COPY . /opt/optimum-tpu
ENV PORT=${ENABLE_GOOGLE_FEATURE:+8080}
ENV PORT=${PORT:-80}

ENV HF_HOME=${ENABLE_GOOGLE_FEATURE:+/tmp}
ENV HF_HOME=${HF_HOME:-/data}

# Install requirements for TGI, that uses python3.11
RUN python3.11 -m pip install transformers==${TRANSFORMERS_VERSION}

# Install requirements for optimum-tpu, then for TGI then optimum-tpu
RUN python3 -m pip install hf_transfer safetensors==${SAFETENSORS_VERSION} typer
COPY --from=optimum-tpu-installer /opt/optimum-tpu /opt/optimum-tpu
RUN python3 /opt/optimum-tpu/optimum/tpu/cli.py install-jetstream-pytorch --yes
RUN python3 -m pip install -e /opt/optimum-tpu \
-f https://storage.googleapis.com/libtpu-releases/index.html
Expand All @@ -138,7 +168,28 @@ FROM tpu_base AS inference-endpoint

COPY text-generation-inference/docker/entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh
ENTRYPOINT ["./entrypoint.sh"]

# Image variant for Google Cloud containers, built on top of tpu_base.
FROM tpu_base AS google-cloud-containers

# Install the Google Cloud SDK, but only when ENABLE_GOOGLE_FEATURE is set.
#  - The repository signing key is de-armoured with gpg into a dedicated
#    keyring referenced by signed-by (apt-key is deprecated).
#  - curl runs with -f so an HTTP error page is not piped into the keyring.
#  - The apt package lists are removed after the last install so they do not
#    stay in the image layer.
RUN if [ -n "$ENABLE_GOOGLE_FEATURE" ]; then \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        gnupg && \
    curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
        | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \
    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
    apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y google-cloud-sdk && \
    rm -rf /var/lib/apt/lists/*; \
    fi

# Custom entrypoint for Google
# NOTE(review): the trailing `*` turns the source into a glob, presumably so the
# COPY does not fail when the file is missing from the build context — confirm.
COPY --chmod=775 containers/tgi/tpu/${VERSION}/entrypoint.sh* entrypoint.sh
ENTRYPOINT ["./entrypoint.sh"]

# TPU compatible image
Expand Down

0 comments on commit 1d74594

Please sign in to comment.