Merge pull request #557 from sirajperson/main
This PR updates the docker files for launching local LLMs. It updates…
sirajperson authored Jun 30, 2023
2 parents 7f1c1a9 + ae019db commit c6cff18
Showing 3 changed files with 21 additions and 29 deletions.
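
For reference, the updated stack would normally be brought up by pointing docker compose at this file; the following is a minimal sketch, assuming the repository root as the working directory and that local-llm-gpu is passed via -f (the PR itself does not show the launch command):

# Build the images and start the services defined in local-llm-gpu
docker compose -f local-llm-gpu up --build -d

# Follow the text-generation-webui container logs
docker logs -f super__tgwui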
13 changes: 6 additions & 7 deletions local-llm-gpu
@@ -42,14 +42,16 @@ services:

super__tgwui:
build:
context: .
context: ./tgwui/
target: llama-cublas
dockerfile: ./tgwui/DockerfileTGWUI
dockerfile: DockerfileTGWUI
# args:
# - LCL_SRC_DIR=text-generation-webui # Developers - see Dockerfile app_base
image: atinoda/text-generation-webui:llama-cublas # Specify variant as the :tag
container_name: super__tgwui
environment:
- EXTRA_LAUNCH_ARGS="--listen --no-mmap --verbose --extensions openai --auto-devices --n_ctx 1600 --gpu-memory 20 20 --n-gpu-layers 128 --threads 8 --model vicuna-13b-cot.ggmlv3.q8_0.bin"
- EXTRA_LAUNCH_ARGS="--no-mmap --verbose --extensions openai --auto-devices --n_ctx 2000 --gpu-memory 22 22 --n-gpu-layers 128 --threads 8"
# - BUILD_EXTENSIONS_LIVE="silero_tts whisper_stt" # Install named extensions during every container launch. THIS WILL SIGNIFICANTLY SLOW LAUNCH TIME.
ports:
- 7860:7860 # Default web port
- 5000:5000 # Default API port
@@ -62,15 +64,14 @@ services:
- ./tgwui/config/prompts:/app/prompts
- ./tgwui/config/softprompts:/app/softprompts
- ./tgwui/config/training:/app/training
- ./tgwui/config/embeddings:/app/embeddings
# - ./config/extensions:/app/extensions
logging:
driver: json-file
options:
max-file: "3" # maximum number of log files to keep
max-size: '10m'
networks:
- super_network
### Uncomment the following lines to run the container using the host machine's GPU resources
deploy:
resources:
reservations:
@@ -79,8 +80,6 @@ services:
# count: "all"
device_ids: ['0', '1'] # must comment the above line if this line is uncommented.
capabilities: [gpu]


super__redis:
image: "docker.io/library/redis:latest"
networks:
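
Once the super__tgwui service is running, the GPU reservation above (device_ids '0' and '1') and the new EXTRA_LAUNCH_ARGS can be checked from inside the container; a minimal sketch, assuming the NVIDIA container toolkit is installed on the host:

# List the GPUs visible to the text-generation-webui container
docker exec -it super__tgwui nvidia-smi

# Confirm the launch flags the container was started with
docker exec super__tgwui printenv EXTRA_LAUNCH_ARGS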
37 changes: 15 additions & 22 deletions tgwui/DockerfileTGWUI
@@ -19,28 +19,26 @@ RUN git clone https://github.com/oobabooga/text-generation-webui /src
# To use local source: comment out the git clone command then set the build arg `LCL_SRC_DIR`
#ARG LCL_SRC_DIR="text-generation-webui"
#COPY ${LCL_SRC_DIR} /src
# This is required to get multi-gpu support until the main branch updates the requirements.txt file to include llama-cpp-python 0.1.59 or greater.

#################################
ENV LLAMA_CUBLAS=1
# Copy source to app
RUN cp -ar /src /app
# Install oobabooga/text-generation-webui
RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r /app/requirements.txt
# Install extensions
COPY tgwui/scripts/build_extensions.sh /app/scripts/build_extensions.sh
COPY ./scripts/build_extensions.sh /scripts/build_extensions.sh
RUN --mount=type=cache,target=/root/.cache/pip \
chmod +x /app/scripts/build_extensions.sh && . /app/scripts/build_extensions.sh

## Clone default GPTQ
RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git
## Build and install default GPTQ ('quant_cuda')
chmod +x /scripts/build_extensions.sh && . /scripts/build_extensions.sh
# Clone default GPTQ
RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda /app/repositories/GPTQ-for-LLaMa
# Build and install default GPTQ ('quant_cuda')
ARG TORCH_CUDA_ARCH_LIST="6.1;7.0;7.5;8.0;8.6+PTX"
RUN cd GPTQ-for-LLaMa/ && python3 setup_cuda.py install
RUN cd /app/repositories/GPTQ-for-LLaMa/ && python3 setup_cuda.py install

FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS base
# Runtime pre-reqs
RUN apt-get update && apt-get install --no-install-recommends -y \
python3-venv python3-dev git
python3-venv python3-dev git
# Copy app and src
COPY --from=app_base /app /app
COPY --from=app_base /src /src
@@ -49,30 +47,26 @@ COPY --from=app_base /venv /venv
ENV VIRTUAL_ENV=/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
## Link models directory to container
#ADD ./config/models/ /app/models/
# Finalise app setup
WORKDIR /app
EXPOSE 7860
EXPOSE 5000
EXPOSE 5005
EXPOSE 5001
# Required for Python print statements to appear in logs
ENV PYTHONUNBUFFERED=1
# Force variant layers to sync cache by setting --build-arg BUILD_DATE
ARG BUILD_DATE
ENV BUILD_DATE=$BUILD_DATE
RUN echo "$BUILD_DATE" > /build_date.txt

# Set embeddings model for llama
#ENV OPENEDAI_EMBEDDING_MODEL=/app/embeddings/SGPT-125M-weightedmean-nli-bitfit
#COPY tgwui/config/embeddings/SGPT-125M-weightedmean-nli-bitfit /app/embeddings
#RUN echo -e "Embeddings model $OPENEDAI_EMBEDDING_MODEL"
#RUN python extensions/openai/cache_embedding_model.py

# Copy and enable all scripts
COPY ./scripts /scripts
RUN chmod +x /scripts/*
# Run
COPY tgwui/scripts/docker-entrypoint.sh /scripts/docker-entrypoint.sh
RUN chmod +x /scripts/docker-entrypoint.sh
ENTRYPOINT ["/scripts/docker-entrypoint.sh"]


# VARIANT BUILDS
FROM base AS cuda
RUN echo "CUDA" >> /variant.txt
@@ -100,8 +94,7 @@ FROM base AS llama-cublas
RUN echo "LLAMA-CUBLAS" >> /variant.txt
RUN apt-get install --no-install-recommends -y git python3-dev build-essential python3-pip
ENV LLAMA_CUBLAS=1
RUN pip uninstall -y llama-cpp-python && \
CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
RUN pip uninstall -y llama-cpp-python && pip install llama-cpp-python
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]

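
The compose change above (context ./tgwui/, dockerfile DockerfileTGWUI, target llama-cublas) corresponds roughly to the following manual build; a sketch run from the repository root, with the BUILD_DATE value shown here being an illustrative choice rather than anything mandated by the Dockerfile:

# Build the llama-cublas variant against the new ./tgwui build context
docker build \
  -f tgwui/DockerfileTGWUI \
  --target llama-cublas \
  --build-arg BUILD_DATE="$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
  -t atinoda/text-generation-webui:llama-cublas \
  tgwui/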
Empty file.
