Local LLM multi-GPU support (#1391)
rounak610 authored Jan 12, 2024
1 parent 60d91ff commit 7411a01
Showing 8 changed files with 165 additions and 8 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -36,4 +36,4 @@ COPY --from=compile-image /root/nltk_data /root/nltk_data

ENV PATH="/opt/venv/bin:$PATH"

-EXPOSE 8001
+EXPOSE 8001
45 changes: 45 additions & 0 deletions Dockerfile-gpu
@@ -0,0 +1,45 @@
# Define the CUDA SDK version you need
ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
FROM nvidia/cuda:${CUDA_IMAGE}

ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /app

RUN apt-get update && apt-get upgrade -y \
&& apt-get install -y git build-essential \
python3 python3-pip python3.10-venv libpq-dev gcc wget \
ocl-icd-opencl-dev opencl-headers clinfo \
libclblast-dev libopenblas-dev \
&& mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd

# Create a virtual environment and activate it
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Install Python dependencies from requirements.txt
COPY requirements.txt .
RUN pip install --upgrade pip && \
pip install --no-cache-dir -r requirements.txt

# Download the NLTK data required at runtime
RUN python3.10 -c "import nltk; nltk.download('punkt')" && \
python3.10 -c "import nltk; nltk.download('averaged_perceptron_tagger')"

# Copy the application code
COPY . .

ENV CUDA_DOCKER_ARCH=all
ENV LLAMA_CUBLAS=1

RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.7 --force-reinstall --upgrade --no-cache-dir

# Make necessary scripts executable
RUN chmod +x ./entrypoint.sh ./wait-for-it.sh ./install_tool_dependencies.sh ./entrypoint_celery.sh

# Set environment variable to point to the custom libllama.so
# ENV LLAMA_CPP_LIB=/app/llama.cpp/libllama.so

EXPOSE 8001

CMD ["./entrypoint.sh"]
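
A quick way to confirm that the wheel built by the `CMAKE_ARGS="-DLLAMA_CUBLAS=on"` step above actually offloads to the GPU is to load a model with verbose logging, which prints `BLAS = 1` for a cuBLAS build. A minimal sketch, not part of this commit, assuming a GGUF model is mounted at `/app/local_model_path` as in the compose file below:

```python
# Smoke test (illustrative) for the CUBLAS-enabled llama-cpp-python build;
# run inside the GPU container.
from llama_cpp import Llama

# n_gpu_layers=-1 offloads every layer; verbose=True prints the system-info
# line, which should include "BLAS = 1" if the cuBLAS rebuild took effect.
llm = Llama(model_path="/app/local_model_path", n_ctx=2048,
            n_gpu_layers=-1, verbose=True)
out = llm("Q: What is the capital of France? A:", max_tokens=8)
print(out["choices"][0]["text"])
```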
16 changes: 12 additions & 4 deletions README.MD
@@ -126,10 +126,18 @@ cd SuperAGI

4. Ensure that Docker is installed on your system. You can download and install it from [here](https://docs.docker.com/get-docker/).

-5. Once you have Docker Desktop running, run the following command in the in the SuperAGI directory :
-   ```
-   docker-compose up --build
-   ```
+5. Once you have Docker Desktop running, run the following command in the SuperAGI directory:
+
+   a. For regular usage:
+   ```
+   docker compose -f docker-compose.yaml up --build
+   ```
+
+   b. To use SuperAGI with local LLMs on a GPU, run:
+   ```
+   docker compose -f docker-compose-gpu.yml up --build
+   ```


6. Open your web browser and navigate to http://localhost:3000 to access SuperAGI.

1 change: 1 addition & 0 deletions config_template.yaml
@@ -122,3 +122,4 @@ ENGINE_ID: "stable-diffusion-xl-beta-v2-2-2"
## To use Qdrant for vector store
#QDRANT_HOST_NAME: YOUR_QDRANT_HOST_NAME
#QDRANT_PORT: YOUR_QDRANT_PORT
#GPU_LAYERS: NUMBER OF LAYERS TO OFFLOAD TO THE GPU WHEN USING LOCAL LLMS
97 changes: 97 additions & 0 deletions docker-compose-gpu.yml
@@ -0,0 +1,97 @@
version: '3.8'
services:
  backend:
    volumes:
      - "./:/app"
      - "/home/ubuntu/models/vicuna-7B-v1.5-GGUF/vicuna-7b-v1.5.Q5_K_M.gguf:/app/local_model_path"
    build:
      context: .
      dockerfile: Dockerfile-gpu
    depends_on:
      - super__redis
      - super__postgres
    networks:
      - super_network
    command: ["/app/wait-for-it.sh", "super__postgres:5432", "-t", "60", "--", "/app/entrypoint.sh"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

  celery:
    volumes:
      - "./:/app"
      - "${EXTERNAL_RESOURCE_DIR:-./workspace}:/app/ext"
      - "/home/ubuntu/models/vicuna-7B-v1.5-GGUF/vicuna-7b-v1.5.Q5_K_M.gguf:/app/local_model_path"
    build:
      context: .
      dockerfile: Dockerfile-gpu
    depends_on:
      - super__redis
      - super__postgres
    networks:
      - super_network
    command: ["/app/entrypoint_celery.sh"]
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

  gui:
    build:
      context: ./gui
      args:
        NEXT_PUBLIC_API_BASE_URL: "/api"
    networks:
      - super_network
    # volumes:
    #   - ./gui:/app
    #   - /app/node_modules/
    #   - /app/.next/

  super__redis:
    image: "redis/redis-stack-server:latest"
    networks:
      - super_network
    # uncomment to expose redis port to host
    # ports:
    #   - "6379:6379"
    volumes:
      - redis_data:/data

  super__postgres:
    image: "docker.io/library/postgres:15"
    environment:
      - POSTGRES_USER=superagi
      - POSTGRES_PASSWORD=password
      - POSTGRES_DB=super_agi_main
    volumes:
      - superagi_postgres_data:/var/lib/postgresql/data/
    networks:
      - super_network
    # uncomment to expose postgres port to host
    # ports:
    #   - "5432:5432"

  proxy:
    image: nginx:stable-alpine
    ports:
      - "3000:80"
    networks:
      - super_network
    depends_on:
      - backend
      - gui
    volumes:
      - ./nginx/default.conf:/etc/nginx/conf.d/default.conf

networks:
  super_network:
    driver: bridge
volumes:
  superagi_postgres_data:
  redis_data:
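
The `deploy.resources.reservations.devices` blocks above hand all host GPUs to the backend and celery containers, which requires the NVIDIA Container Toolkit on the host. A small sanity check that the reservation worked, assumed rather than taken from this commit:

```python
# Run inside the backend container: lists the GPUs the container can see.
# nvidia-smi is injected by the NVIDIA container runtime when GPUs are
# reserved; an error here usually means the toolkit is missing on the host.
import subprocess

result = subprocess.run(["nvidia-smi", "-L"], capture_output=True, text=True)
print(result.stdout or result.stderr)
```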
2 changes: 1 addition & 1 deletion requirements.txt
@@ -158,4 +158,4 @@ google-generativeai==0.1.0
unstructured==0.8.1
ai21==1.2.6
typing-extensions==4.5.0
-llama_cpp_python==0.2.7
+llama_cpp_python==0.2.7
2 changes: 1 addition & 1 deletion superagi/helper/llm_loader.py
@@ -22,7 +22,7 @@ def model(self):
        if self._model is None:
            try:
                self._model = Llama(
-                   model_path="/app/local_model_path", n_ctx=self.context_length)
+                   model_path="/app/local_model_path", n_ctx=self.context_length,
+                   n_gpu_layers=int(get_config('GPU_LAYERS', '-1')))
            except Exception as e:
                logger.error(e)
        return self._model
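
`GPU_LAYERS` comes from `config_template.yaml`, and `get_config` returns the raw configured value, so it is coerced to an int before being handed to llama.cpp. A standalone sketch of the equivalent call; the snippet is illustrative, with the path and default taken from the diff above:

```python
# Equivalent standalone call: -1 offloads every layer, 0 keeps inference
# on the CPU, and a positive N offloads the first N transformer layers.
from llama_cpp import Llama

gpu_layers = int("-1")  # stand-in for get_config('GPU_LAYERS', '-1')
llm = Llama(model_path="/app/local_model_path", n_ctx=4096,
            n_gpu_layers=gpu_layers)
```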
@@ -1,3 +1,5 @@
import unittest
from unittest.mock import patch
import pytest
from superagi.tools.duck_duck_go.duck_duck_go_search import DuckDuckGoSearchTool

@@ -11,9 +13,13 @@ def test_get_raw_duckduckgo_results_empty_query(self):
        result = self.your_obj.get_raw_duckduckgo_results(query)
        assert result == expected_result

-    def test_get_raw_duckduckgo_results_valid_query(self):
+    @patch('superagi.tools.duck_duck_go.duck_duck_go_search.DuckDuckGoSearchTool.get_raw_duckduckgo_results')
+    def test_get_raw_duckduckgo_results_valid_query(self, mock_get_raw_duckduckgo_results):
        query = "python"
        expected_result_length = 10
        mock_results = ['result1', 'result2', 'result3', 'result4', 'result5',
                        'result6', 'result7', 'result8', 'result9', 'result10']
        mock_get_raw_duckduckgo_results.return_value = mock_results
        result = self.your_obj.get_raw_duckduckgo_results(query)
        assert len(result) == expected_result_length
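
To run just the patched test, something like the following works; the file path is an assumption inferred from the import, since the diff does not show this file's path:

```python
# Hypothetical invocation of the patched test in isolation; the path is
# inferred from the module under test, not shown in the diff.
import pytest

pytest.main(["-q", "tests/unit_tests/tools/duck_duck_go/test_duckduckgo_search.py"])
```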
