feature: add llama api using local models #108

Closed · wants to merge 8 commits
9 changes: 7 additions & 2 deletions .env
@@ -1,2 +1,7 @@
# Uncomment the following line to use the mocked API
#VITE_API_BASE=http://localhost:5174
# # Uncomment the following line to use the llama or mocked API
# VITE_API_BASE=http://localhost:5174

# # Uncomment and provide the path to your model file
# # the models directory is mounted into the container and
# # is the default location for the model file
# MODEL=/models/ggml-vicuna-7b-4bit.bin
3 changes: 3 additions & 0 deletions .gitignore
@@ -25,3 +25,6 @@ dist-ssr
*.sln
*.sw?
*.env

# llama-api
models
9 changes: 9 additions & 0 deletions README.md
@@ -44,6 +44,15 @@ git subtree pull --prefix src/awesome-chatgpt-prompts https://github.com/f/aweso
docker compose up -d
```

## Llama api
If you want to use local/offline models with llama.cpp, you can use the llama-cpp-python API instead.

To use the llama API:
- Copy your model files into a folder called `models` at the root of the project.
- Edit the `docker-compose.yml` file at the root of the project and uncomment the `llama_api` service and the `depends_on` entry in the `chatgpt_web` service.
- Edit the `.env` file at the root of the project and uncomment the key `VITE_API_BASE=http://localhost:5174`. You will also need to uncomment the `MODEL` key and point it at the model file you wish to use (see the example below).
- Run the `docker compose up -d` command above.
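
For example, a `.env` configured for the llama API could look like the following (a minimal sketch, using the default Vicuna model path from this PR; adjust `MODEL` to your own file):

```
# Point the web UI at the local llama API
VITE_API_BASE=http://localhost:5174

# Path to the model inside the container (./models is mounted at /models)
MODEL=/models/ggml-vicuna-7b-4bit.bin
```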

## Mocked api
If you don't want to wait for the API to respond, you can use the mocked API instead. To use the mocked API, uncomment the `mocked_api` service (and its `depends_on` entry) in `docker-compose.yml`, then edit the `.env` file at the root of the project and set the key `VITE_API_BASE=http://localhost:5174` in it. Then, run the `docker compose up -d` command above.

30 changes: 20 additions & 10 deletions docker-compose.yml
@@ -4,8 +4,9 @@ services:
chatgpt_web:
container_name: chatgpt_web
restart: always
depends_on:
- mocked_api
# depends_on:
# - mocked_api
# - llama_api
env_file:
- .env
ports:
@@ -16,11 +17,20 @@
context: "."
dockerfile: Dockerfile

mocked_api:
container_name: mocked_api
build:
context: "."
dockerfile: mocked_api/Dockerfile-mockapi
restart: always
ports:
- 5174:5174
# mocked_api:
# container_name: mocked_api
# build:
# context: "."
# dockerfile: mocked_api/Dockerfile-mockapi
# restart: always
# ports:
# - 5174:5174

# llama_api:
# build: ./llama_api/.
# ports:
# - 5174:8000
# env_file:
# - .env
# volumes:
# - ./models/:/models:cached
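
With the `llama_api` service and the `depends_on` block uncommented as described in the README, the stack can be brought up and checked with standard compose commands (a sketch; service names as defined above):

```
# Build and start the web UI together with the llama API
docker compose up -d --build

# Follow the llama API logs to confirm the model is found (or downloaded)
docker compose logs -f llama_api
```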
18 changes: 18 additions & 0 deletions llama_api/Dockerfile
@@ -0,0 +1,18 @@
FROM python:3.11-buster

RUN pip3 --disable-pip-version-check --no-cache-dir \
install \
llama-cpp-python \
uvicorn \
fastapi \
sse_starlette \
typing_extensions


RUN apt-get update && apt-get install -y git
RUN git clone https://github.com/abetlen/llama-cpp-python.git /app

EXPOSE 8000

ADD ./entrypoint.sh /
ENTRYPOINT [ "sh", "/entrypoint.sh" ]
13 changes: 13 additions & 0 deletions llama_api/entrypoint.sh
@@ -0,0 +1,13 @@
#!/bin/bash
set -e

echo "MODEL=$MODEL"

if [ "$MODEL" = "/models/ggml-vicuna-7b-4bit.bin" ]; then
# If the file doesn't exist, download it.
if [ ! -f "$MODEL" ]; then
wget --no-clobber https://huggingface.co/eachadea/ggml-vicuna-7b-4bit/resolve/main/ggml-vicuna-7b-4bit.bin -O /models/ggml-vicuna-7b-4bit.bin;
fi
fi

exec uvicorn --app-dir=/app/examples/high_level_api --reload --host 0.0.0.0 fastapi_server:app
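
Once the server is up, it can be smoke-tested from the host. This sketch assumes the example server exposes an OpenAI-style `/v1/completions` route (which the llama-cpp-python FastAPI example is meant to provide) and that container port 8000 is mapped to 5174 as in the compose file:

```
# Ask the local model for a short completion
curl http://localhost:5174/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Q: Name the planets in the solar system. A:", "max_tokens": 64}'
```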