zylon-ai · pabloogc · Oct 27, 2023 · Oct 26, 2023 · Oct 27, 2023 · Oct 27, 2023
diff --git a/.dockerignore b/.dockerignore
@@ -3,6 +3,9 @@ models
 .github
 .vscode
 .DS_Store
+.mypy_cache
+.ruff_cache
+local_data
 terraform
 tests
 Dockerfile

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -0,0 +1,56 @@
+# Builds the project's Docker image and publishes it to the GitHub Container
+# registry (ghcr.io). Pull-request runs build the image without pushing it.
+name: Create and publish a Docker image
+
+on:
+  release:
+    types: [published]
+  push:
+    branches:
+      - main
+  pull_request:
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+# There is a single job in this workflow; it runs on the latest available Ubuntu runner.
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      # Logs in to the registry with the workflow's `GITHUB_TOKEN`; published
+      # packages are scoped to the account used here.
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      # Derives image tags and labels from the triggering event (branch, PR, semver tag, commit SHA).
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=sha
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          # Pull-request runs only verify that the image builds: the token
+          # issued to PRs from forks is read-only, so a push would fail.
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/Dockerfile b/Dockerfile
@@ -23,10 +23,7 @@ FROM base as dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 
-RUN poetry install --with local
 RUN poetry install --with ui
-RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"\
-    poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python
 
 FROM base as app
 
@@ -39,9 +36,11 @@ EXPOSE 8080
 RUN adduser --system worker
 WORKDIR /home/worker/app
 
-# Copy everything, including the virtual environment
-COPY --chown=worker --from=dependencies /home/worker/app .
-COPY --chown=worker . .
+RUN mkdir "local_data"; chown worker local_data
+COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
+COPY --chown=worker private_gpt/ private_gpt
+COPY --chown=worker docs/ docs
+COPY --chown=worker *.yaml *.md ./
 
 USER worker
 ENTRYPOINT .venv/bin/python -m private_gpt
diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py
@@ -13,7 +13,7 @@ class EmbeddingComponent:
     @inject
     def __init__(self) -> None:
         match settings.llm.mode:
-            case "local":
+            case "local" | "sagemaker":
                 from llama_index.embeddings import HuggingFaceEmbedding
 
                 self.embedding_model = HuggingFaceEmbedding(

diff --git a/private_gpt/components/llm/custom/sagemaker.py b/private_gpt/components/llm/custom/sagemaker.py
@@ -21,8 +21,6 @@
 )
 
 if TYPE_CHECKING:
-    from collections.abc import Callable
-
     from llama_index.callbacks import CallbackManager
     from llama_index.llms import (
         CompletionResponseGen,
@@ -113,10 +111,10 @@ class SagemakerLLM(CustomLLM):
     context_window: int = Field(
         description="The maximum number of context tokens for the model."
     )
-    messages_to_prompt: Callable[..., str] = Field(
+    messages_to_prompt: Any = Field(
         description="The function to convert messages to a prompt.", exclude=True
     )
-    completion_to_prompt: Callable[..., str] = Field(
+    completion_to_prompt: Any = Field(
         description="The function to convert a completion to a prompt.", exclude=True
     )
     generate_kwargs: dict[str, Any] = Field(

diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
@@ -37,6 +37,8 @@ def __init__(self) -> None:
 
                 self.llm = SagemakerLLM(
                     endpoint_name=settings.sagemaker.endpoint_name,
+                    messages_to_prompt=messages_to_prompt,
+                    completion_to_prompt=completion_to_prompt,
                 )
             case "openai":
                 from llama_index.llms import OpenAI

diff --git a/settings-docker.yaml b/settings-docker.yaml
@@ -3,12 +3,15 @@ server:
   port: ${PORT:8080}
 
 llm:
-  mode: local
+  mode: ${PGPT_MODE:mock}
 
 local:
-  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.1-GGUF
-  llm_hf_model_file: mistral-7b-instruct-v0.1.Q4_K_M.gguf
-  embedding_hf_model_name: BAAI/bge-small-en-v1.5
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
+
+sagemaker:
+  endpoint_name: ${PGPT_SAGEMAKER_ENDPOINT_NAME:}
 
 ui:
   enabled: true