diff --git a/.dockerignore b/.dockerignore
index 35b2f4cfe..c156a9b8a 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -3,6 +3,9 @@ models
 .github
 .vscode
 .DS_Store
+.mypy_cache
+.ruff_cache
+local_data
 terraform
 tests
 Dockerfile
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
new file mode 100644
index 000000000..a0a0ec160
--- /dev/null
+++ b/.github/workflows/docker.yml
@@ -0,0 +1,47 @@
+name: Create and publish a Docker image
+
+on:
+  release:
+    types: [ published ]
+  push:
+    branches:
+      - main
+  pull_request:
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=sha
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 170531a8c..309c2a17e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -23,10 +23,7 @@ FROM base as dependencies
 WORKDIR /home/worker/app
 COPY pyproject.toml poetry.lock ./
 
-RUN poetry install --with local
 RUN poetry install --with ui
-RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"\
-    poetry run pip install --force-reinstall --no-cache-dir llama-cpp-python
 
 FROM base as app
 
@@ -39,9 +36,11 @@ EXPOSE 8080
 RUN adduser --system worker
 WORKDIR /home/worker/app
 
-# Copy everything, including the virtual environment
-COPY --chown=worker --from=dependencies /home/worker/app .
-COPY --chown=worker . .
+RUN mkdir "local_data"; chown worker local_data
+COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
+COPY --chown=worker private_gpt/ private_gpt
+COPY --chown=worker docs/ docs
+COPY --chown=worker *.yaml *.md ./
 
 USER worker
 ENTRYPOINT .venv/bin/python -m private_gpt
\ No newline at end of file
diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py
index 61fe6fad1..079e72b86 100644
--- a/private_gpt/components/embedding/embedding_component.py
+++ b/private_gpt/components/embedding/embedding_component.py
@@ -13,7 +13,7 @@ class EmbeddingComponent:
     @inject
     def __init__(self) -> None:
         match settings.llm.mode:
-            case "local":
+            case "local" | "sagemaker":
                 from llama_index.embeddings import HuggingFaceEmbedding
 
                 self.embedding_model = HuggingFaceEmbedding(
diff --git a/private_gpt/components/llm/custom/sagemaker.py b/private_gpt/components/llm/custom/sagemaker.py
index 9286de616..08f56a955 100644
--- a/private_gpt/components/llm/custom/sagemaker.py
+++ b/private_gpt/components/llm/custom/sagemaker.py
@@ -21,8 +21,6 @@
 )
 
 if TYPE_CHECKING:
-    from collections.abc import Callable
-
     from llama_index.callbacks import CallbackManager
     from llama_index.llms import (
         CompletionResponseGen,
@@ -113,10 +111,10 @@ class SagemakerLLM(CustomLLM):
     context_window: int = Field(
         description="The maximum number of context tokens for the model."
     )
-    messages_to_prompt: Callable[..., str] = Field(
+    messages_to_prompt: Any = Field(
         description="The function to convert messages to a prompt.", exclude=True
     )
-    completion_to_prompt: Callable[..., str] = Field(
+    completion_to_prompt: Any = Field(
        description="The function to convert a completion to a prompt.", exclude=True
     )
     generate_kwargs: dict[str, Any] = Field(
diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index 2c32897c6..cbd71ce1f 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -37,6 +37,8 @@ def __init__(self) -> None:
 
                 self.llm = SagemakerLLM(
                     endpoint_name=settings.sagemaker.endpoint_name,
+                    messages_to_prompt=messages_to_prompt,
+                    completion_to_prompt=completion_to_prompt,
                 )
             case "openai":
                 from llama_index.llms import OpenAI
diff --git a/settings-docker.yaml b/settings-docker.yaml
index 73255da1b..12fea9f20 100644
--- a/settings-docker.yaml
+++ b/settings-docker.yaml
@@ -3,12 +3,15 @@ server:
   port: ${PORT:8080}
 
 llm:
-  mode: local
+  mode: ${PGPT_MODE:mock}
 
 local:
-  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.1-GGUF
-  llm_hf_model_file: mistral-7b-instruct-v0.1.Q4_K_M.gguf
-  embedding_hf_model_name: BAAI/bge-small-en-v1.5
+  llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
+  llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
+  embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
+
+sagemaker:
+  endpoint_name: ${PGPT_SAGEMAKER_ENDPOINT_NAME:}
 
 ui:
   enabled: true