From 6bbec79583b7f28d9bea4b39c099ebef149db843 Mon Sep 17 00:00:00 2001
From: Ygal Blum
Date: Fri, 9 Feb 2024 16:50:50 +0200
Subject: [PATCH] feat(llm): Add support for Ollama LLM (#1526)

---
 fern/docs/pages/manual/llms.mdx             | 30 +++++++++++++++++++++
 private_gpt/components/llm/llm_component.py |  7 +++++
 private_gpt/settings/settings.py            | 14 +++++++++-
 settings.yaml                               |  3 +++
 4 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/fern/docs/pages/manual/llms.mdx b/fern/docs/pages/manual/llms.mdx
index 059fb594e..7445bff19 100644
--- a/fern/docs/pages/manual/llms.mdx
+++ b/fern/docs/pages/manual/llms.mdx
@@ -102,3 +102,33 @@ or
 
 When the server is started it will print a log *Application startup complete*.
 Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
+
+### Using Ollama
+
+Another option for a fully private setup is using [Ollama](https://ollama.ai/).
+
+Note: how to deploy Ollama and pull models into it is outside the scope of this documentation.
+
+To set it up, create a profile `settings-ollama.yaml` with the following contents:
+
+```yaml
+llm:
+  mode: ollama
+
+ollama:
+  model: # Required. Model to use.
+         # Note: Ollama models are listed here: https://ollama.ai/library
+         # Be sure to pull the model to your Ollama server first
+  api_base: # Defaults to http://localhost:11434
+```
+
+Then run PrivateGPT loading the profile you just created:
+
+`PGPT_PROFILES=ollama make run`
+
+or
+
+`PGPT_PROFILES=ollama poetry run python -m private_gpt`
+
+When the server is started it will print a log *Application startup complete*.
+Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index 971cfa3bf..eebbdff0b 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -80,3 +80,10 @@ def __init__(self, settings: Settings) -> None:
                 )
             case "mock":
                 self.llm = MockLLM()
+            case "ollama":
+                from llama_index.llms import Ollama
+
+                ollama_settings = settings.ollama
+                self.llm = Ollama(
+                    model=ollama_settings.model, base_url=ollama_settings.api_base
+                )
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 499ce66d7..ed65c203a 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -81,7 +81,7 @@ class DataSettings(BaseModel):
 
 
 class LLMSettings(BaseModel):
-    mode: Literal["local", "openai", "openailike", "sagemaker", "mock"]
+    mode: Literal["local", "openai", "openailike", "sagemaker", "mock", "ollama"]
     max_new_tokens: int = Field(
         256,
         description="The maximum number of token that the LLM is authorized to generate in one completion.",
@@ -168,6 +168,17 @@ class OpenAISettings(BaseModel):
     )
 
 
+class OllamaSettings(BaseModel):
+    api_base: str = Field(
+        "http://localhost:11434",
+        description="Base URL of Ollama API. Example: 'http://localhost:11434'.",
+    )
+    model: str = Field(
+        None,
+        description="Model to use. Example: 'llama2-uncensored'.",
+    )
+
+
 class UISettings(BaseModel):
     enabled: bool
     path: str
@@ -243,6 +254,7 @@ class Settings(BaseModel):
     local: LocalSettings
     sagemaker: SagemakerSettings
     openai: OpenAISettings
+    ollama: OllamaSettings
 
     vectorstore: VectorstoreSettings
     qdrant: QdrantSettings | None = None
diff --git a/settings.yaml b/settings.yaml
index d7e7ce028..0ffbfcaef 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -63,3 +63,6 @@ sagemaker:
 openai:
   api_key: ${OPENAI_API_KEY:}
   model: gpt-3.5-turbo
+
+ollama:
+  model: llama2-uncensored
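
For reviewers who want to verify the new backend outside PrivateGPT, here is a minimal sketch (not part of the patch) of the same wiring the new `ollama` case performs. It assumes the llama-index version pinned by PrivateGPT at the time of this patch (where `Ollama` is importable from `llama_index.llms`) and an Ollama server already running on the default port with the model pulled via `ollama pull llama2-uncensored`:

```python
# Minimal standalone sketch of what LLMComponent's "ollama" case sets up.
# Assumptions: Ollama server reachable at http://localhost:11434 and the
# `llama2-uncensored` model already pulled onto it.
from llama_index.llms import Ollama

llm = Ollama(
    model="llama2-uncensored",          # corresponds to the `ollama.model` setting
    base_url="http://localhost:11434",  # corresponds to the `ollama.api_base` default
)

# `complete` is the standard llama-index LLM entry point; this round-trips
# a prompt through the local Ollama server.
response = llm.complete("Why is the sky blue?")
print(response.text)
```

If this round-trip succeeds, the same model should work through PrivateGPT with the `settings-ollama.yaml` profile described in the docs change above.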