Add support for Ollama LLM #1526

Merged 1 commit on Feb 9, 2024.
30 changes: 30 additions & 0 deletions fern/docs/pages/manual/llms.mdx
@@ -102,3 +102,33 @@

When the server is started, it will print the log *Application startup complete*.
Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.

### Using Ollama

Another option for a fully private setup is using [Ollama](https://ollama.ai/).

Note: deploying Ollama and pulling models onto it are out of the scope of this documentation.
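That said, a minimal local setup usually comes down to two commands (`llama2-uncensored` is only an example; any model from the [Ollama library](https://ollama.ai/library) works):

```bash
# Pull an example model onto the local Ollama server (assumes Ollama is installed and running)
ollama pull llama2-uncensored

# Sanity-check that the API is reachable; it should reply "Ollama is running"
curl http://localhost:11434
```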

Once Ollama is running, create a profile `settings-ollama.yaml` with the following contents:

```yaml
llm:
  mode: ollama

ollama:
  model: <ollama_model_to_use> # Required. The model to use.
  # Note: Ollama models are listed at https://ollama.ai/library
  # Be sure to pull the model to your Ollama server first.
  api_base: <ollama-api-base-url> # Defaults to http://localhost:11434
```
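For instance, a filled-in profile pointing at a local Ollama instance might look like this (`llama2-uncensored` is just an example model):

```yaml
llm:
  mode: ollama

ollama:
  model: llama2-uncensored
  api_base: http://localhost:11434
```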

Then run PrivateGPT with the profile you just created:

`PGPT_PROFILES=ollama make run`

or

`PGPT_PROFILES=ollama poetry run python -m private_gpt`

When the server is started, it will print the log *Application startup complete*.
Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
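
Once the server is up, a quick way to exercise the Ollama-backed LLM from the command line is a completion request. This assumes the default PrivateGPT `/v1/completions` route; verify the exact paths at http://localhost:8001/docs:

```bash
# Smoke test against the PrivateGPT API (route assumed; see /docs for the actual paths)
curl http://localhost:8001/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Hello from Ollama"}'
```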
7 changes: 7 additions & 0 deletions private_gpt/components/llm/llm_component.py
@@ -80,3 +80,10 @@ def __init__(self, settings: Settings) -> None:
                )
            case "mock":
                self.llm = MockLLM()
            case "ollama":
                from llama_index.llms import Ollama

                ollama_settings = settings.ollama
                self.llm = Ollama(
                    model=ollama_settings.model, base_url=ollama_settings.api_base
                )
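
For reference, a minimal standalone sketch of what this wiring does, using the same `llama_index` class (this assumes a llama-index version from this era, where `Ollama` lives in `llama_index.llms`, plus a running Ollama server with the model already pulled):

```python
from llama_index.llms import Ollama

# Same construction as in the component above, with example values filled in
llm = Ollama(model="llama2-uncensored", base_url="http://localhost:11434")

# One-off completion round-trip through the local Ollama server
response = llm.complete("Briefly, what is retrieval-augmented generation?")
print(response.text)
```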
14 changes: 13 additions & 1 deletion private_gpt/settings/settings.py
@@ -81,7 +81,7 @@ class DataSettings(BaseModel):


 class LLMSettings(BaseModel):
-    mode: Literal["local", "openai", "openailike", "sagemaker", "mock"]
+    mode: Literal["local", "openai", "openailike", "sagemaker", "mock", "ollama"]
     max_new_tokens: int = Field(
         256,
         description="The maximum number of tokens that the LLM is authorized to generate in one completion.",
@@ -168,6 +168,17 @@ class OpenAISettings(BaseModel):
    )


class OllamaSettings(BaseModel):
    api_base: str = Field(
        "http://localhost:11434",
        description="Base URL of Ollama API. Example: 'https://localhost:11434'.",
    )
    model: str = Field(
        None,
        description="Model to use. Example: 'llama2-uncensored'.",
    )
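
As a quick illustration of these settings (a sketch, assuming the `private_gpt` package is importable), the declared default means `api_base` can be omitted when only the model is specified:

```python
from private_gpt.settings.settings import OllamaSettings

# Only the model is supplied; api_base falls back to the declared default
ollama = OllamaSettings(model="llama2-uncensored")
print(ollama.api_base)  # http://localhost:11434
```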


class UISettings(BaseModel):
    enabled: bool
    path: str
@@ -243,6 +254,7 @@ class Settings(BaseModel):
    local: LocalSettings
    sagemaker: SagemakerSettings
    openai: OpenAISettings
    ollama: OllamaSettings
    vectorstore: VectorstoreSettings
    qdrant: QdrantSettings | None = None

3 changes: 3 additions & 0 deletions settings.yaml
@@ -63,3 +63,6 @@ sagemaker:
openai:
  api_key: ${OPENAI_API_KEY:}
  model: gpt-3.5-turbo

ollama:
  model: llama2-uncensored