From 6bbec79583b7f28d9bea4b39c099ebef149db843 Mon Sep 17 00:00:00 2001
From: Ygal Blum
Date: Fri, 9 Feb 2024 16:50:50 +0200
Subject: [PATCH] feat(llm): Add support for Ollama LLM (#1526)

---
 fern/docs/pages/manual/llms.mdx             | 30 +++++++++++++++++++++
 private_gpt/components/llm/llm_component.py |  7 +++++
 private_gpt/settings/settings.py            | 14 +++++++++-
 settings.yaml                               |  3 +++
 4 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/fern/docs/pages/manual/llms.mdx b/fern/docs/pages/manual/llms.mdx
index 059fb594e..7445bff19 100644
--- a/fern/docs/pages/manual/llms.mdx
+++ b/fern/docs/pages/manual/llms.mdx
@@ -102,3 +102,33 @@ or
 
 When the server is started it will print a log *Application startup complete*.
 Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
+
+### Using Ollama
+
+Another option for a fully private setup is using [Ollama](https://ollama.ai/).
+
+Note: how to deploy Ollama and pull models into it is outside the scope of this documentation.
+
+To set it up, create a profile `settings-ollama.yaml` with the following contents:
+
+```yaml
+llm:
+  mode: ollama
+
+ollama:
+  model: # Required. Model to use.
+         # Note: Ollama models are listed here: https://ollama.ai/library
+         # Be sure to pull the model to your Ollama server first
+  api_base: # Defaults to http://localhost:11434
+```
+
+Then run PrivateGPT loading the profile you just created:
+
+`PGPT_PROFILES=ollama make run`
+
+or
+
+`PGPT_PROFILES=ollama poetry run python -m private_gpt`
+
+When the server is started it will print a log *Application startup complete*.
+Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index 971cfa3bf..eebbdff0b 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -80,3 +80,10 @@ def __init__(self, settings: Settings) -> None:
                 )
             case "mock":
                 self.llm = MockLLM()
+            case "ollama":
+                from llama_index.llms import Ollama
+
+                ollama_settings = settings.ollama
+                self.llm = Ollama(
+                    model=ollama_settings.model, base_url=ollama_settings.api_base
+                )
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 499ce66d7..ed65c203a 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -81,7 +81,7 @@ class DataSettings(BaseModel):
 
 
 class LLMSettings(BaseModel):
-    mode: Literal["local", "openai", "openailike", "sagemaker", "mock"]
+    mode: Literal["local", "openai", "openailike", "sagemaker", "mock", "ollama"]
     max_new_tokens: int = Field(
         256,
         description="The maximum number of token that the LLM is authorized to generate in one completion.",
@@ -168,6 +168,17 @@ class OpenAISettings(BaseModel):
     )
 
 
+class OllamaSettings(BaseModel):
+    api_base: str = Field(
+        "http://localhost:11434",
+        description="Base URL of Ollama API. Example: 'http://localhost:11434'.",
+    )
+    model: str = Field(
+        None,
+        description="Model to use. Example: 'llama2-uncensored'.",
+    )
+
+
 class UISettings(BaseModel):
     enabled: bool
     path: str
@@ -243,6 +254,7 @@ class Settings(BaseModel):
     local: LocalSettings
     sagemaker: SagemakerSettings
     openai: OpenAISettings
+    ollama: OllamaSettings
 
     vectorstore: VectorstoreSettings
     qdrant: QdrantSettings | None = None
diff --git a/settings.yaml b/settings.yaml
index d7e7ce028..0ffbfcaef 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -63,3 +63,6 @@ sagemaker:
 openai:
   api_key: ${OPENAI_API_KEY:}
   model: gpt-3.5-turbo
+
+ollama:
+  model: llama2-uncensored
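
For reviewers who want to verify the new backend outside PrivateGPT, here is a minimal sketch (not part of the patch) of the same wiring the new `ollama` case performs. It assumes the llama-index version pinned by PrivateGPT at the time of this patch (where `Ollama` is importable from `llama_index.llms`) and an Ollama server already running on the default port with the model pulled via `ollama pull llama2-uncensored`:

```python
# Minimal standalone sketch of what LLMComponent's "ollama" case sets up.
# Assumptions: Ollama server reachable at http://localhost:11434 and the
# `llama2-uncensored` model already pulled onto it.
from llama_index.llms import Ollama

llm = Ollama(
    model="llama2-uncensored",          # corresponds to the `ollama.model` setting
    base_url="http://localhost:11434",  # corresponds to the `ollama.api_base` default
)

# `complete` is the standard llama-index LLM entry point; this round-trips
# a prompt through the local Ollama server.
response = llm.complete("Why is the sky blue?")
print(response.text)
```

If this round-trip succeeds, the same model should work through PrivateGPT with the `settings-ollama.yaml` profile described in the docs change above.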