From d7ed5f69810edb9392e0b95d93b4406de67bc141 Mon Sep 17 00:00:00 2001
From: Matt Zhou
Date: Tue, 8 Oct 2024 14:06:31 -0700
Subject: [PATCH 1/5] Add listing of LLM models and embedding models

---
 .../llm_model_configs/azure-gpt-4o-mini.json |  1 -
 letta/llm_api/azure_openai.py                | 90 ++++++++-----------
 letta/llm_api/llm_api_tools.py               |  3 +
 letta/providers.py                           | 64 ++++++++++++-
 letta/schemas/llm_config.py                  |  3 -
 letta/server/server.py                       |  8 +-
 letta/settings.py                            |  1 +
 .../llm_model_configs/azure-gpt-4o-mini.json |  1 -
 tests/test_providers.py                      | 17 +++-
 9 files changed, 127 insertions(+), 61 deletions(-)

diff --git a/configs/llm_model_configs/azure-gpt-4o-mini.json b/configs/llm_model_configs/azure-gpt-4o-mini.json
index 58eb3a00dc..323b2cae9b 100644
--- a/configs/llm_model_configs/azure-gpt-4o-mini.json
+++ b/configs/llm_model_configs/azure-gpt-4o-mini.json
@@ -2,6 +2,5 @@
     "context_window": 128000,
     "model": "gpt-4o-mini",
     "model_endpoint_type": "azure",
-    "api_version": "2023-03-15-preview",
     "model_wrapper": null
 }
diff --git a/letta/llm_api/azure_openai.py b/letta/llm_api/azure_openai.py
index 57b49f7cfc..9ab840a671 100644
--- a/letta/llm_api/azure_openai.py
+++ b/letta/llm_api/azure_openai.py
@@ -1,5 +1,3 @@
-from typing import Union
-
 import requests
 
 from letta.schemas.llm_config import LLMConfig
@@ -7,70 +5,58 @@ from letta.schemas.openai.chat_completions import ChatCompletionRequest
 from letta.schemas.openai.embedding_response import EmbeddingResponse
 from letta.settings import ModelSettings
-from letta.utils import smart_urljoin
 
-MODEL_TO_AZURE_ENGINE = {
-    "gpt-4-1106-preview": "gpt-4",
-    "gpt-4": "gpt-4",
-    "gpt-4-32k": "gpt-4-32k",
-    "gpt-3.5": "gpt-35-turbo",
-    "gpt-3.5-turbo": "gpt-35-turbo",
-    "gpt-3.5-turbo-16k": "gpt-35-turbo-16k",
-    "gpt-4o-mini": "gpt-4o-mini",
-}
+
+def get_azure_chat_completions_endpoint(base_url: str, model: str, api_version: str):
+    return f"{base_url}/openai/deployments/{model}/chat/completions?api-version={api_version}"
 
-def get_azure_endpoint(llm_config: LLMConfig, model_settings: ModelSettings):
-    assert llm_config.api_version, "Missing model version! This field must be provided in the LLM config for Azure."
-    assert llm_config.model in MODEL_TO_AZURE_ENGINE, f"{llm_config.model} not in supported models: {list(MODEL_TO_AZURE_ENGINE.keys())}"
+def get_azure_embeddings_endpoint(base_url: str, model: str, api_version: str):
+    return f"{base_url}/openai/deployments/{model}/embeddings?api-version={api_version}"
 
-    model = MODEL_TO_AZURE_ENGINE[llm_config.model]
-    return f"{model_settings.azure_base_url}/openai/deployments/{model}/chat/completions?api-version={llm_config.api_version}"
 
+def get_azure_model_list_endpoint(base_url: str, api_version: str):
+    return f"{base_url}/openai/models?api-version={api_version}"
 
-def azure_openai_get_model_list(url: str, api_key: Union[str, None], api_version: str) -> dict:
+
+def azure_openai_get_model_list(base_url: str, api_key: str, api_version: str) -> list:
     """https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP"""
-    from letta.utils import printd
 
     # https://xxx.openai.azure.com/openai/models?api-version=xxx
-    url = smart_urljoin(url, "openai")
-    url = smart_urljoin(url, f"models?api-version={api_version}")
-
     headers = {"Content-Type": "application/json"}
     if api_key is not None:
         headers["api-key"] = f"{api_key}"
 
-    printd(f"Sending request to {url}")
+    url = get_azure_model_list_endpoint(base_url, api_version)
     try:
         response = requests.get(url, headers=headers)
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response = {response}")
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        try:
-            response = response.json()
-        except:
-            pass
-        printd(f"Got HTTPError, exception={http_err}, response={response}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        try:
-            response = response.json()
-        except:
-            pass
-        printd(f"Got RequestException, exception={req_err}, response={response}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        try:
-            response = response.json()
-        except:
-            pass
-        printd(f"Got unknown Exception, exception={e}, response={response}")
-        raise e
+        response.raise_for_status()
+    except requests.RequestException as e:
+        raise RuntimeError(f"Failed to retrieve model list: {e}")
+
+    return response.json().get("data", [])
+
+
+def azure_openai_get_chat_completion_model_list(base_url: str, api_key: str, api_version: str) -> list:
+    model_list = azure_openai_get_model_list(base_url, api_key, api_version)
+    # Extract models that support text generation (guard against entries with no capabilities field)
+    model_options = [m for m in model_list if m.get("capabilities", {}).get("chat_completion", False)]
+    return model_options
+
+
+def azure_openai_get_embeddings_model_list(base_url: str, api_key: str, api_version: str, require_embedding_in_name: bool = True) -> list:
+    def valid_embedding_model(m: dict):
+        valid_name = True
+        if require_embedding_in_name:
+            valid_name = "embedding" in m["id"]
+
+        return m.get("capabilities", {}).get("embeddings", False) and valid_name
+
+    model_list = azure_openai_get_model_list(base_url, api_key, api_version)
+
+    # Extract models that support embeddings
+    model_options = [m for m in model_list if valid_embedding_model(m)]
+    return model_options
 
 
 def azure_openai_chat_completions_request(
@@ -93,7 +79,7 @@ def azure_openai_chat_completions_request(
     data.pop("tools")
     data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")
 
-    model_endpoint = get_azure_endpoint(llm_config, model_settings)
+    model_endpoint = get_azure_chat_completions_endpoint(model_settings.azure_base_url, llm_config.model, model_settings.azure_api_version)
     printd(f"Sending request to {model_endpoint}")
     try:
         response = requests.post(model_endpoint, headers=headers, json=data)
diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py
index 7ccd23ac3e..fb078c04b2 100644
--- a/letta/llm_api/llm_api_tools.py
+++ b/letta/llm_api/llm_api_tools.py
@@ -189,6 +189,9 @@ def create(
         if model_settings.azure_base_url is None:
             raise ValueError(f"Azure base url is missing. Did you set AZURE_BASE_URL in your env?")
 
+        if model_settings.azure_api_version is None:
+            raise ValueError(f"Azure API version is missing. Did you set AZURE_API_VERSION in your env?")
+
         # Set the llm config model_endpoint from model_settings
         # For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config
         llm_config.model_endpoint = model_settings.azure_base_url
diff --git a/letta/providers.py b/letta/providers.py
index ac6170629c..8c24b3a950 100644
--- a/letta/providers.py
+++ b/letta/providers.py
@@ -1,8 +1,12 @@
 from typing import List, Optional
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 
 from letta.constants import LLM_MAX_TOKENS
+from letta.llm_api.azure_openai import (
+    get_azure_chat_completions_endpoint,
+    get_azure_embeddings_endpoint,
+)
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
 
@@ -244,10 +248,68 @@ def get_model_context_window(self, model_name: str):
 
 class AzureProvider(Provider):
     name: str = "azure"
+    latest_api_version: str = "2024-09-01-preview"  # https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
     base_url: str = Field(
         ..., description="Base URL for the Azure API endpoint. This should be specific to your org, e.g. `https://letta.openai.azure.com`."
     )
     api_key: str = Field(..., description="API key for the Azure API.")
+    api_version: str = Field(latest_api_version, description="API version for the Azure API")
+
+    @model_validator(mode="before")
+    def set_default_api_version(cls, values):
+        """
+        This ensures that api_version is always set to the default if None is passed in.
+        """
+        if values.get("api_version") is None:
+            values["api_version"] = cls.model_fields["latest_api_version"].default
+        return values
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.azure_openai import (
+            azure_openai_get_chat_completion_model_list,
+        )
+
+        model_options = azure_openai_get_chat_completion_model_list(self.base_url, api_key=self.api_key, api_version=self.api_version)
+        configs = []
+        for model_option in model_options:
+            model_name = model_option["id"]
+            context_window_size = self.get_model_context_window(model_name)
+            model_endpoint = get_azure_chat_completions_endpoint(self.base_url, model_name, self.api_version)
+            configs.append(
+                LLMConfig(model=model_name, model_endpoint_type="azure", model_endpoint=model_endpoint, context_window=context_window_size)
+            )
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        from letta.llm_api.azure_openai import azure_openai_get_embeddings_model_list
+
+        model_options = azure_openai_get_embeddings_model_list(
+            self.base_url, api_key=self.api_key, api_version=self.api_version, require_embedding_in_name=True
+        )
+        configs = []
+        for model_option in model_options:
+            model_name = model_option["id"]
+            model_endpoint = get_azure_embeddings_endpoint(self.base_url, model_name, self.api_version)
+            configs.append(
+                EmbeddingConfig(
+                    embedding_model=model_name,
+                    embedding_endpoint_type="azure",
+                    embedding_endpoint=model_endpoint,
+                    embedding_dim=768,
+                    embedding_chunk_size=300,  # NOTE: max is 2048
+                )
+            )
+        return configs
+
+    def get_model_context_window(self, model_name: str):
+        """
+        This is hardcoded for now, since there are no API endpoints to retrieve metadata for a model.
+
+        According to the Azure OpenAI Service REST API documentation:
+        "The token count of your prompt plus max_tokens can't exceed the model's context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096). Has minimum of 0."
+        https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
+        """
+        return 2048
 
 
 class VLLMProvider(OpenAIProvider):
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index 493adabf85..412e6483ee 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -35,9 +35,6 @@ class LLMConfig(BaseModel):
         "hugging-face",
     ] = Field(..., description="The endpoint type for the model.")
     model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
-    api_version: Optional[str] = Field(
-        None, description="The version for the model API. Used by the Azure provider backend, e.g. 2023-03-15-preview."
-    )
     model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
     context_window: int = Field(..., description="The context window size for the model.")
 
diff --git a/letta/server/server.py b/letta/server/server.py
index 3ebb63fdc1..c3a7de3c33 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -272,7 +272,13 @@ def __init__(
         if model_settings.gemini_api_key:
             self._enabled_providers.append(GoogleAIProvider(api_key=model_settings.gemini_api_key))
         if model_settings.azure_api_key and model_settings.azure_base_url:
-            self._enabled_providers.append(AzureProvider(api_key=model_settings.azure_api_key, base_url=model_settings.azure_base_url))
+            self._enabled_providers.append(
+                AzureProvider(
+                    api_key=model_settings.azure_api_key,
+                    base_url=model_settings.azure_base_url,
+                    api_version=model_settings.azure_api_version,
+                )
+            )
 
     def save_agents(self):
         """Saves all the agents that are in the in-memory object store"""
diff --git a/letta/settings.py b/letta/settings.py
index 4f8bb2de2f..1dfd66af35 100644
--- a/letta/settings.py
+++ b/letta/settings.py
@@ -25,6 +25,7 @@ class ModelSettings(BaseSettings):
     # azure
     azure_api_key: Optional[str] = None
     azure_base_url: Optional[str] = None
+    azure_api_version: Optional[str] = None
 
     # google ai
     gemini_api_key: Optional[str] = None
diff --git a/tests/configs/llm_model_configs/azure-gpt-4o-mini.json b/tests/configs/llm_model_configs/azure-gpt-4o-mini.json
index 58eb3a00dc..323b2cae9b 100644
--- a/tests/configs/llm_model_configs/azure-gpt-4o-mini.json
+++ b/tests/configs/llm_model_configs/azure-gpt-4o-mini.json
@@ -2,6 +2,5 @@
     "context_window": 128000,
     "model": "gpt-4o-mini",
     "model_endpoint_type": "azure",
-    "api_version": "2023-03-15-preview",
     "model_wrapper": null
 }
diff --git a/tests/test_providers.py b/tests/test_providers.py
index fecacd79c0..89f946149e 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -1,6 +1,11 @@
 import os
 
-from letta.providers import AnthropicProvider, GoogleAIProvider, OpenAIProvider
+from letta.providers import (
+    AnthropicProvider,
+    AzureProvider,
+    GoogleAIProvider,
+    OpenAIProvider,
+)
 
 
 def test_openai():
@@ -38,9 +43,17 @@ def test_googleai():
     provider.list_embedding_models()
 
 
+def test_azure_openai():
+
+    provider = AzureProvider(api_key=os.getenv("AZURE_API_KEY"), base_url=os.getenv("AZURE_BASE_URL"))
+    provider.list_llm_models()
+    embeds = provider.list_embedding_models()
+    print(embeds)
+
+
 #
 #
-test_googleai()
+# test_googleai()
 # test_ollama()
 # test_groq()
 # test_openai()

From ae94bacd7f5689052a2a81ab2c64dd809c76b850 Mon Sep 17 00:00:00 2001
From: Matt Zhou
Date: Tue, 8 Oct 2024 14:33:47 -0700
Subject: [PATCH 2/5] Remove Azure tests because of missing keys

---
 tests/test_providers.py | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/tests/test_providers.py b/tests/test_providers.py
index 89f946149e..27fd466e6a 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -1,11 +1,6 @@
 import os
 
-from letta.providers import (
-    AnthropicProvider,
-    AzureProvider,
-    GoogleAIProvider,
-    OpenAIProvider,
-)
+from letta.providers import AnthropicProvider, GoogleAIProvider, OpenAIProvider
 
 
 def test_openai():
@@ -43,14 +38,6 @@ def test_googleai():
     provider.list_embedding_models()
 
 
-def test_azure_openai():
-
-    provider = AzureProvider(api_key=os.getenv("AZURE_API_KEY"), base_url=os.getenv("AZURE_BASE_URL"))
-    provider.list_llm_models()
-    embeds = provider.list_embedding_models()
-    print(embeds)
-
-
 #
 #
 # test_googleai()
From 8322f06825bcdb6da1cd32ad793b3444c30d868d Mon Sep 17 00:00:00 2001
From: Matt Zhou
Date: Tue, 8 Oct 2024 14:40:44 -0700
Subject: [PATCH 3/5] Add tracking ticket

---
 tests/test_providers.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/test_providers.py b/tests/test_providers.py
index 27fd466e6a..6d472924c3 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -30,6 +30,14 @@ def test_anthropic():
 #     print(models)
 #
 #
+
+
+# TODO: Add this test
+# https://linear.app/letta/issue/LET-159/add-tests-for-azure-openai-in-test-providerspy-and-test-endpointspy
+def test_azure():
+    pass
+
+
 def test_googleai():
     provider = GoogleAIProvider(api_key=os.getenv("GEMINI_API_KEY"))
     models = provider.list_llm_models()

From efb9e3d57ad1af87b5f507ee44bda70c96093d1d Mon Sep 17 00:00:00 2001
From: Matt Zhou
Date: Tue, 8 Oct 2024 14:49:48 -0700
Subject: [PATCH 4/5] Create hardcoded map of context window size

---
 letta/llm_api/azure_openai_constants.py | 10 ++++++++++
 letta/providers.py                      |  7 ++-----
 2 files changed, 12 insertions(+), 5 deletions(-)
 create mode 100644 letta/llm_api/azure_openai_constants.py

diff --git a/letta/llm_api/azure_openai_constants.py b/letta/llm_api/azure_openai_constants.py
new file mode 100644
index 0000000000..c3ac60e4a9
--- /dev/null
+++ b/letta/llm_api/azure_openai_constants.py
@@ -0,0 +1,10 @@
+AZURE_MODEL_TO_CONTEXT_LENGTH = {
+    "babbage-002": 16384,
+    "davinci-002": 16384,
+    "gpt-35-turbo-0613": 4096,
+    "gpt-35-turbo-1106": 16385,
+    "gpt-35-turbo-0125": 16385,
+    "gpt-4-0613": 8192,
+    "gpt-4o-mini-2024-07-18": 128000,
+    "gpt-4o-2024-08-06": 128000,
+}
diff --git a/letta/providers.py b/letta/providers.py
index 8c24b3a950..bcccd1539c 100644
--- a/letta/providers.py
+++ b/letta/providers.py
@@ -7,6 +7,7 @@
     get_azure_chat_completions_endpoint,
     get_azure_embeddings_endpoint,
 )
+from letta.llm_api.azure_openai_constants import AZURE_MODEL_TO_CONTEXT_LENGTH
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
 
@@ -304,12 +305,8 @@ def list_embedding_models(self) -> List[EmbeddingConfig]:
     def get_model_context_window(self, model_name: str):
         """
         This is hardcoded for now, since there are no API endpoints to retrieve metadata for a model.
-
-        According to the Azure OpenAI Service REST API documentation:
-        "The token count of your prompt plus max_tokens can't exceed the model's context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096). Has minimum of 0."
-        https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
         """
-        return 2048
+        return AZURE_MODEL_TO_CONTEXT_LENGTH.get(model_name, 4096)

From da5227fb50358f0f45cf822c2bfa71cbeb0053ac Mon Sep 17 00:00:00 2001
From: Matt Zhou
Date: Tue, 8 Oct 2024 15:02:16 -0700
Subject: [PATCH 5/5] Run formatting

---
 tests/test_providers.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_providers.py b/tests/test_providers.py
index 2fa235b9e1..684fed5fbd 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -30,12 +30,13 @@ def test_anthropic():
 #
 #
 
+
 # TODO: Add this test
 # https://linear.app/letta/issue/LET-159/add-tests-for-azure-openai-in-test-providerspy-and-test-endpointspy
 def test_azure():
     pass
-    
+
 
 def test_ollama():
     provider = OllamaProvider(base_url=os.getenv("OLLAMA_BASE_URL"))
     models = provider.list_llm_models()
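
Usage sketch for this series: assuming the five patches above are applied and the AZURE_API_KEY, AZURE_BASE_URL, and AZURE_API_VERSION environment variables they reference are set, the new provider surface can be exercised as below. The script is illustrative only (it is not part of the diffs); it just prints a few fields from the configs the provider builds.

    import os

    from letta.llm_api.azure_openai import get_azure_chat_completions_endpoint
    from letta.providers import AzureProvider

    # Construct the provider the way server.py does after this series. api_version
    # may be None here; the model_validator then falls back to the provider's
    # latest_api_version default ("2024-09-01-preview").
    provider = AzureProvider(
        api_key=os.environ["AZURE_API_KEY"],
        base_url=os.environ["AZURE_BASE_URL"],
        api_version=os.getenv("AZURE_API_VERSION"),
    )

    # Deployments whose capabilities report chat_completion support.
    for llm in provider.list_llm_models():
        print(llm.model, llm.context_window, llm.model_endpoint)

    # Deployments whose capabilities report embeddings support (and, by default,
    # have "embedding" in their model id).
    for emb in provider.list_embedding_models():
        print(emb.embedding_model, emb.embedding_endpoint)

    # The endpoint helpers are plain string builders, e.g.:
    print(get_azure_chat_completions_endpoint(os.environ["AZURE_BASE_URL"], "gpt-4o-mini", "2024-09-01-preview"))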