LiteLLM Minor Fixes & Improvements (11/19/2024) (#6820)
* fix(anthropic/chat/transformation.py): add json schema as values: json_schema

fixes passing pydantic obj to anthropic

Fixes #6766

* (feat): Add timestamp_granularities parameter to transcription API (#6457)

* Add timestamp_granularities parameter to transcription API

* add param to the local test

* fix(databricks/chat.py): handle max_retries optional param handling for openai-like calls

Fixes issue with calling finetuned vertex ai models via databricks route

* build(ui/): add team admins via proxy ui

* fix: fix linting error

* test: fix test

* docs(vertex.md): refactor docs

* test: handle overloaded anthropic model error

* test: remove duplicate test

* test: fix test

* test: update test to handle model overloaded error

---------

Co-authored-by: Show <35062952+BrunooShow@users.noreply.github.com>
krrishdholakia and BrunooShow authored Nov 20, 2024
1 parent 7d0e1f0 commit b0be5bf
Showing 15 changed files with 200 additions and 193 deletions.
181 changes: 90 additions & 91 deletions docs/my-website/docs/providers/vertex.md
@@ -572,6 +572,96 @@ Here's how to use Vertex AI with the LiteLLM Proxy Server

</Tabs>


## Authentication - vertex_project, vertex_location, etc.

Set your vertex credentials via:
- dynamic params
OR
- env vars


### **Dynamic Params**

You can set:
- `vertex_credentials` (str) - a JSON string or filepath to your Vertex AI service account JSON
- `vertex_location` (str) - region where the Vertex model is deployed (us-central1, asia-southeast1, etc.)
- `vertex_project` (Optional[str]) - use if your Vertex project differs from the one in `vertex_credentials`

as dynamic params for a `litellm.completion` call.

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion
import json

## GET CREDENTIALS
file_path = 'path/to/vertex_ai_service_account.json'

# Load the JSON file
with open(file_path, 'r') as file:
    vertex_credentials = json.load(file)

# Convert to JSON string
vertex_credentials_json = json.dumps(vertex_credentials)

response = completion(
    model="vertex_ai/gemini-pro",
    messages=[{"content": "You are a good bot.", "role": "system"}, {"content": "Hello, how are you?", "role": "user"}],
    vertex_credentials=vertex_credentials_json,
    vertex_project="my-special-project",
    vertex_location="my-special-location"
)
```

</TabItem>
<TabItem value="proxy" label="PROXY">

```yaml
model_list:
  - model_name: gemini-1.5-pro
    litellm_params:
      model: gemini-1.5-pro
      vertex_credentials: os.environ/VERTEX_FILE_PATH_ENV_VAR # os.environ["VERTEX_FILE_PATH_ENV_VAR"] = "/path/to/service_account.json"
      vertex_project: "my-special-project"
      vertex_location: "my-special-location"
```

</TabItem>
</Tabs>




### **Environment Variables**

You can set:
- `GOOGLE_APPLICATION_CREDENTIALS` - store the filepath for your service_account.json here (used by the Vertex SDK directly).
- `VERTEXAI_LOCATION` - region where the Vertex model is deployed (us-central1, asia-southeast1, etc.)
- `VERTEXAI_PROJECT` - (Optional[str]) use if your Vertex project differs from the one in your credentials.

1. `GOOGLE_APPLICATION_CREDENTIALS`

```bash
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service_account.json"
```

2. `VERTEXAI_LOCATION`

```bash
export VERTEXAI_LOCATION="us-central1" # can be any vertex location
```

3. `VERTEXAI_PROJECT`

```bash
export VERTEXAI_PROJECT="my-test-project" # ONLY use if model project is different from service account project
```


## Specifying Safety Settings
In certain use-cases you may need to make calls to the models and pass [safety settings](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simply pass the `safety_settings` argument to `completion` or `acompletion`.
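For example, a minimal sketch (the harm categories and thresholds shown are illustrative; see Google's safety settings documentation for the full list):

```python
from litellm import completion

# Illustrative safety settings - relax or tighten each category as needed.
response = completion(
    model="vertex_ai/gemini-pro",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
    safety_settings=[
        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
    ],
)
```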

@@ -2303,97 +2393,6 @@ print("response from proxy", response)
</TabItem>
</Tabs>


## Extra

### Using `GOOGLE_APPLICATION_CREDENTIALS`
2 changes: 1 addition & 1 deletion litellm/llms/anthropic/chat/transformation.py
@@ -374,7 +374,7 @@ def _create_json_tool_call_for_response_format(
_input_schema["additionalProperties"] = True
_input_schema["properties"] = {}
else:
_input_schema["properties"] = json_schema
_input_schema["properties"] = {"values": json_schema}

_tool = AnthropicMessagesTool(name="json_tool_call", input_schema=_input_schema)
return _tool
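What the change above enables, sketched below: passing a Pydantic model as `response_format` to an Anthropic model now works, because the generated JSON schema is nested under a `values` key instead of being spliced directly into the synthetic tool's properties. The model name is illustrative, and `ANTHROPIC_API_KEY` is assumed to be set.

```python
# Sketch: a Pydantic object as response_format on an Anthropic model -
# the case this fix addresses (Fixes #6766).
from pydantic import BaseModel
import litellm

class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]

response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20241022",
    messages=[{"role": "user", "content": "Alice and Bob meet on Friday for standup."}],
    response_format=CalendarEvent,  # schema now sent as {"values": <json_schema>}
)
print(response.choices[0].message.content)
```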
3 changes: 3 additions & 0 deletions litellm/llms/databricks/chat.py
@@ -470,6 +470,9 @@ def completion(
optional_params[k] = v

stream: bool = optional_params.get("stream", None) or False
optional_params.pop(
"max_retries", None
) # [TODO] add max retry support at llm api call level
optional_params["stream"] = stream

data = {
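A usage sketch of what this unblocks (model name illustrative): `max_retries` is now accepted on openai-like routes such as Databricks and popped client-side rather than forwarded in the request payload, which previously broke calls like fine-tuned Vertex AI models served via the Databricks route.

```python
# Sketch: max_retries no longer leaks into the Databricks API request.
import litellm

response = litellm.completion(
    model="databricks/databricks-dbrx-instruct",  # illustrative model name
    messages=[{"role": "user", "content": "Hello!"}],
    max_retries=2,  # accepted, then dropped before the HTTP call (per the TODO above)
)
print(response.choices[0].message.content)
```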
2 changes: 2 additions & 0 deletions litellm/main.py
@@ -4728,6 +4728,7 @@ def transcription(
response_format: Optional[
Literal["json", "text", "srt", "verbose_json", "vtt"]
] = None,
timestamp_granularities: Optional[List[Literal["word", "segment"]]] = None,
temperature: Optional[int] = None, # openai defaults this to 0
## LITELLM PARAMS ##
user: Optional[str] = None,
@@ -4777,6 +4778,7 @@
language=language,
prompt=prompt,
response_format=response_format,
timestamp_granularities=timestamp_granularities,
temperature=temperature,
custom_llm_provider=custom_llm_provider,
drop_params=drop_params,
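A usage sketch for the new parameter (file path illustrative; OpenAI requires `response_format="verbose_json"` when requesting timestamp granularities):

```python
# Sketch: word- and segment-level timestamps via the new parameter.
import litellm

with open("sample.mp3", "rb") as audio_file:
    transcript = litellm.transcription(
        model="whisper-1",
        file=audio_file,
        response_format="verbose_json",
        timestamp_granularities=["word", "segment"],
    )
print(transcript)
```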
34 changes: 12 additions & 22 deletions litellm/model_prices_and_context_window_backup.json
@@ -1884,7 +1884,8 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 264,
"supports_assistant_prefill": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_response_schema": true
},
"claude-3-5-haiku-20241022": {
"max_tokens": 8192,
@@ -1900,7 +1901,8 @@
"tool_use_system_prompt_tokens": 264,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_pdf_input": true
"supports_pdf_input": true,
"supports_response_schema": true
},
"claude-3-opus-20240229": {
"max_tokens": 4096,
@@ -1916,7 +1918,8 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 395,
"supports_assistant_prefill": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_response_schema": true
},
"claude-3-sonnet-20240229": {
"max_tokens": 4096,
@@ -1930,7 +1933,8 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 159,
"supports_assistant_prefill": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_response_schema": true
},
"claude-3-5-sonnet-20240620": {
"max_tokens": 8192,
@@ -1946,7 +1950,8 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 159,
"supports_assistant_prefill": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_response_schema": true
},
"claude-3-5-sonnet-20241022": {
"max_tokens": 8192,
@@ -1962,7 +1967,8 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 159,
"supports_assistant_prefill": true,
"supports_prompt_caching": true
"supports_prompt_caching": true,
"supports_response_schema": true
},
"text-bison": {
"max_tokens": 2048,
@@ -3864,22 +3870,6 @@
"supports_function_calling": true,
"tool_use_system_prompt_tokens": 264
},
"anthropic/claude-3-5-sonnet-20241022": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"cache_creation_input_token_cost": 0.00000375,
"cache_read_input_token_cost": 0.0000003,
"litellm_provider": "anthropic",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 159,
"supports_assistant_prefill": true,
"supports_prompt_caching": true
},
"openrouter/anthropic/claude-3.5-sonnet": {
"max_tokens": 8192,
"max_input_tokens": 200000,
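The entries above flag the Claude 3 family as supporting response schemas, which callers can query; a sketch, assuming `litellm.supports_response_schema` exists with this signature in your version:

```python
# Sketch: reading the new capability flag from the model map.
import litellm

print(litellm.supports_response_schema(
    model="claude-3-5-sonnet-20241022",
    custom_llm_provider="anthropic",
))  # expected: True after this change
```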
1 change: 1 addition & 0 deletions litellm/utils.py
@@ -2125,6 +2125,7 @@ def get_optional_params_transcription(
prompt: Optional[str] = None,
response_format: Optional[str] = None,
temperature: Optional[int] = None,
timestamp_granularities: Optional[List[Literal["word", "segment"]]] = None,
custom_llm_provider: Optional[str] = None,
drop_params: Optional[bool] = None,
**kwargs,