Skip to content

Commit

Permalink
Merge branch 'main' of github.com:Future-House/ldp into buffers
Browse files Browse the repository at this point in the history
  • Loading branch information
sidnarayanan committed Dec 4, 2024
2 parents c0091e0 + ec05261 commit c3cd600
Show file tree
Hide file tree
Showing 8 changed files with 540 additions and 69 deletions.
34 changes: 26 additions & 8 deletions ldp/llms/chat.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import asyncio
import json
from collections.abc import AsyncGenerator, Callable, Iterable
from collections.abc import AsyncGenerator, Callable, Iterable, Mapping
from datetime import datetime
from typing import Any, ClassVar, Self, cast
from typing import Any, ClassVar, Self, TypeAlias, cast
from uuid import UUID, uuid4

import litellm
Expand All @@ -15,6 +15,10 @@
)
from pydantic import BaseModel, ConfigDict, Field, ValidationError, model_validator

# Yes, this is a hack; it mostly matches
# https://github.com/python-jsonschema/referencing/blob/v0.35.1/referencing/jsonschema.py#L20-L21
JSONSchema: TypeAlias = Mapping[str, Any]


class JSONSchemaValidationError(ValueError):
"""Raised when the completion does not match the specified schema."""
Expand Down Expand Up @@ -87,13 +91,13 @@ def sum_logprobs(choice: litellm.utils.Choices) -> float | None:


def validate_json_completion(
completion: litellm.ModelResponse, output_type: type[BaseModel]
completion: litellm.ModelResponse, output_type: type[BaseModel] | JSONSchema
) -> None:
"""Validate a completion against a JSON schema.
Args:
completion: The completion to validate.
output_type: The Pydantic model to validate the completion against.
output_type: A JSON schema or a Pydantic model to validate the completion.
"""
try:
for choice in completion.choices:
Expand All @@ -105,7 +109,12 @@ def validate_json_completion(
choice.message.content = (
choice.message.content.split("```json")[-1].split("```")[0] or ""
)
output_type.model_validate_json(choice.message.content)
if isinstance(output_type, Mapping): # JSON schema
litellm.litellm_core_utils.json_validation_rule.validate_schema(
schema=dict(output_type), response=choice.message.content
)
else:
output_type.model_validate_json(choice.message.content)
except ValidationError as err:
raise JSONSchemaValidationError(
"The completion does not match the specified schema."
Expand Down Expand Up @@ -173,7 +182,7 @@ async def call( # noqa: C901, PLR0915
self,
messages: list[Message],
callbacks: list[Callable] | None = None,
output_type: type[BaseModel] | None = None,
output_type: type[BaseModel] | JSONSchema | None = None,
tools: list[Tool] | None = None,
tool_choice: Tool | str | None = TOOL_CHOICE_REQUIRED,
**chat_kwargs,
Expand All @@ -198,8 +207,17 @@ async def call( # noqa: C901, PLR0915
else tool_choice
)

# deal with specifying output type
if output_type is not None:
if isinstance(output_type, Mapping): # Use structured outputs
chat_kwargs["response_format"] = {
"type": "json_schema",
"json_schema": {
"strict": True,
# SEE: https://platform.openai.com/docs/guides/structured-outputs#additionalproperties-false-must-always-be-set-in-objects
"schema": dict(output_type) | {"additionalProperties": False},
"name": output_type["title"], # Required by OpenAI as of 12/3/2024
},
}
elif output_type is not None: # Use JSON mode
schema = json.dumps(output_type.model_json_schema(mode="serialization"))
schema_msg = f"Respond following this JSON schema:\n\n{schema}"
# Get the system prompt and its index, or the index to add it
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ requires = ["setuptools>=64", "setuptools_scm>=8"]

[dependency-groups]
codeflash = [
"codeflash>=0.7", # Pin to keep recent
"codeflash>=0.8", # Pin for --verify-setup checking formatter-cmds
"ldp[dev]",
]
dev = ["ldp[dev]"]
Expand Down Expand Up @@ -97,7 +97,7 @@ preview = true
[tool.codeflash]
disable-imports-sorting = true
disable-telemetry = true
formatter-cmds = ["ruff check --exit-zero --fix $file", "ruff format $file"]
formatter-cmds = ["uv tool run ruff check --exit-zero --fix $file", "uv tool run ruff format $file"]
module-root = "ldp"
test-framework = "pytest"
tests-root = "tests"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
interactions:
- request:
body:
'{"messages": [{"role": "system", "content": "Respond following this JSON
schema:\n\n{\"properties\": {\"name\": {\"title\": \"Name\", \"type\": \"string\"},
\"age\": {\"title\": \"Age\", \"type\": \"integer\"}}, \"required\": [\"name\",
\"age\"], \"title\": \"DummyOutputSchema\", \"type\": \"object\"}"}, {"role":
"user", "content": "My name is Claude and I am 1 year old. What is my name and
age?"}], "model": "gpt-3.5-turbo", "response_format": {"type": "json_object"}}'
'{"messages":[{"role":"system","content":"Respond following this JSON schema:\n\n{\"properties\":
{\"name\": {\"title\": \"Name\", \"type\": \"string\"}, \"age\": {\"title\":
\"Age\", \"type\": \"integer\"}}, \"required\": [\"name\", \"age\"], \"title\":
\"DummyOutputSchema\", \"type\": \"object\"}"},{"role":"user","content":"My
name is Claude and I am 1 year old. What is my name and age?"}],"model":"gpt-3.5-turbo","response_format":{"type":"json_object"}}'
headers:
accept:
- application/json
Expand All @@ -15,13 +14,13 @@ interactions:
connection:
- keep-alive
content-length:
- "472"
- "459"
content-type:
- application/json
host:
- api.openai.com
user-agent:
- AsyncOpenAI/Python 1.54.3
- AsyncOpenAI/Python 1.55.3
x-stainless-arch:
- arm64
x-stainless-async:
Expand All @@ -31,11 +30,11 @@ interactions:
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.54.3
- 1.55.3
x-stainless-raw-response:
- "true"
x-stainless-retry-count:
- "1"
- "0"
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
Expand All @@ -45,28 +44,34 @@ interactions:
response:
body:
string: !!binary |
H4sIAAAAAAAAA4ySQYvbMBCF7/4VYs7JEm/Ybte3speytJTdPbSkLkaWJrZaWSOkMbSE/PcixYkd
2kIvPrxv5vHeWIdCCDAaKgGql6wGb9fvXl71tm2fnvvnzy/vd/hp9+V19/ToPm74A8EqbVD7HRWf
t24UDd4iG3InrAJKxuRa3m/LcvP29s1DBgNptGmt87ze3tyteQwtrTfl7d202ZNRGKESXwshhDjk
b8roNP6ESmxWZ2XAGGWHUF2GhIBANikgYzSRpWNYzVCRY3Q59qF2SarByQFrqEQNj1aOGmtYnZHs
Milrd1y6BNyPUaYWbrR20o+XWJY6H6iNE7/oe+NM7JuAMpJLESKTh0yPhRDfcv3xqhH4QIPnhukH
umT4sD3ZwXzwGZb3E2RiaRd6Od3s2q7RyNLYuLgfKKl61PPqfGw5akMLUCxK/5nmb96n4sZ1/2M/
A6XQM+rGB9RGXTeexwKm9/ivscuRc2CIvyLj0OyN6zD4YPKLyP/yWPwGAAD//wMAcNRyBBADAAA=
H4sIAAAAAAAAA4ySQWvjMBCF7/4VYs5JqR1Ktr5ld1lKC720lJa6GEWaOOrKkpBGdJeQ/75IcWKH
7sJefHjfzOO9sXYFY6Ak1AzElpPonZ6v+PPdU/d++/0+fPmIq+opPt483/zAh/j1pYJZ2rDrdxR0
3LoQtncaSVlzwMIjJ0yu5XKxqJZldV1m0FuJOq11juaLi6s5Rb+288uyuho2t1YJDFCz14Ixxnb5
mzIaib+gZpezo9JjCLxDqE9DjIG3OinAQ1CBuCGYjVBYQ2hy7F1jktSA4T02ULMGvmkeJTYwOyLe
ZVI2Zj918biJgacWJmo96PtTLG075+06DPykb5RRYdt65MGaFCGQdZDpvmDsLdePZ43Aeds7asn+
RJMMrxcHOxgPPsJyOUCyxPVEL4ebndu1EokrHSb3A8HFFuW4Oh6bR6nsBBST0p/T/M37UFyZ7n/s
RyAEOkLZOo9SifPG45jH9B7/NXY6cg4M4Xcg7NuNMh1651V+Eflf7os/AAAA//8DAOtt/IcQAwAA
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8df9511e2831942c-SJC
- 8ec79940487acf0a-SJC
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Fri, 08 Nov 2024 23:24:30 GMT
- Wed, 04 Dec 2024 00:14:51 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=Jt74XtlzTIX4qJ2I6JWts_1PUt6239EzTEKob0gbMYI-1733271291-1.0.1.1-Q2lRwwtee9d00lyYx_vjY4jpuRYySoyrIe_R4yj2LqAaxIDQLNQ99FxRmToXDEeMUPk5tUL1416FgdY7MIAURA;
path=/; expires=Wed, 04-Dec-24 00:44:51 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=LgL7hRr_QJbzVlAiPhNhtvybE1Xb67FkF.lr0Cryt2I-1733271291893-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
X-Content-Type-Options:
Expand All @@ -78,7 +83,7 @@ interactions:
openai-organization:
- future-house-xr4tdh
openai-processing-ms:
- "335"
- "255"
openai-version:
- "2020-10-01"
strict-transport-security:
Expand All @@ -96,7 +101,7 @@ interactions:
x-ratelimit-reset-tokens:
- 5ms
x-request-id:
- req_d402f5beca04001c51888e5bdd5b65e9
- req_9f36e635cd12593e29892e9b24310fc6
status:
code: 200
message: OK
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
interactions:
- request:
body:
'{"messages": [{"role": "system", "content": "Respond following this JSON
schema:\n\n{\"properties\": {\"name\": {\"title\": \"Name\", \"type\": \"string\"},
\"age\": {\"title\": \"Age\", \"type\": \"integer\"}}, \"required\": [\"name\",
\"age\"], \"title\": \"DummyOutputSchema\", \"type\": \"object\"}"}, {"role":
"user", "content": "My name is Claude and I am 1 year old. What is my name and
age?"}], "model": "gpt-3.5-turbo", "n": 2, "response_format": {"type": "json_object"}}'
'{"messages":[{"role":"user","content":"My name is Claude and I am 1 year
old. What is my name and age?"}],"model":"gpt-4o","response_format":{"type":"json_schema","json_schema":{"strict":true,"schema":{"properties":{"name":{"title":"Name","type":"string"},"age":{"title":"Age","type":"integer"}},"required":["name","age"],"title":"DummyOutputSchema","type":"object","additionalProperties":false},"name":"DummyOutputSchema"}}}'
headers:
accept:
- application/json
Expand All @@ -15,13 +11,13 @@ interactions:
connection:
- keep-alive
content-length:
- "480"
- "425"
content-type:
- application/json
host:
- api.openai.com
user-agent:
- AsyncOpenAI/Python 1.54.3
- AsyncOpenAI/Python 1.55.3
x-stainless-arch:
- arm64
x-stainless-async:
Expand All @@ -31,11 +27,11 @@ interactions:
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.54.3
- 1.55.3
x-stainless-raw-response:
- "true"
x-stainless-retry-count:
- "1"
- "0"
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
Expand All @@ -45,27 +41,26 @@ interactions:
response:
body:
string: !!binary |
H4sIAAAAAAAAA9xTTU/jMBC951dYc05R0y50yW0BcUXABYmsIteepqaOx7In0kLV/46cfiQVrLTn
veTw3ryn9zyTbSYEGA2lALWWrFpvJ7+envX00bJe3ty+ULy7Xt0/LR7U483mQ3rIk4KWb6j4qLpQ
1HqLbMjtaRVQMibXYjEviunP2dVVT7Sk0SZZ43kyv7iccBeWNJkWs8uDck1GYYRSvGZCCLHtvymj
0/gHSjHNj0iLMcoGoTwNCQGBbEJAxmgiS8eQD6Qix+j62NvKJagCJ1usoBQV3FrZaawgP1Ky6Zmi
cruxS8BVF2Vq4TprD/juFMtS4wMt44E/4SvjTFzXAWUklyJEJg/ZSPyla/E/ds2E+N2vujtrBD5Q
67lm2qBLhtfzvR0MxzWQ8x8HkomlHfBitsi/sas1sjQ2jt4PlFRr1IN0OCzZaUMjYryjr2m+894X
N675F/uBUAo9o659QG3UeeNhLGD69/42dnrkPjDE98jY1ivjGgw+mP4i+l3usk8AAAD//wMAxIib
wfwDAAA=
H4sIAAAAAAAAA4xSu07EMBDs8xXW1gkKCeKOdAgqKCgoeAVFPnuTMzhey3YE6HT/jpx75E6ARONi
Zmc8s/YqYQyUhIqBWPIgequzS/748PQh6AKL+5uebOvubuXl1/XzQKWFNCpo8YYi7FQngnqrMSgy
G1o45AGj6+msLItZkZfzkehJoo6yzobsjLIiL86yfJ7l51vhkpRADxV7SRhjbDWeMaKR+AkVy9Md
0qP3vEOo9kOMgSMdEeDeKx+4CZBOpCAT0IypVzUY3mMNVQ1Xmg8Sa0hr4F2ETteHKoft4HkMbQat
t/h6H0NTZx0t/Jbf460yyi8bh9yTiVf6QBZGdp0w9jrWHY4agHXU29AEekcTDWcXGzuY9juROy5Q
4HqC5/P0F7NGYuBK+4NtgeBiiXJSTqvlg1R0QCQHlX9m+c17U1uZ7j/2EyEE2oCysQ6lEsd9pzGH
8fP9NbZf8RgY/JcP2DetMh0669Tm/VvbzNrzBZbYLnJI1sk3AAAA//8DAEganQUIAwAA
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8df95108afcf67c1-SJC
- 8ec7ab7ffe551679-SJC
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Fri, 08 Nov 2024 23:24:26 GMT
- Wed, 04 Dec 2024 00:27:18 GMT
Server:
- cloudflare
Transfer-Encoding:
Expand All @@ -79,25 +74,25 @@ interactions:
openai-organization:
- future-house-xr4tdh
openai-processing-ms:
- "318"
- "367"
openai-version:
- "2020-10-01"
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- "12000"
- "10000"
x-ratelimit-limit-tokens:
- "1000000"
- "30000000"
x-ratelimit-remaining-requests:
- "11999"
- "9998"
x-ratelimit-remaining-tokens:
- "999894"
- "29999967"
x-ratelimit-reset-requests:
- 5ms
x-ratelimit-reset-tokens:
- 6ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_61e43e008d14a5da07c312853987c183
- req_b1f66bf72beec8f42091a30db5131d27
status:
code: 200
message: OK
Expand Down
Loading

0 comments on commit c3cd600

Please sign in to comment.