From 0ffdb6dd20f3cf45445b69d80aa93f793faf222d Mon Sep 17 00:00:00 2001
From: Kevin Conner
Date: Sun, 24 Nov 2024 16:37:37 -0800
Subject: [PATCH] Fix object property value in mlx_lm.server chat completions
 response to match OpenAI spec (#1119)

These were "chat.completions" and "chat.completions.chunk" but should be
"chat.completion" and "chat.completion.chunk" for compatibility with
clients expecting an OpenAI API.

In particular, this solves a problem in which aider 0.64.1 reports
hitting a token limit on any completion request, no matter how small,
despite apparently correct counts in the usage property.

Refer to:

https://platform.openai.com/docs/api-reference/chat/object

> object string
> The object type, which is always chat.completion.

https://platform.openai.com/docs/api-reference/chat/streaming

> object string
> The object type, which is always chat.completion.chunk.
---
 llms/mlx_lm/SERVER.md | 2 +-
 llms/mlx_lm/server.py | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/llms/mlx_lm/SERVER.md b/llms/mlx_lm/SERVER.md
index 2976a09fc..e544c6fa3 100644
--- a/llms/mlx_lm/SERVER.md
+++ b/llms/mlx_lm/SERVER.md
@@ -92,7 +92,7 @@ curl localhost:8080/v1/chat/completions \
 
 - `system_fingerprint`: A unique identifier for the system.
 
-- `object`: Any of "chat.completions", "chat.completions.chunk" (for
+- `object`: Any of "chat.completion", "chat.completion.chunk" (for
   streaming), or "text.completion".
 
 - `model`: The model repo or path (e.g. `"mlx-community/Llama-3.2-3B-Instruct-4bit"`).
diff --git a/llms/mlx_lm/server.py b/llms/mlx_lm/server.py
index badc6dd37..ce09cf45c 100644
--- a/llms/mlx_lm/server.py
+++ b/llms/mlx_lm/server.py
@@ -589,9 +589,7 @@ def handle_chat_completions(self) -> List[int]:
 
         # Determine response type
         self.request_id = f"chatcmpl-{uuid.uuid4()}"
-        self.object_type = (
-            "chat.completions.chunk" if self.stream else "chat.completions"
-        )
+        self.object_type = "chat.completion.chunk" if self.stream else "chat.completion"
         if (
             hasattr(self.tokenizer, "apply_chat_template")
             and self.tokenizer.chat_template
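
---

A minimal client-side sketch of the behavior this patch corrects, for
illustration only: it checks the `object` field of a non-streaming response
and of streaming chunks against the spec values quoted above. It assumes an
mlx_lm.server instance already running at localhost:8080 (as in the SERVER.md
example) and the third-party `requests` package; the message body and
max_tokens value are arbitrary.

import json

import requests

URL = "http://localhost:8080/v1/chat/completions"  # assumed local server
payload = {
    "messages": [{"role": "user", "content": "Say hello."}],
    "max_tokens": 16,  # arbitrary small value, enough for the check
}

# Non-streaming: per the OpenAI spec, `object` is always "chat.completion".
resp = requests.post(URL, json=payload)
assert resp.json()["object"] == "chat.completion"

# Streaming: each SSE data line carries a chunk whose `object` is always
# "chat.completion.chunk"; skip the "[DONE]" terminator sentinel.
with requests.post(URL, json={**payload, "stream": True}, stream=True) as r:
    for line in r.iter_lines():
        if line.startswith(b"data: ") and line != b"data: [DONE]":
            chunk = json.loads(line[len(b"data: "):])
            assert chunk["object"] == "chat.completion.chunk"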