Commit

format cleanup
agt committed Sep 24, 2024
1 parent fa90d4f commit edcc183
Showing 2 changed files with 23 additions and 20 deletions.
12 changes: 6 additions & 6 deletions vllm/entrypoints/openai/serving_chat.py
@@ -22,8 +22,8 @@
     ChatCompletionRequest, ChatCompletionResponse,
     ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice,
     ChatCompletionStreamResponse, ChatMessage, DeltaFunctionCall, DeltaMessage,
-    DeltaToolCall, ErrorResponse, FunctionCall, ToolCall, UsageInfo,
-    RequestResponseMetadata)
+    DeltaToolCall, ErrorResponse, FunctionCall, RequestResponseMetadata,
+    ToolCall, UsageInfo)
 from vllm.entrypoints.openai.serving_engine import (BaseModelPath,
                                                     LoRAModulePath,
                                                     OpenAIServing,
@@ -589,12 +589,12 @@ async def chat_completion_stream_generator(
                 exclude_unset=True, exclude_none=True))
             yield f"data: {final_usage_data}\n\n"

-            # report to FastAPI middleware aggregate number of completion tokens (across all choices)
+            # report to FastAPI middleware aggregate usage across all choices
             num_completion_tokens = sum(previous_num_tokens)
             request_metadata.final_usage_info = UsageInfo(
-                    prompt_tokens=num_prompt_tokens,
-                    completion_tokens=num_completion_tokens,
-                    total_tokens=num_prompt_tokens+num_completion_tokens)
+                prompt_tokens=num_prompt_tokens,
+                completion_tokens=num_completion_tokens,
+                total_tokens=num_prompt_tokens + num_completion_tokens)

         except ValueError as e:
             # TODO: Use a vllm-specific Validation Error
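For context, the hunk above only reformats existing behaviour: once the stream finishes, the handler sums the per-choice completion-token counts and records the totals on request_metadata.final_usage_info so that FastAPI middleware can read aggregate usage. A minimal, self-contained sketch of that arithmetic, assuming a plain dataclass as a stand-in for vllm.entrypoints.openai.protocol.UsageInfo and a hypothetical helper name aggregate_chat_usage that is not part of the diff:

from dataclasses import dataclass
from typing import List

@dataclass
class UsageInfo:
    # Stand-in for vllm.entrypoints.openai.protocol.UsageInfo used in the diff.
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

def aggregate_chat_usage(num_prompt_tokens: int,
                         previous_num_tokens: List[int]) -> UsageInfo:
    # Prompt tokens are counted once per request; completion tokens are summed
    # across every choice that streamed deltas, mirroring the hunk above.
    num_completion_tokens = sum(previous_num_tokens)
    return UsageInfo(prompt_tokens=num_prompt_tokens,
                     completion_tokens=num_completion_tokens,
                     total_tokens=num_prompt_tokens + num_completion_tokens)

# A 12-token prompt with n=2 choices that generated 30 and 25 tokens
# reports 12 prompt, 55 completion, 67 total tokens.
print(aggregate_chat_usage(12, [30, 25]))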
31 changes: 17 additions & 14 deletions vllm/entrypoints/openai/serving_completion.py
@@ -18,8 +18,9 @@
                                               CompletionResponseChoice,
                                               CompletionResponseStreamChoice,
                                               CompletionStreamResponse,
-                                              ErrorResponse, UsageInfo,
-                                              RequestResponseMetadata)
+                                              ErrorResponse,
+                                              RequestResponseMetadata,
+                                              UsageInfo)
 # yapf: enable
 from vllm.entrypoints.openai.serving_engine import (BaseModelPath,
                                                     LoRAModulePath,
@@ -170,14 +171,15 @@ async def create_completion(

         # Streaming response
         if stream:
-            return self.completion_stream_generator(request,
-                                                     result_generator,
-                                                     request_id,
-                                                     created_time,
-                                                     model_name,
-                                                     num_prompts=len(prompts),
-                                                     tokenizer=tokenizer,
-                                                     request_metadata=request_metadata)
+            return self.completion_stream_generator(
+                request,
+                result_generator,
+                request_id,
+                created_time,
+                model_name,
+                num_prompts=len(prompts),
+                tokenizer=tokenizer,
+                request_metadata=request_metadata)

         # Non-streaming response
         final_res_batch: List[Optional[RequestOutput]] = [None] * len(prompts)
@@ -354,12 +356,13 @@ async def completion_stream_generator(
                     exclude_unset=False, exclude_none=True))
                 yield f"data: {final_usage_data}\n\n"

-            # report to FastAPI middleware aggregate tokens (all prompts, all completions)
+            # report to FastAPI middleware aggregate usage across all choices
             total_prompt_tokens = sum(num_prompt_tokens)
             total_completion_tokens = sum(previous_num_tokens)
-            request_metadata.final_usage_info = UsageInfo(prompt_tokens=total_prompt_tokens,
-                                                          completion_tokens=total_completion_tokens,
-                                                          total_tokens=total_prompt_tokens + total_completion_tokens)
+            request_metadata.final_usage_info = UsageInfo(
+                prompt_tokens=total_prompt_tokens,
+                completion_tokens=total_completion_tokens,
+                total_tokens=total_prompt_tokens + total_completion_tokens)

         except ValueError as e:
             # TODO: Use a vllm-specific Validation Error
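The completions endpoint differs only in that a request may carry several prompts: prompt tokens are summed over every prompt in the batch and completion tokens over every streamed choice. A rough sketch of the same aggregation, again with a hypothetical helper name (aggregate_completion_usage is not part of the diff) and a plain dict standing in for UsageInfo:

from typing import List

def aggregate_completion_usage(num_prompt_tokens: List[int],
                               previous_num_tokens: List[int]) -> dict:
    # One prompt-token count per prompt in the batch, one completion-token
    # count per choice, matching the sums in completion_stream_generator.
    total_prompt_tokens = sum(num_prompt_tokens)
    total_completion_tokens = sum(previous_num_tokens)
    return {
        "prompt_tokens": total_prompt_tokens,
        "completion_tokens": total_completion_tokens,
        "total_tokens": total_prompt_tokens + total_completion_tokens,
    }

# Two prompts of 10 and 14 tokens, each producing a 20-token completion:
# {'prompt_tokens': 24, 'completion_tokens': 40, 'total_tokens': 64}
print(aggregate_completion_usage([10, 14], [20, 20]))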
