Commit

format cleanup
agt committed Sep 24, 2024
1 parent fa90d4f commit edcc183
Showing 2 changed files with 23 additions and 20 deletions.
12 changes: 6 additions & 6 deletions vllm/entrypoints/openai/serving_chat.py
@@ -22,8 +22,8 @@
     ChatCompletionRequest, ChatCompletionResponse,
     ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice,
     ChatCompletionStreamResponse, ChatMessage, DeltaFunctionCall, DeltaMessage,
-    DeltaToolCall, ErrorResponse, FunctionCall, ToolCall, UsageInfo,
-    RequestResponseMetadata)
+    DeltaToolCall, ErrorResponse, FunctionCall, RequestResponseMetadata,
+    ToolCall, UsageInfo)
 from vllm.entrypoints.openai.serving_engine import (BaseModelPath,
                                                     LoRAModulePath,
                                                     OpenAIServing,
@@ -589,12 +589,12 @@ async def chat_completion_stream_generator(
                 exclude_unset=True, exclude_none=True))
             yield f"data: {final_usage_data}\n\n"

-            # report to FastAPI middleware aggregate number of completion tokens (across all choices)
+            # report to FastAPI middleware aggregate usage across all choices
             num_completion_tokens = sum(previous_num_tokens)
             request_metadata.final_usage_info = UsageInfo(
-                    prompt_tokens=num_prompt_tokens,
-                    completion_tokens=num_completion_tokens,
-                    total_tokens=num_prompt_tokens+num_completion_tokens)
+                prompt_tokens=num_prompt_tokens,
+                completion_tokens=num_completion_tokens,
+                total_tokens=num_prompt_tokens + num_completion_tokens)

         except ValueError as e:
             # TODO: Use a vllm-specific Validation Error
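For context, the hunk above only reformats existing behaviour: once the stream finishes, the handler sums the per-choice completion-token counts and records the totals on request_metadata.final_usage_info so that FastAPI middleware can read aggregate usage. A minimal, self-contained sketch of that arithmetic, assuming a plain dataclass as a stand-in for vllm.entrypoints.openai.protocol.UsageInfo and a hypothetical helper name aggregate_chat_usage that is not part of the diff:

from dataclasses import dataclass
from typing import List

@dataclass
class UsageInfo:
    # Stand-in for vllm.entrypoints.openai.protocol.UsageInfo used in the diff.
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

def aggregate_chat_usage(num_prompt_tokens: int,
                         previous_num_tokens: List[int]) -> UsageInfo:
    # Prompt tokens are counted once per request; completion tokens are summed
    # across every choice that streamed deltas, mirroring the hunk above.
    num_completion_tokens = sum(previous_num_tokens)
    return UsageInfo(prompt_tokens=num_prompt_tokens,
                     completion_tokens=num_completion_tokens,
                     total_tokens=num_prompt_tokens + num_completion_tokens)

# A 12-token prompt with n=2 choices that generated 30 and 25 tokens
# reports 12 prompt, 55 completion, 67 total tokens.
print(aggregate_chat_usage(12, [30, 25]))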
31 changes: 17 additions & 14 deletions vllm/entrypoints/openai/serving_completion.py
@@ -18,8 +18,9 @@
                                               CompletionResponseChoice,
                                               CompletionResponseStreamChoice,
                                               CompletionStreamResponse,
-                                              ErrorResponse, UsageInfo,
-                                              RequestResponseMetadata)
+                                              ErrorResponse,
+                                              RequestResponseMetadata,
+                                              UsageInfo)
 # yapf: enable
 from vllm.entrypoints.openai.serving_engine import (BaseModelPath,
                                                     LoRAModulePath,
@@ -170,14 +171,15 @@ async def create_completion(

         # Streaming response
         if stream:
-            return self.completion_stream_generator(request,
-                                                     result_generator,
-                                                     request_id,
-                                                     created_time,
-                                                     model_name,
-                                                     num_prompts=len(prompts),
-                                                     tokenizer=tokenizer,
-                                                     request_metadata=request_metadata)
+            return self.completion_stream_generator(
+                request,
+                result_generator,
+                request_id,
+                created_time,
+                model_name,
+                num_prompts=len(prompts),
+                tokenizer=tokenizer,
+                request_metadata=request_metadata)

         # Non-streaming response
         final_res_batch: List[Optional[RequestOutput]] = [None] * len(prompts)
@@ -354,12 +356,13 @@ async def completion_stream_generator(
                     exclude_unset=False, exclude_none=True))
                 yield f"data: {final_usage_data}\n\n"

-            # report to FastAPI middleware aggregate tokens (all prompts, all completions)
+            # report to FastAPI middleware aggregate usage across all choices
             total_prompt_tokens = sum(num_prompt_tokens)
             total_completion_tokens = sum(previous_num_tokens)
-            request_metadata.final_usage_info = UsageInfo(prompt_tokens=total_prompt_tokens,
-                                                          completion_tokens=total_completion_tokens,
-                                                          total_tokens=total_prompt_tokens + total_completion_tokens)
+            request_metadata.final_usage_info = UsageInfo(
+                prompt_tokens=total_prompt_tokens,
+                completion_tokens=total_completion_tokens,
+                total_tokens=total_prompt_tokens + total_completion_tokens)

         except ValueError as e:
             # TODO: Use a vllm-specific Validation Error
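The completions endpoint differs only in that a request may carry several prompts: prompt tokens are summed over every prompt in the batch and completion tokens over every streamed choice. A rough sketch of the same aggregation, again with a hypothetical helper name (aggregate_completion_usage is not part of the diff) and a plain dict standing in for UsageInfo:

from typing import List

def aggregate_completion_usage(num_prompt_tokens: List[int],
                               previous_num_tokens: List[int]) -> dict:
    # One prompt-token count per prompt in the batch, one completion-token
    # count per choice, matching the sums in completion_stream_generator.
    total_prompt_tokens = sum(num_prompt_tokens)
    total_completion_tokens = sum(previous_num_tokens)
    return {
        "prompt_tokens": total_prompt_tokens,
        "completion_tokens": total_completion_tokens,
        "total_tokens": total_prompt_tokens + total_completion_tokens,
    }

# Two prompts of 10 and 14 tokens, each producing a 20-token completion:
# {'prompt_tokens': 24, 'completion_tokens': 40, 'total_tokens': 64}
print(aggregate_completion_usage([10, 14], [20, 20]))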
