Skip to content

Commit

Permalink
[Frontend] Add template related params to request (vllm-project#5709)
Browse files: browse the repository at this point in the history
  • Loading branch information
danieljannai21 authored Jul 2, 2024
1 parent 3476ed0 commit 2c37540
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 1 deletion.
2 changes: 1 addition & 1 deletion requirements-common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ numpy < 2.0.0
requests
tqdm
py-cpuinfo
transformers >= 4.42.0 # Required for Gemma 2.
transformers >= 4.42.0 # Required for Gemma 2 and for additional chat template parameters.
tokenizers >= 0.19.1 # Required for Llama 3.
fastapi
aiohttp
Expand Down
21 changes: 21 additions & 0 deletions vllm/entrypoints/openai/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,27 @@ class ChatCompletionRequest(OpenAIBaseModel):
"special tokens so this should be set to False (as is the "
"default)."),
)
documents: Optional[List[Dict[str, str]]] = Field(
default=None,
description=
("A list of dicts representing documents that will be accessible to "
"the model if it is performing RAG (retrieval-augmented generation)."
" If the template does not support RAG, this argument will have no "
"effect. We recommend that each document should be a dict containing "
"\"title\" and \"text\" keys."),
)
chat_template: Optional[str] = Field(
default=None,
description=(
"A Jinja template to use for this conversion. "
"If this is not passed, the model's default chat template will be "
"used instead."),
)
chat_template_kwargs: Optional[Dict[str, Any]] = Field(
default=None,
description=("Additional kwargs to pass to the template renderer. "
"Will be accessible by the chat template."),
)
include_stop_str_in_output: Optional[bool] = Field(
default=False,
description=(
Expand Down
8 changes: 8 additions & 0 deletions vllm/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,18 @@ async def create_chat_completion(
conversation.extend(chat_parsed_result.messages)
image_futures.extend(chat_parsed_result.image_futures)

tool_dicts = None if request.tools is None else [
tool.model_dump() for tool in request.tools
]

prompt = self.tokenizer.apply_chat_template(
conversation=conversation,
tokenize=False,
add_generation_prompt=request.add_generation_prompt,
tools=tool_dicts,
documents=request.documents,
chat_template=request.chat_template,
**(request.chat_template_kwargs or {}),
)
except Exception as e:
logger.error("Error in applying chat template from request: %s", e)
Expand Down

0 comments on commit 2c37540

Please sign in to comment.