From f790ad3c50f050778af1fd31170746b7c68ca2fc Mon Sep 17 00:00:00 2001 From: Avinash Raj Date: Sun, 2 Jun 2024 13:36:13 +0530 Subject: [PATCH] [Frontend][OpenAI] Support for returning max_model_len on /v1/models response (#4643) --- vllm/entrypoints/openai/protocol.py | 1 + vllm/entrypoints/openai/serving_engine.py | 1 + 2 files changed, 2 insertions(+) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index e380212a4d76b..bbd61a2c5dd59 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -82,6 +82,7 @@ class ModelCard(OpenAIBaseModel): owned_by: str = "vllm" root: Optional[str] = None parent: Optional[str] = None + max_model_len: Optional[int] = None permission: List[ModelPermission] = Field(default_factory=list) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 066acdf1c019a..ae659d19c878b 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -62,6 +62,7 @@ async def show_available_models(self) -> ModelList: """Show available models. Right now we only have one model.""" model_cards = [ ModelCard(id=served_model_name, + max_model_len=self.max_model_len, root=self.served_model_names[0], permission=[ModelPermission()]) for served_model_name in self.served_model_names