
Commit

Update vllm to 0.6.2
mreso committed Oct 7, 2024
1 parent e8879c1 commit 32235f2
Showing 2 changed files with 9 additions and 4 deletions.
examples/large_models/vllm/requirements.txt (2 changes: 1 addition & 1 deletion)
@@ -1 +1 @@
-vllm==0.6.1.post2
+vllm==0.6.2
ts/torch_handler/vllm_handler.py (11 changes: 8 additions & 3 deletions)
@@ -13,7 +13,7 @@
 )
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
-from vllm.entrypoints.openai.serving_engine import LoRAModulePath
+from vllm.entrypoints.openai.serving_engine import BaseModelPath, LoRAModulePath
 
 from ts.handler_utils.utils import send_intermediate_predict_response
 from ts.service import PredictionException
@@ -54,6 +54,11 @@ def initialize(self, ctx):
         else:
             served_model_names = [vllm_engine_config.model]
 
+        base_model_paths = [
+            BaseModelPath(name=name, model_path=vllm_engine_config.model)
+            for name in served_model_names
+        ]
+
         chat_template = ctx.model_yaml_config.get("handler", {}).get(
             "chat_template", None
         )
@@ -64,7 +69,7 @@ def initialize(self, ctx):
         self.completion_service = OpenAIServingCompletion(
             self.vllm_engine,
             model_config,
-            served_model_names,
+            base_model_paths,
             lora_modules=lora_modules,
             prompt_adapters=None,
             request_logger=None,
@@ -73,7 +78,7 @@ def initialize(self, ctx):
         self.chat_completion_service = OpenAIServingChat(
             self.vllm_engine,
             model_config,
-            served_model_names,
+            base_model_paths,
             "assistant",
             lora_modules=lora_modules,
             prompt_adapters=None,
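
For context (not part of the commit): the handler edits above track an API change in vLLM 0.6.2, where OpenAIServingCompletion and OpenAIServingChat take a list of BaseModelPath objects instead of the earlier list of served model names. A minimal sketch of that mapping, using a hypothetical model name and path rather than values from this repository:

from vllm.entrypoints.openai.serving_engine import BaseModelPath

# Hypothetical values for illustration only; the handler derives these from
# the model archive and vllm_engine_config at runtime.
served_model_names = ["llama-3-8b-instruct"]
model_path = "/home/model-server/model-store/llama-3-8b-instruct"

# vLLM >= 0.6.2 pairs the name a client uses in OpenAI-style requests
# with the on-disk model path via BaseModelPath.
base_model_paths = [
    BaseModelPath(name=name, model_path=model_path)
    for name in served_model_names
]

The commit builds this list once in initialize() and passes it to both the completion and chat serving frontends, keeping a single source of truth for the name-to-path mapping.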
