From cf70c398a3cf0e8a96f9a38acd69b46bb873ea39 Mon Sep 17 00:00:00 2001 From: Matthias Reso <13337103+mreso@users.noreply.github.com> Date: Wed, 11 Sep 2024 22:59:00 +0000 Subject: [PATCH 1/2] Use spawn instead of fork method for vllm --- ts/torch_handler/vllm_handler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ts/torch_handler/vllm_handler.py b/ts/torch_handler/vllm_handler.py index 927efe93e2..02f7ee77b0 100644 --- a/ts/torch_handler/vllm_handler.py +++ b/ts/torch_handler/vllm_handler.py @@ -1,5 +1,6 @@ import asyncio import logging +import os import pathlib import time from unittest.mock import MagicMock @@ -40,6 +41,8 @@ def initialize(self, ctx): vllm_engine_config = self._get_vllm_engine_config( ctx.model_yaml_config.get("handler", {}) ) + + os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" self.vllm_engine = AsyncLLMEngine.from_engine_args(vllm_engine_config) From 7c61e5a362d8dfa72625dd85904f10ddb96a4ac3 Mon Sep 17 00:00:00 2001 From: Matthias Reso <13337103+mreso@users.noreply.github.com> Date: Wed, 11 Sep 2024 23:10:45 +0000 Subject: [PATCH 2/2] Fix lint --- ts/torch_handler/vllm_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ts/torch_handler/vllm_handler.py b/ts/torch_handler/vllm_handler.py index 02f7ee77b0..910a9461cc 100644 --- a/ts/torch_handler/vllm_handler.py +++ b/ts/torch_handler/vllm_handler.py @@ -41,7 +41,7 @@ def initialize(self, ctx): vllm_engine_config = self._get_vllm_engine_config( ctx.model_yaml_config.get("handler", {}) ) - + os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" self.vllm_engine = AsyncLLMEngine.from_engine_args(vllm_engine_config)