
Commit b386872
Make sglang compat with vllm 0.5.1 (#598)
M0gician authored Jul 9, 2024
1 parent 710f614 commit b386872
Showing 4 changed files with 10 additions and 6 deletions.
README.md (6 changes: 5 additions & 1 deletion)
@@ -53,7 +53,11 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
 The docker images are available on Docker Hub as [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags).

 ### Common Notes
-- If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html).
+- If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html) by
+```
+pip uninstall -y triton triton-nightly
+pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly
+```
 - If you cannot install FlashInfer, check out its [installation](https://docs.flashinfer.ai/installation.html#) page. If you still cannot install it, you can use the slower Triton kernels by adding `--disable-flashinfer` when launching the server.
 - If you only need to use the OpenAI backend, you can avoid installing other dependencies by using `pip install "sglang[openai]"`.

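A side note, not part of this commit: the README entry above suggests `--disable-flashinfer` when FlashInfer cannot be installed. The snippet below is an illustrative sketch that only checks whether the `flashinfer` package is importable and echoes that suggestion.

```python
# Illustrative sketch, not part of this commit: check whether FlashInfer is
# importable and, if not, point at the README's --disable-flashinfer fallback.
import importlib.util

if importlib.util.find_spec("flashinfer") is None:
    print("FlashInfer not found; launch the server with --disable-flashinfer "
          "to fall back to the slower Triton kernels.")
else:
    print("FlashInfer is available.")
```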
python/pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -21,7 +21,7 @@ dependencies = [

 [project.optional-dependencies]
 srt = ["aiohttp", "fastapi", "hf_transfer", "huggingface_hub", "interegular", "packaging", "pillow",
-"psutil", "pydantic", "rpyc", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.0", "outlines>=0.0.44"]
+"psutil", "pydantic", "rpyc", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.1", "outlines>=0.0.44"]
 openai = ["openai>=1.0", "tiktoken"]
 anthropic = ["anthropic>=0.20.0"]
 litellm = ["litellm>=1.0.0"]
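For context rather than as part of the diff, a minimal sketch of checking that the local environment matches the new `vllm==0.5.1` pin; the exact-version assertion and the `sglang[srt]` hint follow from the pyproject.toml extras shown above.

```python
# Minimal sketch, not part of this commit: confirm the installed vllm matches
# the vllm==0.5.1 pin introduced in pyproject.toml.
from importlib.metadata import PackageNotFoundError, version

try:
    installed = version("vllm")
except PackageNotFoundError:
    raise SystemExit("vllm is not installed; `pip install 'sglang[srt]'` pulls vllm==0.5.1.")

if installed != "0.5.1":
    raise SystemExit(f"Expected vllm 0.5.1 for this sglang revision, found {installed}.")
print(f"vllm {installed} matches the pinned version.")
```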
python/sglang/srt/managers/controller/model_runner.py (2 changes: 1 addition & 1 deletion)
@@ -326,7 +326,7 @@ def load_model(self):
 device_config=device_config,
 load_config=load_config,
 lora_config=None,
-vision_language_config=None,
+multimodal_config=None,
 parallel_config=None,
 scheduler_config=None,
 cache_config=None,
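For readers who need to support both vllm releases, a hedged sketch, not part of this commit, of selecting the renamed keyword at runtime; the commented-out `get_model` call site is hypothetical and only mirrors the keyword names visible in the diff.

```python
# Hedged sketch, not part of this commit: vllm 0.5.1 renames the
# vision_language_config keyword to multimodal_config, so a compatibility
# shim can pick the name based on the installed vllm version.
from importlib.metadata import version
from packaging.version import Version


def multimodal_kwarg(value=None) -> dict:
    """Return the keyword argument under the name the installed vllm expects."""
    if Version(version("vllm")) >= Version("0.5.1"):
        return {"multimodal_config": value}
    return {"vision_language_config": value}


# Hypothetical call site, mirroring the keywords in the diff above:
# model = get_model(
#     device_config=device_config,
#     load_config=load_config,
#     lora_config=None,
#     parallel_config=None,
#     scheduler_config=None,
#     cache_config=None,
#     **multimodal_kwarg(None),
# )
```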
python/sglang/srt/utils.py (6 changes: 3 additions & 3 deletions)
@@ -476,7 +476,7 @@ def monkey_patch_vllm_dummy_weight_loader():
 ModelConfig,
 ParallelConfig,
 SchedulerConfig,
-VisionLanguageConfig,
+MultiModalConfig,
 _initialize_model,
 initialize_dummy_weights,
 nn,
@@ -489,7 +489,7 @@ def load_model(
 model_config: ModelConfig,
 device_config: DeviceConfig,
 lora_config: Optional[LoRAConfig],
-vision_language_config: Optional[VisionLanguageConfig],
+multimodal_config: Optional[MultiModalConfig],
 parallel_config: ParallelConfig,
 scheduler_config: SchedulerConfig,
 cache_config: CacheConfig,
@@ -500,7 +500,7 @@ def load_model(
 model_config,
 self.load_config,
 lora_config,
-vision_language_config,
+multimodal_config,
 cache_config,
 )

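In the same spirit, a minimal sketch, not part of this commit, of a version-tolerant import for the renamed class; the `vllm.config` module path is an assumption for illustration.

```python
# Minimal sketch, not part of this commit: import the config class under its
# new name on vllm >= 0.5.1 and fall back to the old name on vllm 0.5.0.
# The vllm.config module path is an assumption for illustration.
try:
    from vllm.config import MultiModalConfig  # vllm >= 0.5.1
except ImportError:
    from vllm.config import VisionLanguageConfig as MultiModalConfig  # vllm == 0.5.0
```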
