diff --git a/README.md b/README.md index c22c257b59..01d2dd9bc1 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,11 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ The docker images are available on Docker Hub as [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags). ### Common Notes -- If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html). +- If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html) by running: +``` +pip uninstall -y triton triton-nightly +pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly +``` - If you cannot install FlashInfer, check out its [installation](https://docs.flashinfer.ai/installation.html#) page. If you still cannot install it, you can use the slower Triton kernels by adding `--disable-flashinfer` when launching the server. - If you only need to use the OpenAI backend, you can avoid installing other dependencies by using `pip install "sglang[openai]"`. 
diff --git a/python/pyproject.toml b/python/pyproject.toml index 22b7e69dff..e8de29e1df 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ [project.optional-dependencies] srt = ["aiohttp", "fastapi", "hf_transfer", "huggingface_hub", "interegular", "packaging", "pillow", - "psutil", "pydantic", "rpyc", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.0", "outlines>=0.0.44"] + "psutil", "pydantic", "rpyc", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.1", "outlines>=0.0.44"] openai = ["openai>=1.0", "tiktoken"] anthropic = ["anthropic>=0.20.0"] litellm = ["litellm>=1.0.0"] diff --git a/python/sglang/srt/managers/controller/model_runner.py b/python/sglang/srt/managers/controller/model_runner.py index 879f44151c..2877ada9b8 100644 --- a/python/sglang/srt/managers/controller/model_runner.py +++ b/python/sglang/srt/managers/controller/model_runner.py @@ -323,7 +323,7 @@ def load_model(self): device_config=device_config, load_config=load_config, lora_config=None, - vision_language_config=None, + multimodal_config=None, parallel_config=None, scheduler_config=None, cache_config=None, diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py index a9ea62e4b8..78af85095e 100644 --- a/python/sglang/srt/utils.py +++ b/python/sglang/srt/utils.py @@ -480,7 +480,7 @@ def monkey_patch_vllm_dummy_weight_loader(): ModelConfig, ParallelConfig, SchedulerConfig, - VisionLanguageConfig, + MultiModalConfig, _initialize_model, initialize_dummy_weights, nn, @@ -493,7 +493,7 @@ def load_model( model_config: ModelConfig, device_config: DeviceConfig, lora_config: Optional[LoRAConfig], - vision_language_config: Optional[VisionLanguageConfig], + multimodal_config: Optional[MultiModalConfig], parallel_config: ParallelConfig, scheduler_config: SchedulerConfig, cache_config: CacheConfig, @@ -504,7 +504,7 @@ def load_model( model_config, self.load_config, lora_config, - vision_language_config, + multimodal_config, cache_config, )