diff --git a/README.md b/README.md index c22c257b59..01d2dd9bc1 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,11 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ The docker images are available on Docker Hub as [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags). ### Common Notes -- If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html). +- If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html) by running: +``` +pip uninstall -y triton triton-nightly +pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly +``` - If you cannot install FlashInfer, check out its [installation](https://docs.flashinfer.ai/installation.html#) page. If you still cannot install it, you can use the slower Triton kernels by adding `--disable-flashinfer` when launching the server. - If you only need to use the OpenAI backend, you can avoid installing other dependencies by using `pip install "sglang[openai]"`. 
diff --git a/python/pyproject.toml b/python/pyproject.toml index 22b7e69dff..e8de29e1df 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ [project.optional-dependencies] srt = ["aiohttp", "fastapi", "hf_transfer", "huggingface_hub", "interegular", "packaging", "pillow", - "psutil", "pydantic", "rpyc", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.0", "outlines>=0.0.44"] + "psutil", "pydantic", "rpyc", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.1", "outlines>=0.0.44"] openai = ["openai>=1.0", "tiktoken"] anthropic = ["anthropic>=0.20.0"] litellm = ["litellm>=1.0.0"] diff --git a/python/sglang/srt/managers/controller/model_runner.py b/python/sglang/srt/managers/controller/model_runner.py index 879f44151c..2877ada9b8 100644 --- a/python/sglang/srt/managers/controller/model_runner.py +++ b/python/sglang/srt/managers/controller/model_runner.py @@ -323,7 +323,7 @@ def load_model(self): device_config=device_config, load_config=load_config, lora_config=None, - vision_language_config=None, + multimodal_config=None, parallel_config=None, scheduler_config=None, cache_config=None, diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py index a9ea62e4b8..78af85095e 100644 --- a/python/sglang/srt/utils.py +++ b/python/sglang/srt/utils.py @@ -480,7 +480,7 @@ def monkey_patch_vllm_dummy_weight_loader(): ModelConfig, ParallelConfig, SchedulerConfig, - VisionLanguageConfig, + MultiModalConfig, _initialize_model, initialize_dummy_weights, nn, @@ -493,7 +493,7 @@ def load_model( model_config: ModelConfig, device_config: DeviceConfig, lora_config: Optional[LoRAConfig], - vision_language_config: Optional[VisionLanguageConfig], + multimodal_config: Optional[MultiModalConfig], parallel_config: ParallelConfig, scheduler_config: SchedulerConfig, cache_config: CacheConfig, @@ -504,7 +504,7 @@ def load_model( model_config, self.load_config, lora_config, - vision_language_config, + multimodal_config, cache_config, )