diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
index 3a2c2761b0..2b9296d8f2 100644
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@@ -1,5 +1,5 @@
 # Usage (to build SGLang ROCm docker image):
-# docker build --build-arg SGL_BRANCH=v0.3.6.post2 -t testImage -f Dockerfile.rocm .
+# docker build --build-arg SGL_BRANCH=v0.3.6.post2 -t v0.3.6.post2-rocm620 -f Dockerfile.rocm .
 
 # default base image
 ARG BASE_IMAGE="rocm/vllm-dev:20241022"
diff --git a/docs/start/install.md b/docs/start/install.md
index 220fc3c5b5..8debab0eb1 100644
--- a/docs/start/install.md
+++ b/docs/start/install.md
@@ -28,6 +28,17 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
 
 Note: Please check the [FlashInfer installation doc](https://docs.flashinfer.ai/installation.html) to install the proper version according to your PyTorch and CUDA versions.
 
+Note: For AMD ROCm systems with Instinct/MI GPUs, do the following instead:
+
+```
+# Use the latest release branch
+git clone -b v0.3.6.post2 https://github.com/sgl-project/sglang.git
+cd sglang
+
+pip install --upgrade pip
+pip install -e "python[all_hip]"
+```
+
 ## Method 3: Using docker
 
 The docker images are available on Docker Hub as [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags), built from [Dockerfile](https://github.com/sgl-project/sglang/tree/main/docker). Replace `<secret>` below with your huggingface hub [token](https://huggingface.co/docs/hub/en/security-tokens).
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index 7c3a7d68de..a327f37a2f 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -171,6 +171,10 @@ def __init__(
             self.enable_overlap = False
             logger.info("Overlap scheduler is disabled for embedding models.")
 
+        if self.model_config.is_multimodal:
+            self.enable_overlap = False
+            logger.info("Overlap scheduler is disabled for multimodal models.")
+
         if self.enable_overlap:
             self.disable_jump_forward = True
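
For context, here is a minimal, self-contained sketch (not the actual SGLang `Scheduler` class) of the gating pattern the scheduler.py hunk extends: overlap scheduling is force-disabled for embedding models and, with this patch, for multimodal models as well. The `ModelConfig` dataclass and the condition used for the embedding check are simplified stand-ins for illustration; only `model_config.is_multimodal` is taken from the diff itself.

```python
# Sketch of the overlap-scheduler gating logic added in this patch.
# Names other than is_multimodal are illustrative stand-ins.
import logging
from dataclasses import dataclass

logger = logging.getLogger(__name__)


@dataclass
class ModelConfig:
    is_embedding: bool = False   # stand-in for the existing embedding check
    is_multimodal: bool = False  # flag consulted by the new check


class SchedulerSketch:
    def __init__(self, model_config: ModelConfig, enable_overlap: bool = True):
        self.model_config = model_config
        self.enable_overlap = enable_overlap
        self.disable_jump_forward = False

        # Existing behavior: embedding models do not use the overlap scheduler.
        if self.model_config.is_embedding:
            self.enable_overlap = False
            logger.info("Overlap scheduler is disabled for embedding models.")

        # New in this patch: multimodal models also opt out of overlap.
        if self.model_config.is_multimodal:
            self.enable_overlap = False
            logger.info("Overlap scheduler is disabled for multimodal models.")

        # When overlap stays enabled, jump-forward decoding is turned off.
        if self.enable_overlap:
            self.disable_jump_forward = True


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    sched = SchedulerSketch(ModelConfig(is_multimodal=True))
    print("enable_overlap:", sched.enable_overlap)  # False for multimodal models
```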