From 7306d551606109dfaa0fbca3184f7547de07719a Mon Sep 17 00:00:00 2001 From: HaiShaw Date: Mon, 4 Nov 2024 16:38:08 -0800 Subject: [PATCH 1/3] [Release, ROCm] release ROCm docker build for AMD MI GPUs --- .github/workflows/release-docker.yml | 28 +++++++++++++++++----------- docker/Dockerfile.rocm | 2 +- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index 619b07fe34..e69fc3a6e4 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -14,7 +14,7 @@ jobs: environment: 'prod' strategy: matrix: - cuda_version: ['11.8.0', '12.1.1', '12.4.1'] + vendor_version: ['11.8.0', '12.1.1', '12.4.1', '6.2.0'] build_type: ['all', 'srt'] steps: - name: Delete huge unnecessary tools folder @@ -33,18 +33,20 @@ jobs: run: | version=$(cat python/sglang/version.py | cut -d'"' -f2) - if [ "${{ matrix.cuda_version }}" = "11.8.0" ]; then - cuda_tag="cu118" - elif [ "${{ matrix.cuda_version }}" = "12.1.1" ]; then - cuda_tag="cu121" - elif [ "${{ matrix.cuda_version }}" = "12.4.1" ]; then - cuda_tag="cu124" + if [ "${{ matrix.vendor_version }}" = "11.8.0" ]; then + ven_tag="cu118" + elif [ "${{ matrix.vendor_version }}" = "12.1.1" ]; then + ven_tag="cu121" + elif [ "${{ matrix.vendor_version }}" = "12.4.1" ]; then + ven_tag="cu124" + elif [ "${{ matrix.vendor_version }}" = "6.2.0" ]; then + ven_tag="rocm620" else - echo "Unsupported CUDA version" + echo "Unsupported CUDA or ROCm version" exit 1 fi - tag=v${version}-${cuda_tag} + tag=v${version}-${ven_tag} if [ "${{ matrix.build_type }}" = "all" ]; then tag_suffix="" @@ -55,10 +57,14 @@ jobs: exit 1 fi - docker build . -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.cuda_version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache + if [ "${{ matrix.vendor_version }}" = "6.2.0" ]; then + docker build . -f docker/Dockerfile.rocm --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache + else + docker build . -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.vendor_version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache + fi docker push lmsysorg/sglang:${tag}${tag_suffix} - if [ "${{ matrix.cuda_version }}" = "12.1.1" ]; then + if [ "${{ matrix.vendor_version }}" = "12.1.1" ]; then docker tag lmsysorg/sglang:${tag}${tag_suffix} lmsysorg/sglang:latest${tag_suffix} docker push lmsysorg/sglang:latest${tag_suffix} fi diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 42b3135955..fcc295c7e7 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -8,7 +8,7 @@ FROM $BASE_IMAGE AS base USER root WORKDIR /sgl-workspace - +ARG BUILD_TYPE=all ARG SGL_REPO="https://github.com/sgl-project/sglang" ENV SGL_DEFAULT="main" ARG SGL_BRANCH=${SGL_DEFAULT} From 26d8dcdc5e60ad9c2225996fd27e893be338118d Mon Sep 17 00:00:00 2001 From: HaiShaw Date: Wed, 6 Nov 2024 15:57:47 -0800 Subject: [PATCH 2/3] Tuning extend kernel args to _fwd_kernel --- .../srt/layers/attention/triton_ops/extend_attention.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/sglang/srt/layers/attention/triton_ops/extend_attention.py b/python/sglang/srt/layers/attention/triton_ops/extend_attention.py index 52a72d7fea..8c588bd9ce 100644 --- a/python/sglang/srt/layers/attention/triton_ops/extend_attention.py +++ b/python/sglang/srt/layers/attention/triton_ops/extend_attention.py @@ -25,6 +25,7 @@ from sglang.srt.layers.attention.triton_ops.prefill_attention import ( context_attention_fwd, ) +from sglang.srt.utils import is_hip is_cuda_available = torch.cuda.is_available() if is_cuda_available: @@ -311,6 +312,10 @@ def extend_attention_fwd( num_warps = 4 if Lk <= 64 else 8 num_stages = 1 + extra_kargs = {} + if is_hip(): + extra_kargs = {"waves_per_eu": 4, "matrix_instr_nonkdim": 16, "kpack": 2} + _fwd_kernel[grid]( q_extend, k_extend, @@ -348,6 +353,7 @@ def extend_attention_fwd( Lv=Lv, num_warps=num_warps, num_stages=num_stages, + **extra_kargs, ) From fad57d392d5dca2d4369d31cb3c8577c81b82364 Mon Sep 17 00:00:00 2001 From: HaiShaw Date: Wed, 6 Nov 2024 16:09:59 -0800 Subject: [PATCH 3/3] remove pending changes --- .github/workflows/release-docker.yml | 28 +++++++++++----------------- docker/Dockerfile.rocm | 2 +- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index e69fc3a6e4..619b07fe34 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -14,7 +14,7 @@ jobs: environment: 'prod' strategy: matrix: - vendor_version: ['11.8.0', '12.1.1', '12.4.1', '6.2.0'] + cuda_version: ['11.8.0', '12.1.1', '12.4.1'] build_type: ['all', 'srt'] steps: - name: Delete huge unnecessary tools folder @@ -33,20 +33,18 @@ jobs: run: | version=$(cat python/sglang/version.py | cut -d'"' -f2) - if [ "${{ matrix.vendor_version }}" = "11.8.0" ]; then - ven_tag="cu118" - elif [ "${{ matrix.vendor_version }}" = "12.1.1" ]; then - ven_tag="cu121" - elif [ "${{ matrix.vendor_version }}" = "12.4.1" ]; then - ven_tag="cu124" - elif [ "${{ matrix.vendor_version }}" = "6.2.0" ]; then - ven_tag="rocm620" + if [ "${{ matrix.cuda_version }}" = "11.8.0" ]; then + cuda_tag="cu118" + elif [ "${{ matrix.cuda_version }}" = "12.1.1" ]; then + cuda_tag="cu121" + elif [ "${{ matrix.cuda_version }}" = "12.4.1" ]; then + cuda_tag="cu124" else - echo "Unsupported CUDA or ROCm version" + echo "Unsupported CUDA version" exit 1 fi - tag=v${version}-${ven_tag} + tag=v${version}-${cuda_tag} if [ "${{ matrix.build_type }}" = "all" ]; then tag_suffix="" @@ -57,14 +55,10 @@ jobs: exit 1 fi - if [ "${{ matrix.vendor_version }}" = "6.2.0" ]; then - docker build . -f docker/Dockerfile.rocm --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache - else - docker build . -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.vendor_version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache - fi + docker build . -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.cuda_version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache docker push lmsysorg/sglang:${tag}${tag_suffix} - if [ "${{ matrix.vendor_version }}" = "12.1.1" ]; then + if [ "${{ matrix.cuda_version }}" = "12.1.1" ]; then docker tag lmsysorg/sglang:${tag}${tag_suffix} lmsysorg/sglang:latest${tag_suffix} docker push lmsysorg/sglang:latest${tag_suffix} fi diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index fcc295c7e7..42b3135955 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -8,7 +8,7 @@ FROM $BASE_IMAGE AS base USER root WORKDIR /sgl-workspace -ARG BUILD_TYPE=all + ARG SGL_REPO="https://github.com/sgl-project/sglang" ENV SGL_DEFAULT="main" ARG SGL_BRANCH=${SGL_DEFAULT}