Merge pull request vllm-project#1 from Bellk17/main

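Triton compilation fix: assign `off_h_k` in `attn_fwd` with an explicit if/else statement instead of a conditional expression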
Bellk17 · Apr 12, 2024 · b36d574 · b36d574
2 parents c2b4a1b + fae4f82
commit b36d574
Showing 1 changed file with 5 additions and 1 deletion.
diff --git a/vllm/attention/ops/triton_flash_attention.py b/vllm/attention/ops/triton_flash_attention.py
@@ -415,7 +415,11 @@ def attn_fwd(
             return
 
     is_mqa = hq != hk
-    off_h_k = off_h_q % hk if is_mqa else off_h_q
+    if is_mqa:  # noqa: SIM108
+        off_h_k = off_h_q % hk
+    else:
+        off_h_k = off_h_q
+
     n_extra_tokens = 0
     if seqlen_k < BLOCK_N:
         n_extra_tokens = BLOCK_N - seqlen_k