Fix vision-language model with prefix caching

sgl-project · Nov 28, 2024 · 6657370 · 6657370
1 parent cd51758
commit 6657370
Show file tree

Hide file tree

Showing 2 changed files with 1 addition and 4 deletions.
diff --git a/python/sglang/srt/models/llava.py b/python/sglang/srt/models/llava.py
@@ -350,7 +350,6 @@ def forward(
                 # Fill in the placeholder for the image
                 extend_start_loc_cpu = forward_batch.extend_start_loc.cpu().numpy()
                 prefix_lens_cpu = forward_batch.extend_prefix_lens_cpu
-                pt = 0
                 for i in range(bs):
                     if not need_vision[i]:
                         continue
@@ -363,7 +362,7 @@ def forward(
                         if image_offset < prefix_len:
                             continue
 
-                        tmp_image_feature = image_features[pt][j]
+                        tmp_image_feature = image_features[i][j]
                         pad_len = tmp_image_feature.shape[0]
 
                         left_idx = start_idx + (image_offset - prefix_len)
@@ -376,7 +375,6 @@ def forward(
                             print(
                                 f"{start_idx=}, {image_offset=}, {prefix_len=}, {pad_len=}"
                             )
-                    pt += 1
 
             return self.language_model(
                 input_ids, positions, forward_batch, input_embeds=input_embeds

diff --git a/test/srt/test_session_control.py b/test/srt/test_session_control.py
@@ -186,7 +186,6 @@ def setUpClass(cls):
             cls.model,
             cls.base_url,
             timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-            # other_args={"--disable-radix"},
         )
 
     @classmethod