From 6657370622c8c3ab47728ac0ecb51930217f72f9 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Wed, 27 Nov 2024 23:51:20 -0800 Subject: [PATCH] fix vision language model with prefix caching --- python/sglang/srt/models/llava.py | 4 +--- test/srt/test_session_control.py | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/python/sglang/srt/models/llava.py b/python/sglang/srt/models/llava.py index b07474ad98..2eaaae1a6b 100644 --- a/python/sglang/srt/models/llava.py +++ b/python/sglang/srt/models/llava.py @@ -350,7 +350,6 @@ def forward( # Fill in the placeholder for the image extend_start_loc_cpu = forward_batch.extend_start_loc.cpu().numpy() prefix_lens_cpu = forward_batch.extend_prefix_lens_cpu - pt = 0 for i in range(bs): if not need_vision[i]: continue @@ -363,7 +362,7 @@ def forward( if image_offset < prefix_len: continue - tmp_image_feature = image_features[pt][j] + tmp_image_feature = image_features[i][j] pad_len = tmp_image_feature.shape[0] left_idx = start_idx + (image_offset - prefix_len) @@ -376,7 +375,6 @@ def forward( print( f"{start_idx=}, {image_offset=}, {prefix_len=}, {pad_len=}" ) - pt += 1 return self.language_model( input_ids, positions, forward_batch, input_embeds=input_embeds diff --git a/test/srt/test_session_control.py b/test/srt/test_session_control.py index 7396779f64..131e3a39d6 100644 --- a/test/srt/test_session_control.py +++ b/test/srt/test_session_control.py @@ -186,7 +186,6 @@ def setUpClass(cls): cls.model, cls.base_url, timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - # other_args={"--disable-radix"}, ) @classmethod