Skip to content

Commit

Permalink
fix vision language model with prefix caching
Browse files Browse the repository at this point in the history
  • Loading branch information
Ying1123 committed Nov 28, 2024
1 parent cd51758 commit 6657370
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 4 deletions.
4 changes: 1 addition & 3 deletions python/sglang/srt/models/llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,6 @@ def forward(
# Fill in the placeholder for the image
extend_start_loc_cpu = forward_batch.extend_start_loc.cpu().numpy()
prefix_lens_cpu = forward_batch.extend_prefix_lens_cpu
- pt = 0
for i in range(bs):
if not need_vision[i]:
continue
Expand All @@ -363,7 +362,7 @@ def forward(
if image_offset < prefix_len:
continue

- tmp_image_feature = image_features[pt][j]
+ tmp_image_feature = image_features[i][j]
pad_len = tmp_image_feature.shape[0]

left_idx = start_idx + (image_offset - prefix_len)
Expand All @@ -376,7 +375,6 @@ def forward(
print(
f"{start_idx=}, {image_offset=}, {prefix_len=}, {pad_len=}"
)
- pt += 1

return self.language_model(
input_ids, positions, forward_batch, input_embeds=input_embeds
Expand Down
1 change: 0 additions & 1 deletion test/srt/test_session_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ def setUpClass(cls):
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
- # other_args={"--disable-radix"},
)

@classmethod
Expand Down

0 comments on commit 6657370

Please sign in to comment.