Update vllm/multimodal/image.py

Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
vllm-project · Aug 21, 2024 · 723d727
1 parent b4a0bcc
commit 723d727
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 13 deletions.
diff --git a/tests/models/test_llava_next.py b/tests/models/test_llava_next.py
@@ -14,16 +14,11 @@
 
 _LIMIT_IMAGE_PER_PROMPT = 4
 
-_PREFACE = (
-    "A chat between a curious human and an artificial intelligence assistant. "
-    "The assistant gives helpful, detailed, and polite answers to the human's "
-    "questions.")
-
 HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
     "stop_sign":
-    f"{_PREFACE} USER: <image>\nWhat's the content of the image? ASSISTANT:",
+    "[INST] <image>\nWhat's the content of the image? [/INST]",
     "cherry_blossom":
-    f"{_PREFACE} USER: <image>\nWhat is the season? ASSISTANT:",
+    "[INST] <image>\nWhat is the season? [/INST]",
 })
 
 models = ["llava-hf/llava-v1.6-mistral-7b-hf"]
@@ -256,10 +251,10 @@ def test_models_multiple_image_inputs(hf_runner, vllm_runner, image_assets,
 
     inputs = [(
         [
-            f"{_PREFACE} USER: <image><image>\nDescribe the 2 images. ASSISTANT:",  # noqa: E501
-            f"{_PREFACE} USER: <image><image>\nDescribe the 2 images. ASSISTANT:",  # noqa: E501
-            f"{_PREFACE} USER: <image><image><image><image>\nDescribe the 4 images. ASSISTANT:",  # noqa: E501
-            f"{_PREFACE} USER: <image>\nWhat is the season? ASSISTANT:"
+            "[INST] <image><image>\nDescribe 2 images. [/INST]",
+            "[INST] <image><image>\nDescribe 2 images. [/INST]",
+            "[INST] <image><image><image><image>\nDescribe 4 images. [/INST]",
+            "[INST] <image>\nWhat is the season? [/INST]"
         ],
         [
             [stop_sign, cherry_blossom],

diff --git a/vllm/multimodal/image.py b/vllm/multimodal/image.py
@@ -77,11 +77,11 @@ def repeat_and_pad_image_tokens(
         prompt_parts = prompt.split(image_token_str,
                                     maxsplit=len(repeat_count))
         new_prompt = ""
-        for i in range(len(repeat_count)):
+        for i, repeat_count_item in enumerate(repeat_count):
             replacement_str = "".join(
                 repeat_and_pad_token(
                     image_token_str,
-                    repeat_count=repeat_count[i],
+                    repeat_count=repeat_count_item,
                     pad_token_left=pad_token_str_left,
                     pad_token_right=pad_token_str_right,
                 ))