[unittest] add assertions to unit test cases
sindhuvahinis committed Jul 8, 2024
1 parent dc186b7 commit b3708d7
Showing 4 changed files with 20 additions and 24 deletions.
engines/python/setup/djl_python/multimodal/utils.py (1 addition, 4 deletions)
@@ -25,11 +25,8 @@ def get_image_text_prompt(prompt_text: str) -> str:
     # TODO: image token str must be decoded from image_token_id in serving.properties. Change it after refactor PR.
     image_token_str = '<image>'
 
-    # TODO: image_feature_size should be referred from serving.properties. Change it after refactor PR.
-    image_feature_size = 1176
 
-    # TODO: Remove image_token_str*1176 after vllm next release, as the image placeholder is not needed.
-    return f"{image_token_str*image_feature_size}\n{prompt_text}"
+    return f"{image_token_str}\n{prompt_text}"
 
 
 def load_image_from_base64(image: Union[bytes, str]) -> Image.Image:
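For context, a minimal sketch of the prompt builder after this change; the function name and the '<image>' token come from the diff above, while the call at the bottom is purely illustrative:

    def get_image_text_prompt(prompt_text: str) -> str:
        # After this commit the placeholder appears once; the old code repeated
        # it image_feature_size (1176) times to reserve space for image features.
        image_token_str = '<image>'
        return f"{image_token_str}\n{prompt_text}"

    # Illustrative usage, not part of the commit:
    print(get_image_text_prompt("What's in this image?"))
    # <image>
    # What's in this image?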
@@ -48,15 +48,6 @@ class VllmRbProperties(Properties):
     device: Optional[str] = None
     preloaded_model: Optional[Any] = None
 
-    # Vision language configurations
-    # TODO: remove this after vLLM next release
-    image_token_id: Optional[int] = None
-    image_input_type: Optional[str] = None
-    image_input_shape: Optional[str] = None
-    image_feature_size: Optional[int] = None
-    image_processor: Optional[str] = None
-    image_processor_revision: Optional[str] = None
-
     @field_validator('engine')
     def validate_engine(cls, engine):
         if engine != "Python":
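As a self-contained sketch of the trimmed-down class: the BaseModel base, the engine field, and the error message below are stand-ins, since djl_python's real Properties base and field set are larger; only device, preloaded_model, and the validator shape come from the diff:

    from typing import Any, Optional

    from pydantic import BaseModel, field_validator


    class VllmRbProperties(BaseModel):
        engine: str
        device: Optional[str] = None
        preloaded_model: Optional[Any] = None

        @field_validator('engine')
        def validate_engine(cls, engine):
            # Same shape as the validator in the diff context; message is illustrative.
            if engine != "Python":
                raise ValueError(f"Expected engine=Python for vLLM rolling batch, got {engine}")
            return engine


    # The removed image_* options are no longer part of the schema.
    props = VllmRbProperties(engine="Python", device="cuda")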
@@ -16,7 +16,6 @@
 from vllm import EngineArgs
 from vllm.outputs import CompletionOutput, RequestOutput as vLLMRequestOutput
 from vllm.lora.request import LoRARequest
-from vllm.multimodal.image import ImagePixelData
 from vllm.inputs import PromptInputs
 
 from djl_python.request_io import Token, Sequence
@@ -227,21 +226,16 @@ def get_engine_args_from_config(config: VllmRbProperties) -> EngineArgs:
         max_lora_rank=config.max_lora_rank,
         lora_extra_vocab_size=config.lora_extra_vocab_size,
         max_cpu_loras=config.max_cpu_loras,
-        revision=config.revision,
-        image_input_type=config.image_input_type,
-        image_token_id=config.image_token_id,
-        image_input_shape=config.image_input_shape,
-        image_feature_size=config.image_feature_size,
-        image_processor=config.image_processor,
-        image_processor_revision=config.image_processor_revision)
+        revision=config.revision)
 
 
 def get_multi_modal_data(request: Request) -> dict:
     parameters = request.request_input.parameters
     images = parameters.pop("images", None)
     multi_modal_data = None
     if images:
-        multi_modal_data = ImagePixelData(images[0])
+        # vLLM only supports one image per request.
+        multi_modal_data = {"image": images[0]}
     return multi_modal_data
 
 
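With ImagePixelData gone, the generic mapping is what newer vLLM releases accept directly. A rough sketch of how such a dict reaches vLLM; the model name, prompt format, and image path are assumptions, and the exact call shape depends on the vLLM version this refactor targets:

    from PIL import Image
    from vllm import LLM, SamplingParams

    llm = LLM(model="llava-hf/llava-1.5-7b-hf")  # illustrative vision-language model
    image = Image.open("dog_bike_car.jpg")       # illustrative local image

    outputs = llm.generate(
        {
            "prompt": "USER: <image>\nWhat's in this image?\nASSISTANT:",
            # The plain {"image": ...} mapping replaces the old ImagePixelData wrapper.
            "multi_modal_data": {"image": image},
        },
        SamplingParams(temperature=0.0),
    )
    print(outputs[0].outputs[0].text)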
@@ -17,7 +17,7 @@
 )
 
 
-class TestLmiDist(unittest.TestCase):
+class TestMultiModalUtils(unittest.TestCase):
 
     def test_open_ai_format_parse(self):
         image_url = "https://resources.djl.ai/images/dog_bike_car.jpg"
@@ -45,7 +45,21 @@ def test_open_ai_format_parse(self):
             is_rolling_batch=True,
             tokenizer=tokenizer)
         print(inputs)
+        image_token = "<image>"
+        self.assertEqual(
+            f"<|im_start|>user\n{image_token*1176}\nWhat’s in this image?<|im_end|>\n",
+            inputs)
         images = params.pop("images", None)
         for image in images:
             print(image)
         print(params)
+        self.assertEqual(
+            {
+                'frequency_penalty': 0.0,
+                'presence_penalty': 0.0,
+                'stream': False,
+                'temperature': 1.0,
+                'top_p': 1.0,
+                'do_sample': True,
+                'details': True,
+                'output_formatter': 'json_chat'
+            }, params)
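The first assertion's expected string can be reproduced standalone; the 1176 repetitions match the image_feature_size that utils.py previously hard-coded. A minimal sketch, independent of the test harness:

    image_token = "<image>"
    expected = f"<|im_start|>user\n{image_token * 1176}\nWhat’s in this image?<|im_end|>\n"
    # One placeholder per image feature, wrapped in the chat-template markers.
    assert expected.count(image_token) == 1176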
