Merge pull request #149 from mobiusml/hr/add-idefics-2-deployment

Hr/add idefics 2 deployment
mobiusml · Aug 7, 2024 · 309b9b0 · 309b9b0
2 parents 27b22dd + 1b87aee
commit 309b9b0
Show file tree

Hide file tree

Showing 25 changed files with 2,274 additions and 1,676 deletions.
diff --git a/aana/api/request_handler.py b/aana/api/request_handler.py
@@ -15,7 +15,7 @@
 from aana.api.event_handlers.event_manager import EventManager
 from aana.api.responses import AanaJSONResponse
 from aana.configs.settings import settings as aana_settings
-from aana.core.models.chat import ChatCompletetion, ChatCompletionRequest, ChatDialog
+from aana.core.models.chat import ChatCompletion, ChatCompletionRequest, ChatDialog
 from aana.core.models.sampling import SamplingParams
 from aana.core.models.task import TaskId, TaskInfo
 from aana.deployments.aana_deployment_handle import AanaDeploymentHandle
@@ -158,7 +158,7 @@ async def delete_task_endpoint(
         task = task_repo.delete(task_id)
         return TaskId(task_id=str(task.id))
 
-    @app.post("/chat/completions", response_model=ChatCompletetion)
+    @app.post("/chat/completions", response_model=ChatCompletion)
     async def chat_completions(self, request: ChatCompletionRequest):
         """Handle chat completions requests for OpenAI compatible API."""
 

diff --git a/aana/configs/deployments.py b/aana/configs/deployments.py
@@ -11,6 +11,7 @@
     HfTextGenerationConfig,
     HfTextGenerationDeployment,
 )
+from aana.deployments.idefics_2_deployment import Idefics2Config, Idefics2Deployment
 from aana.deployments.stablediffusion2_deployment import (
     StableDiffusion2Config,
     StableDiffusion2Deployment,
@@ -190,6 +191,20 @@
     "hf_phi3_mini_4k_instruct_text_gen_deployment"
 ] = hf_phi3_mini_4k_instruct_text_gen_deployment
 
+
+idefics_2_deployment = Idefics2Deployment.options(
+    num_replicas=1,
+    ray_actor_options={"num_gpus": 0.85},
+    user_config=Idefics2Config(
+        model="HuggingFaceM4/idefics2-8b",
+        dtype=Dtype.FLOAT16,
+    ).model_dump(mode="json"),
+)
+
+available_deployments[
+    "idefics_2_deployment"
+] = idefics_2_deployment
+
 __all__ = [
     "vllm_llama2_7b_chat_deployment",
     "meta_llama3_8b_instruct_deployment",
@@ -201,4 +216,5 @@
     "vad_deployment",
     "hf_blip2_opt_2_7b_pipeline_deployment",
     "hf_phi3_mini_4k_instruct_text_gen_deployment",
+    "idefics_2_deployment",
 ]
diff --git a/aana/core/models/chat.py b/aana/core/models/chat.py
@@ -9,8 +9,8 @@
     "ChatMessage",
     "ChatDialog",
     "ChatCompletionRequest",
-    "ChatCompletetionChoice",
-    "ChatCompletetion",
+    "ChatCompletionChoice",
+    "ChatCompletion",
 ]
 
 Role = Literal["system", "user", "assistant"]
@@ -170,7 +170,7 @@ class ChatCompletionRequest(BaseModel):
     )
 
 
-class ChatCompletetionChoice(BaseModel):
+class ChatCompletionChoice(BaseModel):
     """A chat completion choice for OpenAI compatible API.
 
     Attributes:
@@ -186,14 +186,14 @@ class ChatCompletetionChoice(BaseModel):
     )
 
 
-class ChatCompletetion(BaseModel):
+class ChatCompletion(BaseModel):
     """A chat completion for OpenAI compatible API.
 
     Attributes:
         id (str): a unique identifier for the chat completion
         model (str): the model used for the chat completion
         created (int): the Unix timestamp (in seconds) of when the chat completion was created
-        choices (list[ChatCompletetionChoice]): a list of chat completion choices
+        choices (list[ChatCompletionChoice]): a list of chat completion choices
         object (Literal["chat.completion"]): the object type, which is always `chat.completion`
     """
 
@@ -203,7 +203,7 @@ class ChatCompletetion(BaseModel):
         ...,
         description="The Unix timestamp (in seconds) of when the chat completion was created.",
     )
-    choices: list[ChatCompletetionChoice] = Field(
+    choices: list[ChatCompletionChoice] = Field(
         ...,
         description="A list of chat completion choices.",
     )

diff --git a/aana/core/models/image_chat.py b/aana/core/models/image_chat.py
@@ -0,0 +1,139 @@
+from typing import Annotated, Any, Literal
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from aana.core.models.image import Image
+
+Role = Literal["system", "user", "assistant"]
+
+
+class TextContent(BaseModel):
+    """A text part of a chat message's content.
+
+    Attributes:
+        type (Literal["text"]): the type of the content, always "text" (also the default)
+        text (str): the text carried by this content part
+    """
+
+    type: Literal["text"] = "text"
+    text: str
+
+
+class ImageContent(BaseModel):
+    """An image part of a chat message's content.
+
+    Attributes:
+        type (Literal["image"]): the type of the content, always "image" (also the default)
+        image (Image): the image; the model config allows arbitrary field types
+    """
+
+    type: Literal["image"] = "image"
+    image: Image
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+
+Content = Annotated[
+    TextContent | ImageContent,
+    Field(description="The content of the message, either text or image."),
+]
+
+
+class ImageChatMessage(BaseModel):
+    """A chat message whose content is a list of text and image parts.
+
+    Attributes:
+        content (list[Content]): the content parts, each either text or an image
+        role (Role): the role of the message: "system", "user" or "assistant"
+    """
+
+    content: list[Content]
+    role: Role
+    model_config = ConfigDict(
+        json_schema_extra={"description": "A chat message with image support."}
+    )
+
+
+class ImageChatDialog(BaseModel):
+    """A chat dialog with image support.
+
+    Attributes:
+        messages (list[ImageChatMessage]): the list of messages, in dialog order
+    """
+
+    messages: list[ImageChatMessage]
+    model_config = ConfigDict(
+        json_schema_extra={"description": "A chat dialog with image support."}
+    )
+
+    @classmethod
+    def from_list(cls, messages: list[dict[str, Any]]) -> "ImageChatDialog":
+        """Create an ImageChatDialog from a list of message dictionaries.
+
+        Args:
+            messages (list[dict[str, Any]]): the messages, each with "content" and "role"
+
+        Returns:
+            ImageChatDialog: the chat dialog (an instance of the class it is called on)
+
+        Example:
+        ```
+        messages = [
+            {
+                "content": [
+                    { "type": "image", "image": Image(...) },
+                    { "type": "text", "text": "..." }
+                ],
+                "role": "system"
+            },
+            {
+                "content": [
+                    { "type": "image", "image": Image(...) },
+                    { "type": "text", "text": "..." }
+                ],
+                "role": "user"
+            }
+        ]
+        dialog = ImageChatDialog.from_list(messages)
+        ```
+        """
+        return cls(
+            messages=[ImageChatMessage(**message) for message in messages]
+        )
+
+    @classmethod
+    def from_prompt(cls, prompt: str, images: list[Image]) -> "ImageChatDialog":
+        """Create a single-message dialog: all images first, then the prompt text.
+
+        Args:
+            prompt (str): the prompt, appended as the last content part
+            images (list[Image]): the list of images (may be empty)
+
+        Returns:
+            ImageChatDialog: a dialog holding one message with the "user" role
+        """
+        content: list[Content] = [ImageContent(image=image) for image in images]
+        content.append(TextContent(text=prompt))
+
+        return cls(
+            messages=[ImageChatMessage(content=content, role="user")]
+        )
+
+    def to_objects(self) -> tuple[list[dict], list[Image]]:
+        """Split the dialog into message dicts without image data and the images.
+
+        Returns:
+            tuple[list[dict], list[Image]]: messages minus "image" fields, images in order
+        """
+        dialog_dict = self.model_dump(
+            exclude={"messages": {"__all__": {"content": {"__all__": {"image"}}}}}
+        )
+        messages = dialog_dict["messages"]
+        images = [
+            content.image
+            for message in self.messages
+            for content in message.content
+            if content.type == "image"
+        ]
+
+        return messages, images
diff --git a/aana/core/models/video.py b/aana/core/models/video.py
@@ -24,7 +24,7 @@
 from aana.core.models.base import BaseListModel
 from aana.core.models.media import MediaId
 
-__all__ = ["VideoMetadata", "VideoStatus", "VideoParams"]
+__all__ = ["VideoMetadata", "VideoParams"]
 
 
 @dataclass

diff --git a/aana/deployments/__init__.py b/aana/deployments/__init__.py
@@ -25,6 +25,7 @@
     HfTextGenerationConfig,
     HfTextGenerationDeployment,
 )
+from aana.deployments.idefics_2_deployment import Idefics2Config, Idefics2Deployment
 from aana.deployments.vad_deployment import (
     VadConfig,
     VadDeployment,
@@ -46,6 +47,8 @@
     "BaseTextGenerationDeployment",
     "HfTextGenerationConfig",
     "HfTextGenerationDeployment",
+    "Idefics2Config",
+    "Idefics2Deployment",
     "VLLMConfig",
     "VLLMDeployment",
     "WhisperConfig",