diff --git a/src/helm/clients/anthropic_client.py b/src/helm/clients/anthropic_client.py index 07f38f4176..343365ab5f 100644 --- a/src/helm/clients/anthropic_client.py +++ b/src/helm/clients/anthropic_client.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, TypedDict, cast +from typing import Any, Dict, List, Optional, TypedDict, Union, cast import json import requests import time @@ -6,6 +6,7 @@ from helm.common.cache import CacheConfig from helm.common.hierarchical_logger import htrack_block, hlog +from helm.common.media_object import IMAGE_TYPE, TEXT_TYPE from helm.common.optional_dependencies import handle_module_not_found_error from helm.common.request import ( wrap_request_time, @@ -27,6 +28,8 @@ try: from anthropic import Anthropic from anthropic.types import MessageParam + from anthropic.types.image_block_param import ImageBlockParam + from anthropic.types.text_block_param import TextBlockParam import websocket except ModuleNotFoundError as e: handle_module_not_found_error(e, ["anthropic"]) @@ -242,13 +245,54 @@ def make_request(self, request: Request) -> RequestResult: messages: List[MessageParam] = [] system_message: Optional[MessageParam] = None - if request.messages and request.prompt: - raise AnthropicMessagesRequestError("Exactly one of Request.messages and Request.prompt should be set") - if request.messages: + + if request.messages is not None: + # TODO(#2439): Refactor out Request validation + if request.multimodal_prompt is not None or request.prompt: + raise AnthropicMessagesRequestError( + "Exactly one of Request.messages, Request.prompt or Request.multimodel_prompt should be set" + ) messages = cast(List[MessageParam], request.messages) if messages[0]["role"] == "system": system_message = messages[0] messages = messages[1:] + + elif request.multimodal_prompt is not None: + # TODO(#2439): Refactor out Request validation + if request.messages is not None or request.prompt: + raise AnthropicMessagesRequestError( + "Exactly one of Request.messages, Request.prompt or Request.multimodel_prompt should be set" + ) + blocks: List[Union[TextBlockParam, ImageBlockParam]] = [] + for media_object in request.multimodal_prompt.media_objects: + if media_object.is_type(IMAGE_TYPE): + # TODO(#2439): Refactor out Request validation + if not media_object.location: + raise Exception("MediaObject of image type has missing location field value") + + from helm.common.images_utils import encode_base64 + + base64_image: str = encode_base64(media_object.location, format="JPEG") + image_block: ImageBlockParam = { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": base64_image, + }, + } + blocks.append(image_block) + if media_object.is_type(TEXT_TYPE): + # TODO(#2439): Refactor out Request validation + if media_object.text is None: + raise ValueError("MediaObject of text type has missing text field value") + text_block: TextBlockParam = { + "type": "text", + "text": media_object.text, + } + blocks.append(text_block) + messages = [{"role": "user", "content": blocks}] + else: messages = [{"role": "user", "content": request.prompt}] diff --git a/src/helm/common/media_object.py b/src/helm/common/media_object.py index ddee88e778..d3fdf8292a 100644 --- a/src/helm/common/media_object.py +++ b/src/helm/common/media_object.py @@ -5,6 +5,7 @@ from typing import List, Optional +IMAGE_TYPE = "image" TEXT_TYPE = "text" diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml index 3af6a7d3e8..c2e0831960 100644 --- a/src/helm/config/model_metadata.yaml +++ b/src/helm/config/model_metadata.yaml @@ -235,7 +235,7 @@ models: creator_organization_name: Anthropic access: limited release_date: 2024-03-04 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] - name: anthropic/claude-3-opus-20240229 display_name: Claude 3 Opus (20240229) @@ -243,7 +243,7 @@ models: creator_organization_name: Anthropic access: limited release_date: 2024-03-04 - tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] # DEPRECATED: Please do not use. - name: anthropic/stanford-online-all-v4-s3