
Commit

DninoAdnane committed Nov 29, 2023
2 parents e9efff4 + c5f241b commit a579f23
Showing 6 changed files with 118 additions and 9 deletions.
2 changes: 2 additions & 0 deletions AVAILABLES_FEATURES_AND_PROVIDERS.md
@@ -170,6 +170,7 @@
|----------|-------------|
| **embeddings** | alephalpha |
| **question_answer** | alephalpha |
| | openai |
| **explicit_content** | amazon |
| | api4ai |
| | clarifai |
@@ -711,6 +712,7 @@
| **audio** | speech_to_text_async |
| | text_to_speech |
| **image** | generation |
| | question_answer |
| **text** | anonymization |
| | chat |
| | code_generation |
9 changes: 9 additions & 0 deletions edenai_apis/apis/openai/info.json
@@ -380,6 +380,15 @@
"default_model": "dall-e-2"
},
"version": "v1Beta"
},
"question_answer": {
"constraints": {
"models": [
"gpt-4-vision-preview"
],
"default_model": "gpt-4-vision-preview"
},
"version": "v1"
}
},
"audio": {
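As a rough illustration (not the actual edenai_apis loader, whose logic lives elsewhere), a constraints block like the one added above could be used to fall back on the declared default model when the caller passes none; the path and helper name below are hypothetical:

import json
from typing import Optional

def resolve_model(info_path: str, feature: str, subfeature: str, model: Optional[str]) -> str:
    # Hypothetical helper: read the provider's info.json and pick the requested
    # model, or the declared default_model when none is given.
    with open(info_path, encoding="utf-8") as f:
        info = json.load(f)
    constraints = info[feature][subfeature]["constraints"]
    chosen = model or constraints["default_model"]
    if chosen not in constraints["models"]:
        raise ValueError(f"{chosen} is not available for {feature}/{subfeature}")
    return chosen

# resolve_model("edenai_apis/apis/openai/info.json", "image", "question_answer", None)
# -> "gpt-4-vision-preview"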
74 changes: 70 additions & 4 deletions edenai_apis/apis/openai/openai_image_api.py
@@ -1,5 +1,6 @@
import base64
from io import BytesIO
from json import JSONDecodeError
from typing import Sequence, Literal, Optional

import requests
@@ -14,6 +15,8 @@
from .helpers import (
get_openapi_response,
)
from ...features.image.question_answer import QuestionAnswerDataClass
from ...utils.exception import ProviderException


class OpenaiImageApi(ImageInterface):
@@ -22,7 +25,7 @@ def image__generation(
text: str,
resolution: Literal["256x256", "512x512", "1024x1024"],
num_images: int = 1,
model: Optional[str] = None
model: Optional[str] = None,
) -> ResponseType[ImageGenerationDataClass]:
url = f"{self.url}/images/generations"
payload = {
@@ -32,9 +35,7 @@ def image__generation(
"size": resolution,
"response_format": "b64_json",
}
response = requests.post(
url, json=payload, headers=self.headers
)
response = requests.post(url, json=payload, headers=self.headers)
original_response = get_openapi_response(response)

generations: Sequence[GeneratedImageDataClass] = []
@@ -54,3 +55,68 @@ def image__generation(
original_response=original_response,
standardized_response=ImageGenerationDataClass(items=generations),
)

def image__question_answer(
self,
file: str,
temperature: float,
max_tokens: int,
file_url: str = "",
model: Optional[str] = None,
question: Optional[str] = None,
) -> ResponseType[QuestionAnswerDataClass]:
with open(file, "rb") as fstream:
file_content = fstream.read()
file_b64 = base64.b64encode(file_content).decode("utf-8")

url = f"{self.url}/chat/completions"
payload = {
"model": "gpt-4-vision-preview" or model,
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": question or "Describe the following image",
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{file_b64}"
},
},
],
},
],
"max_tokens": max_tokens,
"temperature": temperature,
}

response = requests.post(url, json=payload, headers=self.headers)

if response.status_code >= 500:
raise ProviderException(
f"OpenAI API is not available. Status code: {response.status_code}"
)

if response.status_code != 200:
raise ProviderException(
message=response.text, code=response.status_code
)

try:
original_response = response.json()
except JSONDecodeError as exc:
raise ProviderException(
message="Invalid JSON response", code=response.status_code
) from exc

standardized_response = QuestionAnswerDataClass(
answers=[original_response["choices"][0]["message"]["content"]]
)

return ResponseType[QuestionAnswerDataClass](
original_response=original_response,
standardized_response=standardized_response,
)
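For reference, a minimal standalone sketch of the same request outside the provider class, assuming an OPENAI_API_KEY environment variable and a local image.png; the endpoint and payload shape mirror the method above:

import base64
import os

import requests

api_key = os.environ["OPENAI_API_KEY"]  # assumed credential source
with open("image.png", "rb") as fstream:
    file_b64 = base64.b64encode(fstream.read()).decode("utf-8")

payload = {
    "model": "gpt-4-vision-preview",
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What are the logos on the image ?"},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{file_b64}"},
                },
            ],
        }
    ],
    "max_tokens": 64,
    "temperature": 0.2,
}
response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    json=payload,
    headers={"Authorization": f"Bearer {api_key}"},
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])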
30 changes: 30 additions & 0 deletions edenai_apis/apis/openai/outputs/image/question_answer_output.json
@@ -0,0 +1,30 @@
{
"original_response": {
"id": "chatcmpl-8QBuI35InuUHU0ZWIxncpk6auVn8v",
"object": "chat.completion",
"created": 1701252990,
"model": "gpt-4-1106-vision-preview",
"usage": {
"prompt_tokens": 1120,
"completion_tokens": 64,
"total_tokens": 1184
},
"choices": [
{
"message": {
"role": "assistant",
"content": "The image contains a combination of logos from four major technology companies, often referred to collectively as \"FAANG\" (Facebook, Apple, Amazon, Netflix, Google), but without the Netflix logo. From left to right, the logos are:\n\n1. Facebook: The blue square with a lowercase 'f' is the logo"
},
"finish_details": {
"type": "max_tokens"
},
"index": 0
}
]
},
"standardized_response": {
"answers": [
"The image contains a combination of logos from four major technology companies, often referred to collectively as \"FAANG\" (Facebook, Apple, Amazon, Netflix, Google), but without the Netflix logo. From left to right, the logos are:\n\n1. Facebook: The blue square with a lowercase 'f' is the logo"
]
}
}
8 changes: 5 additions & 3 deletions edenai_apis/features/image/image_interface.py
@@ -172,9 +172,11 @@ def image__question_answer(
Args:
file (BufferedReader): image to analyze
question (str): your query
maximum_tokens (int): maximum number of tokens to be generated
model (str): which ai model to use, default to 'None'
file_url (str, optional): url of the image to analyze
temperature (float): sampling temperature used when generating the answer
max_tokens (int): maximum number of tokens to be generated
question (str, optional): question to ask; if `None`, a description of the image is requested instead
model (str, optional): which AI model to use, defaults to `None`
"""
raise NotImplementedError

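A hypothetical call matching the documented signature, assuming openai_image_api is an already-configured OpenAI provider instance (credential setup is outside this diff):

# Hypothetical usage of the new image/question_answer subfeature; the provider
# instance is assumed to already hold valid OpenAI credentials.
result = openai_image_api.image__question_answer(
    file="logos.png",                      # local image path (assumed)
    temperature=0.0,
    max_tokens=64,
    question="What are the logos on the image ?",
    model="gpt-4-vision-preview",
)
print(result.standardized_response.answers[0])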
@@ -1,7 +1,7 @@
# pylint: disable=locally-disabled, line-too-long
import mimetypes
import os
from typing import Dict
from typing import Dict, Any

from pydub.utils import mediainfo

@@ -24,7 +24,7 @@
file_wrapper = FileWrapper(image_path, "", file_info)


def question_answer_arguments(provider_name: str) -> Dict:
def question_answer_arguments(provider_name: str) -> Dict[str, Any]:
return {
"file": file_wrapper,
"question": "What are the logos on the image ?",
