SD2-913-new-feature-video-generation #292

Merged: 4 commits, merged on Dec 18, 2024

Changes from all commits
83 changes: 83 additions & 0 deletions edenai_apis/apis/amazon/amazon_video_api.py
@@ -1,3 +1,7 @@
from typing import Optional
import base64
from io import BytesIO

from edenai_apis.features.video.explicit_content_detection_async.explicit_content_detection_async_dataclass import (
    ExplicitContentDetectionAsyncDataClass,
)
@@ -13,6 +17,9 @@
from edenai_apis.features.video.text_detection_async.text_detection_async_dataclass import (
    TextDetectionAsyncDataClass,
)
from edenai_apis.features.video.generation_async.generation_async_dataclass import (
    GenerationAsyncDataClass,
)
from edenai_apis.features.video.video_interface import VideoInterface
from edenai_apis.utils.exception import (
    ProviderException,
@@ -33,6 +40,10 @@
    amazon_video_explicit_parser,
)
from .config import clients
from edenai_apis.utils.upload_s3 import (
    USER_PROCESS,
    upload_file_bytes_to_s3,
)


class AmazonVideoApi(VideoInterface):
@@ -100,6 +111,46 @@ def video__explicit_content_detection_async__launch_job(

        return AsyncLaunchJobResponseType(provider_job_id=job_id)

    # Launch job video generation
    def video__generation_async__launch_job(
        self,
        text: str,
        duration: Optional[int] = 6,
        fps: Optional[int] = 24,
        dimension: Optional[str] = "1280x720",
        seed: Optional[float] = 12,
        file: Optional[str] = None,
        file_url: Optional[str] = None,
        model: Optional[str] = None,
    ) -> AsyncLaunchJobResponseType:
        text_input = {"text": text}
        if file:
            with open(file, "rb") as file_:
                file_content = file_.read()
            input_image_base64 = base64.b64encode(file_content).decode("utf-8")
            images = [{"format": "png", "source": {"bytes": input_image_base64}}]
            text_input["images"] = images
        model_input = {
            "taskType": "TEXT_VIDEO",
            "textToVideoParams": text_input,
            "videoGenerationConfig": {
                "durationSeconds": duration,
                "fps": fps,
                "dimension": dimension,
                "seed": seed,
            },
        }
        request_params = {
            "modelId": model,
            "modelInput": model_input,
            "outputDataConfig": {"s3OutputDataConfig": {"s3Uri": "s3://us-storage"}},
        }
        response = handle_amazon_call(
            self.clients["bedrock"].start_async_invoke, **request_params
        )
        provider_job_id = response.get("invocationArn")
        return AsyncLaunchJobResponseType(provider_job_id=provider_job_id)

    # Get job result for label detection
    def video__label_detection_async__get_job_result(
        self, provider_job_id: str
@@ -331,3 +382,35 @@ def video__explicit_content_detection_async__get_job_result(
            provider_job_id=provider_job_id,
        )
        return AsyncPendingResponseType(provider_job_id=response["JobStatus"])

    # Get job result for generation
    def video__generation_async__get_job_result(
        self, provider_job_id: str
    ) -> GenerationAsyncDataClass:
        invocation = handle_amazon_call(
            self.clients["bedrock"].get_async_invoke,
            **{"invocationArn": provider_job_id},
        )
        if invocation["status"] == "Completed":
            file_name = invocation["outputDataConfig"]["s3OutputDataConfig"][
                "s3Uri"
            ].split("/")[-1]
            response = self.clients["s3"].get_object(
                Bucket="us-storage", Key=f"{file_name}/output.mp4"
            )
            data = response["Body"].read()
Contributor Author commented: This part may be a problem since we load the whole video into memory.
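One possible mitigation, shown here only as a sketch and not part of this PR: stream the S3 object to a temporary file with boto3's download_fileobj instead of reading the whole body at once. This assumes upload_file_bytes_to_s3 accepts any file-like object (it is called with a BytesIO above), and it only helps if the base64 video field below can be dropped or made optional, since encoding it still requires the full bytes.

import tempfile

with tempfile.TemporaryFile() as tmp:
    # download_fileobj streams the object in chunks rather than buffering it all in memory
    self.clients["s3"].download_fileobj("us-storage", f"{file_name}/output.mp4", tmp)
    tmp.seek(0)
    resource_url = upload_file_bytes_to_s3(tmp, ".mp4", USER_PROCESS)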

            video_content = base64.b64encode(data).decode("utf-8")
            resource_url = upload_file_bytes_to_s3(BytesIO(data), ".mp4", USER_PROCESS)
            return AsyncResponseType(
                original_response=invocation,
                standardized_response=GenerationAsyncDataClass(
                    video=video_content, video_resource_url=resource_url
                ),
                provider_job_id=provider_job_id,
            )
        if invocation["status"] == "InProgress":
            return AsyncPendingResponseType(provider_job_id=provider_job_id)

        if invocation["status"] == "Failed":
            failure_message = invocation["failureMessage"]
            raise ProviderException(failure_message)
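For context, here is a minimal, hypothetical usage sketch of the two methods added above. It is not part of the PR: api stands for an already-initialized Amazon provider instance, the model id mirrors the sample arguments added later in this diff, the polling interval is illustrative, and a pending result is assumed not to carry a standardized_response.

import time

def wait_for_generated_video(api, prompt: str) -> str:
    # Launch the Bedrock async invocation through the new provider method.
    launch = api.video__generation_async__launch_job(
        text=prompt,
        model="amazon.nova-reel-v1:0",
    )
    # Poll until the job is no longer pending; a finished job carries a
    # standardized_response holding the base64 video and its resource URL.
    # A failed job raises ProviderException from the provider method above.
    while True:
        result = api.video__generation_async__get_job_result(launch.provider_job_id)
        if getattr(result, "standardized_response", None) is not None:
            return result.standardized_response.video_resource_url
        time.sleep(10)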
38 changes: 20 additions & 18 deletions edenai_apis/apis/amazon/info.json
@@ -521,7 +521,7 @@
"fr",
"de"
],
"documents" : [
"documents": [
"receipt",
"invoice"
],
@@ -623,14 +623,14 @@
},
"version": "boto3 (v1.15.18)"
},
"generation" : {
"generation": {
"constraints": {
"models": [
"titan-tg1-large",
"titan-text-lite-v1",
"titan-text-express-v1"
],
"default_model" : "titan-tg1-large"
"default_model": "titan-tg1-large"
},
"version": "v1"
}
@@ -730,8 +730,7 @@
"version": "boto3 (v1.15.18)"
},
"object_detection": {
"constraints": {
},
"constraints": {},
"version": "boto3 (v1.15.18)"
},
"face_recognition": {
@@ -760,9 +759,9 @@
"face_compare": {
"version": "boto3 (v1.15.18)"
},
"generation":{
"generation": {
"constraints": {
"resolutions":[
"resolutions": [
"256x256",
"512x512",
"1024x1024"
@@ -772,18 +771,18 @@
"titan-image-generator-v1_standard"
],
"default_model": "titan-image-generator-v1_premium"
},
},
"version": "boto3 (v1.29.6)"
},
"embeddings": {
"constraints": {
"models": [
"titan-embed-image-v1"
],
"default_model": "titan-embed-image-v1"
},
"version": "v1"
}
"embeddings": {
"constraints": {
"models": [
"titan-embed-image-v1"
],
"default_model": "titan-embed-image-v1"
},
"version": "v1"
}
},
"video": {
"label_detection_async": {
@@ -830,6 +829,9 @@
"video/quicktime"
]
}
},
"generation_async": {
"version": "boto3"
}
}
}
}
34 changes: 34 additions & 0 deletions edenai_apis/apis/amazon/outputs/video/generation_async_output.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions edenai_apis/features/video/__init__.py
@@ -57,3 +57,4 @@
    question_answer_async_arguments,
    QuestionAnswerAsyncDataClass,
)
from .generation_async import generation_async_arguments, GenerationAsyncDataClass
Binary file added edenai_apis/features/video/data/image.jpg
2 changes: 2 additions & 0 deletions edenai_apis/features/video/generation_async/__init__.py
@@ -0,0 +1,2 @@
from .generation_async_args import generation_async_arguments
from .generation_async_dataclass import GenerationAsyncDataClass
34 changes: 34 additions & 0 deletions edenai_apis/features/video/generation_async/generation_async_args.py
@@ -0,0 +1,34 @@
import mimetypes
import os
from typing import Dict

from pydub.utils import mediainfo

from edenai_apis.utils.files import FileInfo, FileWrapper


def generation_async_arguments(provider_name: str) -> Dict:
    feature_path = os.path.dirname(os.path.dirname(__file__))

    data_path = os.path.join(feature_path, "data")

    image_path = f"{data_path}/image.jpg"

    mime_type = mimetypes.guess_type(image_path)[0]
    file_info = FileInfo(
        os.stat(image_path).st_size,
        mime_type,
        [extension[1:] for extension in mimetypes.guess_all_extensions(mime_type)],
        mediainfo(image_path).get("sample_rate", "44100"),
        mediainfo(image_path).get("channels", "1"),
    )
    file_wrapper = FileWrapper(image_path, "", file_info)
    return {
        "text": "Make this image move like the ocean",
        "duration": 6,
        "fps": 24,
        "seed": 12,
        "dimension": "1280x720",
        "file": file_wrapper,
        "settings": {"amazon": "amazon.nova-reel-v1:0"},
    }
6 changes: 6 additions & 0 deletions edenai_apis/features/video/generation_async/generation_async_dataclass.py
@@ -0,0 +1,6 @@
from pydantic import BaseModel, StrictStr


class GenerationAsyncDataClass(BaseModel):
    video: str
    video_resource_url: StrictStr
@@ -0,0 +1,4 @@
{
"video": "base64...",
"video_resource_url": "str"
}
32 changes: 32 additions & 0 deletions edenai_apis/features/video/video_interface.py
@@ -11,6 +11,7 @@
    PersonTrackingAsyncDataClass,
    TextDetectionAsyncDataClass,
    QuestionAnswerDataClass,
    GenerationAsyncDataClass,
)
from edenai_apis.utils.types import AsyncBaseResponseType, AsyncLaunchJobResponseType

@@ -235,3 +236,34 @@ def video__question_answer_async__get_job_result(
        self, provider_job_id: str
    ) -> AsyncBaseResponseType:
        raise NotImplementedError

    ### Video generation methods
    @abstractmethod
    def video__generation_async__launch_job(
        self,
        text: str,
        duration: Optional[int] = 6,
        fps: Optional[int] = 24,
        dimension: Optional[str] = "1280x720",
        seed: Optional[float] = 12,
        file: Optional[str] = None,
        file_url: Optional[str] = None,
        model: Optional[str] = None,
    ) -> AsyncLaunchJobResponseType:
"""
Launch an asynchronous job to detect text in a video

Args:
file (BufferedReader): video to analyze
"""
        raise NotImplementedError

    @abstractmethod
    def video__generation_async__get_job_result(
        self, provider_job_id: str
    ) -> AsyncBaseResponseType[GenerationAsyncDataClass]:
        """Get the result of an asynchronous job by its ID
        Args:
            - provider_job_id (str): id of async job
        """
        raise NotImplementedError
2 changes: 1 addition & 1 deletion edenai_apis/tests/features/test_async_features.py
@@ -25,7 +25,7 @@
from edenai_apis.interface import IS_MONITORING
from edenai_apis.utils.monitoring import insert_api_call

MAX_TIME = 180
MAX_TIME = 300
TIME_BETWEEN_CHECK = 10
INTERFACE_MODULE = importlib.import_module("edenai_apis.interface_v2")

2 changes: 1 addition & 1 deletion edenai_apis/tests/outputs.py
@@ -21,7 +21,7 @@

# TEXT_AUTOML_CLASSIFICATION = ["training_async", "prediction_async"]

MAX_TIME = 220
MAX_TIME = 280
TIME_BETWEEN_CHECK = 10

