Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hr/download #132

Merged
merged 14 commits into from
Jul 11, 2024
3 changes: 3 additions & 0 deletions aana/core/models/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ class VideoMetadata(BaseModel):

title: str = Field(None, description="The title of the video.")
description: str = Field(None, description="The description of the video.")
duration: float | None = Field(
None, description="The duration of the video in seconds."
)
model_config = ConfigDict(
json_schema_extra={
"description": "Metadata of a video.",
Expand Down
7 changes: 4 additions & 3 deletions aana/deployments/vllm_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class VLLMConfig(BaseModel):
chat_template (str): the name of the chat template, if not provided, the chat template from the model will be used
but some models may not have a chat template (optional, default: None)
enforce_eager: whether to enforce eager execution (optional, default: False)
engine_args: extra engine arguments (optional, default: {})
engine_args: extra engine arguments (optional, default: {})

"""

Expand All @@ -57,6 +57,7 @@ class VLLMConfig(BaseModel):
enforce_eager: bool | None = Field(default=False)
engine_args: CustomConfig = {}


@serve.deployment
class VLLMDeployment(BaseTextGenerationDeployment):
"""Deployment to serve large language models using vLLM."""
Expand Down Expand Up @@ -101,7 +102,7 @@ async def apply_config(self, config: dict[str, Any]):
enforce_eager=config_obj.enforce_eager,
gpu_memory_utilization=self.gpu_memory_utilization,
max_model_len=config_obj.max_model_len,
**config_obj.engine_args
**config_obj.engine_args,
)

# TODO: check if the model is already loaded.
Expand Down Expand Up @@ -153,7 +154,7 @@ async def generate_stream(
results_generator = self.engine.generate(
sampling_params=sampling_params_vllm,
request_id=request_id,
inputs=TokensPrompt(prompt_token_ids=prompt_token_ids)
inputs=TokensPrompt(prompt_token_ids=prompt_token_ids),
)

num_returned = 0
Expand Down
40 changes: 39 additions & 1 deletion aana/integrations/external/yt_dlp.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,51 @@
import hashlib
from pathlib import Path
from typing_extensions import TypedDict

import yt_dlp
from yt_dlp.utils import DownloadError

from aana.configs.settings import settings
from aana.core.models.video import Video, VideoInput
from aana.core.models.video import Video, VideoInput, VideoMetadata
from aana.exceptions.io import (
DownloadException,
)


def get_video_metadata(video_url: str) -> VideoMetadata:
    """Fetch a video's metadata for a URL without downloading the video.

    Args:
        video_url (str): the video input url

    Returns:
        metadata (VideoMetadata): the metadata of the video (title,
            description and duration as reported by yt-dlp)

    Raises:
        DownloadException: Request does not succeed.
    """
    ydl_options = {
        "extract_flat": True,
        "hls_prefer_native": True,
        "extractor_args": {"youtube": {"skip": ["hls", "dash"]}},
    }
    # Keep the try body limited to the call that can raise DownloadError.
    try:
        with yt_dlp.YoutubeDL(ydl_options) as ydl:
            info = ydl.extract_info(video_url, download=False)
    except DownloadError as e:
        # Only the part of the yt-dlp message before the first ";" is reported.
        error_message = e.msg.split(";")[0]
        raise DownloadException(url=video_url, msg=error_message) from e

    # dict.get() only applies its default when the key is missing; a key that
    # is present with a None value would otherwise pass None into the
    # str-typed title/description fields. Normalize both cases to "".
    title = info.get("title") or ""
    description = info.get("description") or ""
    # Duration is optional on VideoMetadata, so None is passed through as-is.
    duration = info.get("duration")
    return VideoMetadata(
        title=title,
        description=description,
        duration=duration,
    )


def download_video(video_input: VideoInput | Video) -> Video:
"""Downloads videos for a VideoInput object.

Expand All @@ -19,6 +54,9 @@ def download_video(video_input: VideoInput | Video) -> Video:

Returns:
Video: the video object

Raises:
DownloadException: Request does not succeed.
"""
if isinstance(video_input, Video):
return video_input
Expand Down
22 changes: 19 additions & 3 deletions aana/projects/chat_with_video/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from aana.exceptions.db import MediaIdAlreadyExistsException, UnfinishedVideoException
from aana.exceptions.io import VideoTooLongException
from aana.integrations.external.decord import generate_frames, get_video_duration
from aana.integrations.external.yt_dlp import download_video
from aana.integrations.external.yt_dlp import download_video, get_video_metadata
from aana.processors.remote import run_remote
from aana.processors.video import extract_audio, generate_combined_timeline
from aana.projects.chat_with_video.const import (
Expand Down Expand Up @@ -154,8 +154,22 @@ async def run(
if check_media_id_exist(media_id):
raise MediaIdAlreadyExistsException(table_name="media", media_id=video)

video_duration = None
if video.url is not None:
video_metadata = get_video_metadata(video.url)
video_duration = video_metadata.duration

# Pre-check the maximum video length before actually downloading the video, if possible
if video_duration and video_duration > max_video_len:
raise VideoTooLongException(
video=video,
video_len=video_duration,
max_len=max_video_len,
)

video_obj: Video = await run_remote(download_video)(video_input=video)
video_duration = await run_remote(get_video_duration)(video=video_obj)
if video_duration is None:
video_duration = await run_remote(get_video_duration)(video=video_obj)

if video_duration > max_video_len:
raise VideoTooLongException(
Expand All @@ -168,7 +182,9 @@ async def run(
yield {
"media_id": media_id,
"metadata": VideoMetadata(
title=video_obj.title, description=video_obj.description
title=video_obj.title,
description=video_obj.description,
duration=video_duration,
),
}

Expand Down
1 change: 1 addition & 0 deletions aana/storage/services/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,4 +392,5 @@ def load_video_metadata(
return VideoMetadata(
title=video_entity.title,
description=video_entity.description,
duration=video_entity.duration,
)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Loading