Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hr/download #132

Merged
merged 14 commits into from
Jul 11, 2024
1 change: 1 addition & 0 deletions aana/core/models/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ class VideoMetadata(BaseModel):

title: str = Field(None, description="The title of the video.")
description: str = Field(None, description="The description of the video.")
duration: float | None = Field(None, description="The duration of the video.")
movchan74 marked this conversation as resolved.
Show resolved Hide resolved
model_config = ConfigDict(
json_schema_extra={
"description": "Metadata of a video.",
Expand Down
34 changes: 33 additions & 1 deletion aana/integrations/external/yt_dlp.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,48 @@
import hashlib
from pathlib import Path
from typing_extensions import TypedDict

import yt_dlp
from yt_dlp.utils import DownloadError

from aana.configs.settings import settings
from aana.core.models.video import Video, VideoInput
from aana.core.models.video import Video, VideoInput, VideoMetadata
from aana.exceptions.io import (
DownloadException,
)


def get_video_metadata(video_url: str) -> VideoMetadata:
    """Fetch a video's metadata for a URL without downloading the video.

    The information is extracted with yt-dlp using ``download=False``.

    Args:
        video_url (str): the video input url

    Returns:
        metadata (VideoMetadata): the metadata of the video (title,
            description and duration; duration is None when yt-dlp does
            not report one)

    Raises:
        DownloadException: if yt-dlp fails to extract the video information
    """
    ydl_options = {
        "extract_flat": True,
        "hls_prefer_native": True,
        "extractor_args": {"youtube": {"skip": ["hls", "dash"]}},
    }
    # Keep the try body limited to the yt-dlp call, the only part that can
    # raise DownloadError.
    try:
        with yt_dlp.YoutubeDL(ydl_options) as ydl:
            info = ydl.extract_info(video_url, download=False)
    except DownloadError as e:
        # Only the part of the yt-dlp message before the first ";" is kept.
        error_message = e.msg.split(";")[0]
        raise DownloadException(url=video_url, msg=error_message) from e

    # An extractor can report a key as present with a None value, in which
    # case ``info.get(key, "")`` would return None. VideoMetadata declares
    # title and description as ``str``, so fall back to "" for both a
    # missing key and an explicit None.
    return VideoMetadata(
        title=info.get("title") or "",
        description=info.get("description") or "",
        duration=info.get("duration"),
    )


def download_video(video_input: VideoInput | Video) -> Video:
"""Downloads videos for a VideoInput object.

Expand Down
20 changes: 17 additions & 3 deletions aana/projects/chat_with_video/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from aana.exceptions.db import MediaIdAlreadyExistsException, UnfinishedVideoException
from aana.exceptions.io import VideoTooLongException
from aana.integrations.external.decord import generate_frames, get_video_duration
from aana.integrations.external.yt_dlp import download_video
from aana.integrations.external.yt_dlp import download_video, get_video_metadata
from aana.processors.remote import run_remote
from aana.processors.video import extract_audio, generate_combined_timeline
from aana.projects.chat_with_video.const import (
Expand Down Expand Up @@ -154,8 +154,22 @@ async def run(
if check_media_id_exist(media_id):
raise MediaIdAlreadyExistsException(table_name="media", media_id=video)

video_duration = None
if video.url is not None:
video_metadata = get_video_metadata(video.url)
video_duration = video_metadata.duration

# Pre-check the maximum video length before actually downloading the video, if possible
if video_duration and video_duration > max_video_len:
raise VideoTooLongException(
video=video,
video_len=video_duration,
max_len=max_video_len,
)

video_obj: Video = await run_remote(download_video)(video_input=video)
video_duration = await run_remote(get_video_duration)(video=video_obj)
if video_duration is None:
video_duration = await run_remote(get_video_duration)(video=video_obj)

if video_duration > max_video_len:
raise VideoTooLongException(
Expand All @@ -168,7 +182,7 @@ async def run(
yield {
"media_id": media_id,
"metadata": VideoMetadata(
title=video_obj.title, description=video_obj.description
title=video_obj.title, description=video_obj.description, duration=video_duration
),
}

Expand Down
1 change: 1 addition & 0 deletions aana/storage/services/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,4 +392,5 @@ def load_video_metadata(
return VideoMetadata(
title=video_entity.title,
description=video_entity.description,
duration=video_entity.duration
)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Loading