diff --git a/.gitignore b/.gitignore
index 82ebe82..6c65b96 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,4 +14,5 @@ venv
 *.egg-info
 package-lock.json
 *.mjs
-site/*
\ No newline at end of file
+site/*
+backend/director/downloads
\ No newline at end of file
diff --git a/backend/.env.sample b/backend/.env.sample
index 2ae880e..463b2fa 100644
--- a/backend/.env.sample
+++ b/backend/.env.sample
@@ -18,4 +18,7 @@ BEEP_AUDIO_ID=
 
 # Slack Agent
 SLACK_CHANNEL_NAME=
-SLACK_BOT_TOKEN=
\ No newline at end of file
+SLACK_BOT_TOKEN=
+
+# Dubbing Agent
+ELEVENLABS_API_KEY=
\ No newline at end of file
diff --git a/backend/director/agents/dubbing.py b/backend/director/agents/dubbing.py
new file mode 100644
index 0000000..4c2f75e
--- /dev/null
+++ b/backend/director/agents/dubbing.py
@@ -0,0 +1,191 @@
+import logging
+import os
+
+from director.constants import DOWNLOADS_PATH
+
+from director.agents.base import BaseAgent, AgentResponse, AgentStatus
+from director.core.session import Session, VideoContent, MsgStatus, VideoData
+from director.tools.videodb_tool import VideoDBTool
+from director.tools.elevenlabs import ElevenLabsTool
+
+logger = logging.getLogger(__name__)
+
+SUPPORTED_ENGINES = ["elevenlabs"]
+DUBBING_AGENT_PARAMETERS = {
+    "type": "object",
+    "properties": {
+        "video_id": {
+            "type": "string",
+            "description": "The unique identifier of the video that needs to be dubbed. This ID is used to retrieve the video from the VideoDB collection.",
+        },
+        "target_language": {
+            "type": "string",
+            "description": "The target language for dubbing (e.g. 'Spanish', 'French', 'German'). The video's audio will be translated and dubbed into this language.",
+        },
+        "target_language_code": {
+            "type": "string",
+            "description": "The target language code for dubbing (e.g. 'es' for Spanish, 'fr' for French, 'de' for German').",
+        },
+        "collection_id": {
+            "type": "string",
+            "description": "The unique identifier of the VideoDB collection containing the video. Required to locate and access the correct video library.",
+        },
+        "engine": {
+            "type": "string",
+            "description": "The dubbing engine to use. Default is 'elevenlabs'. Possible values include 'elevenlabs'.",
+            "default": "elevenlabs",
+        },
+        "engine_params": {
+            "type": "object",
+            "description": "Optional parameters for the dubbing engine.",
+        },
+    },
+    "required": [
+        "video_id",
+        "target_language",
+        "target_language_code",
+        "collection_id",
+        "engine",
+    ],
+}
+
+
+class DubbingAgent(BaseAgent):
+    """Agent that dubs a VideoDB video into a target language."""
+
+    def __init__(self, session: Session, **kwargs):
+        self.agent_name = "dubbing"
+        self.description = (
+            "This is an agent to dub the given video into a target language"
+        )
+        self.parameters = DUBBING_AGENT_PARAMETERS
+        super().__init__(session=session, **kwargs)
+
+    def run(
+        self,
+        video_id: str,
+        target_language: str,
+        target_language_code: str,
+        collection_id: str,
+        engine: str,
+        engine_params: dict = None,
+        *args,
+        **kwargs,
+    ) -> AgentResponse:
+        """
+        Process the video dubbing based on the given video ID.
+
+        :param str video_id: The ID of the video to process.
+        :param str target_language: The target language name for dubbing (e.g. Spanish).
+        :param str target_language_code: The target language code for dubbing (e.g. es).
+        :param str collection_id: The ID of the collection to process.
+        :param str engine: The dubbing engine to use; must be listed in SUPPORTED_ENGINES.
+        :param dict engine_params: Optional parameters for the dubbing engine (not used yet).
+        :param args: Additional positional arguments.
+        :param kwargs: Additional keyword arguments.
+        :return: The response containing information about the dubbing operation.
+        :rtype: AgentResponse
+        """
+        # Bound before the try block so the error handler can reference it
+        # even when the failure happens before the progress message exists.
+        video_content = None
+        try:
+            self.videodb_tool = VideoDBTool(collection_id=collection_id)
+
+            # Get video audio file
+            video = self.videodb_tool.get_video(video_id)
+            if not video:
+                raise Exception(f"Video {video_id} not found")
+
+            if engine not in SUPPORTED_ENGINES:
+                raise Exception(f"{engine} not supported")
+
+            video_content = VideoContent(
+                agent_name=self.agent_name,
+                status=MsgStatus.progress,
+                status_message="Processing...",
+            )
+            self.output_message.content.append(video_content)
+            self.output_message.actions.append("Downloading video")
+            self.output_message.push_update()
+
+            download_response = self.videodb_tool.download(video["stream_url"])
+
+            os.makedirs(DOWNLOADS_PATH, exist_ok=True)
+            dubbed_file_path = f"{DOWNLOADS_PATH}/{video_id}_dubbed.mp4"
+
+            if engine == "elevenlabs":
+                api_key = os.getenv("ELEVENLABS_API_KEY")
+                if not api_key:
+                    raise Exception("Elevenlabs API key not present in .env")
+                elevenlabs_tool = ElevenLabsTool(api_key=api_key)
+                job_id = elevenlabs_tool.create_dub_job(
+                    source_url=download_response["download_url"],
+                    target_language=target_language_code,
+                )
+                # Each tool call signals failure through its return value, so
+                # stop here instead of carrying on with a job that does not exist.
+                if not job_id:
+                    raise Exception("Failed to create dubbing job")
+                self.output_message.actions.append(
+                    f"Dubbing job initiated with Job ID: {job_id}"
+                )
+                self.output_message.push_update()
+
+                self.output_message.actions.append(
+                    "Waiting for dubbing process to complete.."
+                )
+                self.output_message.push_update()
+                if not elevenlabs_tool.wait_for_dub_job(job_id):
+                    raise Exception("Dubbing job failed or timed out")
+
+                self.output_message.actions.append("Downloading dubbed video")
+                self.output_message.push_update()
+                downloaded_path = elevenlabs_tool.download_dub_file(
+                    job_id,
+                    target_language_code,
+                    dubbed_file_path,
+                )
+                if not downloaded_path:
+                    raise Exception("Failed to download dubbed video")
+
+            dubbed_name = f"[Dubbed in {target_language}] {video['name']}"
+            self.output_message.actions.append(
+                f"Uploading dubbed video to VideoDB as '{dubbed_name}'"
+            )
+            self.output_message.push_update()
+
+            dubbed_video = self.videodb_tool.upload(
+                dubbed_file_path,
+                source_type="file_path",
+                media_type="video",
+                name=dubbed_name,
+            )
+
+            video_content.video = VideoData(stream_url=dubbed_video["stream_url"])
+            video_content.status = MsgStatus.success
+            video_content.status_message = f"Dubbed video in {target_language} has been successfully added to your video. Here is your stream."
+            self.output_message.publish()
+
+            return AgentResponse(
+                status=AgentStatus.SUCCESS,
+                message=f"Successfully dubbed video '{video['name']}' to {target_language}",
+                data={
+                    "stream_url": dubbed_video["stream_url"],
+                    "video_id": dubbed_video["id"],
+                },
+            )
+
+        except Exception as e:
+            # video_content is still None if the failure happened before the
+            # progress message was attached to the output.
+            if video_content is not None:
+                video_content.status = MsgStatus.error
+                video_content.status_message = (
+                    "An error occurred while dubbing the video."
+                )
+            self.output_message.publish()
+            logger.exception(f"Error in {self.agent_name} agent: {str(e)}")
+            return AgentResponse(
+                status=AgentStatus.ERROR, message=f"Failed to dub video: {str(e)}"
+            )
diff --git a/backend/director/constants.py b/backend/director/constants.py
index a3fb57b..782e746 100644
--- a/backend/director/constants.py
+++ b/backend/director/constants.py
@@ -26,3 +26,5 @@ class EnvPrefix(str, Enum):
 
     OPENAI_ = "OPENAI_"
     ANTHROPIC_ = "ANTHROPIC_"
+
+DOWNLOADS_PATH = "director/downloads"
diff --git a/backend/director/handler.py b/backend/director/handler.py
index dcee183..2c2b46a 100644
--- a/backend/director/handler.py
+++ b/backend/director/handler.py
@@ -17,6 +17,7 @@
 from director.agents.stream_video import StreamVideoAgent
 from director.agents.subtitle import SubtitleAgent
 from director.agents.slack_agent import SlackAgent
+from director.agents.dubbing import DubbingAgent
 
 from director.core.session import Session, InputMessage, MsgStatus
 
@@ -48,6 +49,7 @@ def __init__(self, db, **kwargs):
             StreamVideoAgent,
             SubtitleAgent,
             SlackAgent,
+            DubbingAgent,
         ]
 
     def add_videodb_state(self, session):
diff --git a/backend/director/tools/elevenlabs.py b/backend/director/tools/elevenlabs.py
new file mode 100644
index 0000000..3bc1e6c
--- /dev/null
+++ b/backend/director/tools/elevenlabs.py
@@ -0,0 +1,123 @@
+import os
+import base64
+import json
+import logging
+import time
+from typing import Optional
+from elevenlabs.client import ElevenLabs
+import traceback
+from elevenlabs import VoiceSettings
+
+logger = logging.getLogger(__name__)
+
+
+class ElevenLabsTool:
+    """Thin wrapper around the ElevenLabs client for running dubbing jobs."""
+
+    def __init__(self, api_key: str):
+        """
+        Create the ElevenLabs client.
+
+        Args:
+            api_key: ElevenLabs API key
+
+        Raises:
+            ValueError: If api_key is empty
+        """
+        # Fail fast: without a client every later call would only surface
+        # as an AttributeError on self.client.
+        if not api_key:
+            raise ValueError("ElevenLabs API key is required")
+        self.client = ElevenLabs(api_key=api_key)
+        self.voice_settings = VoiceSettings(
+            stability=0.0, similarity_boost=1.0, style=0.0, use_speaker_boost=True
+        )
+
+    def create_dub_job(
+        self,
+        source_url: str,
+        target_language: str,
+    ) -> Optional[str]:
+        """
+        Start a dubbing job for the audio or video file at the given URL.
+
+        Args:
+            source_url: URL of the file to dub
+            target_language: Target language code (e.g. "es")
+
+        Returns:
+            ID of the created dubbing job if successful, None if failed
+        """
+        try:
+            response = self.client.dubbing.dub_a_video_or_an_audio_file(
+                source_url=source_url,
+                target_lang=target_language,
+            )
+            return response.dubbing_id
+        except Exception:
+            # Return None (as annotated) rather than an error dict, which a
+            # caller could mistake for a job ID.
+            logger.exception("Error creating dubbing job")
+            return None
+
+    def wait_for_dub_job(
+        self,
+        dubbing_id: str,
+        max_attempts: int = 120,
+        check_interval: int = 30,
+    ) -> bool:
+        """
+        Poll the dubbing job until it leaves the "dubbing" state.
+
+        Args:
+            dubbing_id: ID of the dubbing job to poll
+            max_attempts: Maximum number of status checks before giving up
+            check_interval: Seconds to sleep between status checks
+
+        Returns:
+            True once the status is "dubbed"; False on any other final
+            status, on a failed status check, or when attempts run out
+        """
+        for _ in range(max_attempts):
+            try:
+                metadata = self.client.dubbing.get_dubbing_project_metadata(dubbing_id)
+                status = metadata.status
+            except Exception:
+                logger.exception("Error checking dubbing status")
+                return False
+            logger.debug("Dubbing job %s metadata: %s", dubbing_id, metadata)
+            if status == "dubbed":
+                return True
+            if status != "dubbing":
+                logger.error("Dubbing job %s ended with status %s", dubbing_id, status)
+                return False
+            time.sleep(check_interval)
+        return False
+
+    def download_dub_file(
+        self, dubbing_id: str, language_code: str, output_path: str
+    ) -> Optional[str]:
+        """
+        Download the dubbed file of a finished job.
+
+        Args:
+            dubbing_id: ID of the dubbing job
+            language_code: Language code of the dubbed track to download
+            output_path: Local path the file is written to
+
+        Returns:
+            output_path if successful, None if failed
+        """
+        try:
+            with open(output_path, "wb") as file:
+                for chunk in self.client.dubbing.get_dubbed_file(
+                    dubbing_id, language_code
+                ):
+                    file.write(chunk)
+            return output_path
+        except Exception:
+            logger.exception("Error downloading dubbed file")
+            # Remove a partially written file so it cannot be uploaded later.
+            if os.path.exists(output_path):
+                os.remove(output_path)
+            return None
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 0a7e333..734ecca 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -1,5 +1,6 @@
 -e .
 anthropic==0.37.1
+elevenlabs==1.9.0
 Flask==3.0.3
 Flask-SocketIO==5.3.6
 Flask-Cors==4.0.1