-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add dubbing agent with elevenlabs tools (#59)
* add dubbing agent & elevenlabs tools
- Loading branch information
1 parent
d9e6b1f
commit ad39c79
Showing
7 changed files
with
266 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,4 +14,5 @@ venv | |
*.egg-info | ||
package-lock.json | ||
*.mjs | ||
site/* | ||
site/* | ||
backend/director/downloads |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
import logging | ||
import os | ||
|
||
from director.constants import DOWNLOADS_PATH | ||
|
||
from director.agents.base import BaseAgent, AgentResponse, AgentStatus | ||
from director.core.session import Session, VideoContent, MsgStatus, VideoData | ||
from director.tools.videodb_tool import VideoDBTool | ||
from director.tools.elevenlabs import ElevenLabsTool | ||
|
||
logger = logging.getLogger(__name__)

# Dubbing engines accepted by the agent; `run` rejects any other value.
SUPPORTED_ENGINES = ["elevenlabs"]

# JSON-schema style description of the arguments accepted by `DubbingAgent.run`.
DUBBING_AGENT_PARAMETERS = {
    "type": "object",
    "properties": {
        "video_id": {
            "type": "string",
            "description": "The unique identifier of the video that needs to be dubbed. This ID is used to retrieve the video from the VideoDB collection.",
        },
        "target_language": {
            "type": "string",
            "description": "The target language for dubbing (e.g. 'Spanish', 'French', 'German'). The video's audio will be translated and dubbed into this language.",
        },
        "target_language_code": {
            "type": "string",
            "description": "The target language code for dubbing (e.g. 'es' for Spanish, 'fr' for French, 'de' for German).",
        },
        "collection_id": {
            "type": "string",
            "description": "The unique identifier of the VideoDB collection containing the video. Required to locate and access the correct video library.",
        },
        "engine": {
            "type": "string",
            "description": "The dubbing engine to use. Default is 'elevenlabs'. Possible values include 'elevenlabs'.",
            "default": "elevenlabs",
            # Keep the schema in lock-step with the engines `run` accepts.
            "enum": list(SUPPORTED_ENGINES),
        },
        "engine_params": {
            "type": "object",
            "description": "Optional parameters for the dubbing engine.",
        },
    },
    "required": [
        "video_id",
        "target_language",
        "target_language_code",
        "collection_id",
        "engine",
    ],
}
|
||
|
||
class DubbingAgent(BaseAgent):
    """Agent that dubs a VideoDB video into a target language.

    The agent obtains a download URL for the video through ``VideoDBTool``,
    hands it to the selected dubbing engine, saves the dubbed file under
    ``DOWNLOADS_PATH`` and uploads it back through ``VideoDBTool``.
    """

    def __init__(self, session: Session, **kwargs):
        self.agent_name = "dubbing"
        self.description = (
            "This is an agent to dub the given video into a target language"
        )
        self.parameters = DUBBING_AGENT_PARAMETERS
        super().__init__(session=session, **kwargs)

    def _dub_with_elevenlabs(
        self, source_url: str, target_language_code: str, output_path: str
    ) -> None:
        """Run an ElevenLabs dubbing job and save the result to ``output_path``.

        :param str source_url: URL of the media to dub.
        :param str target_language_code: Language code passed to ElevenLabs.
        :param str output_path: Local path the dubbed file is written to.
        :raises Exception: If the API key is missing or any step of the job
            (creation, processing, download) reports a failure.
        """
        api_key = os.getenv("ELEVENLABS_API_KEY")
        if not api_key:
            raise Exception("Elevenlabs API key not present in .env")
        elevenlabs_tool = ElevenLabsTool(api_key=api_key)

        job_id = elevenlabs_tool.create_dub_job(
            source_url=source_url,
            target_language=target_language_code,
        )
        # create_dub_job signals failure by returning an {"error": ...} dict
        # instead of raising, so the result has to be inspected explicitly.
        if not job_id or isinstance(job_id, dict):
            reason = (
                job_id.get("error") if isinstance(job_id, dict) else None
            ) or "no job ID returned"
            raise Exception(f"Failed to create dubbing job: {reason}")
        self.output_message.actions.append(
            f"Dubbing job initiated with Job ID: {job_id}"
        )
        self.output_message.push_update()

        self.output_message.actions.append(
            "Waiting for dubbing process to complete.."
        )
        self.output_message.push_update()
        # wait_for_dub_job returns False on failure or timeout.
        if not elevenlabs_tool.wait_for_dub_job(job_id):
            raise Exception(f"Dubbing job {job_id} did not complete successfully")

        self.output_message.actions.append("Downloading dubbed video")
        self.output_message.push_update()
        # download_dub_file returns None when the download fails.
        downloaded_path = elevenlabs_tool.download_dub_file(
            job_id,
            target_language_code,
            output_path,
        )
        if not downloaded_path:
            raise Exception(f"Failed to download dubbed file for job {job_id}")

    def run(
        self,
        video_id: str,
        target_language: str,
        target_language_code: str,
        collection_id: str,
        engine: str,
        engine_params: dict = None,
        *args,
        **kwargs,
    ) -> AgentResponse:
        """
        Process the video dubbing based on the given video ID.

        :param str video_id: The ID of the video to process.
        :param str target_language: The target language name for dubbing (e.g. Spanish).
        :param str target_language_code: The target language code for dubbing (e.g. es).
        :param str collection_id: The ID of the collection to process.
        :param str engine: The dubbing engine to use; must be in SUPPORTED_ENGINES.
        :param dict engine_params: Optional parameters for the dubbing engine.
            Accepted for interface compatibility; not currently forwarded.
        :param args: Additional positional arguments.
        :param kwargs: Additional keyword arguments.
        :return: The response containing information about the dubbing operation.
            Failures are reported through an ERROR response rather than raised.
        :rtype: AgentResponse
        """
        # Bound before the try block so the error handler can always refer to
        # it, even when the failure happens before the content is created.
        video_content = None
        try:
            video_content = VideoContent(
                agent_name=self.agent_name,
                status=MsgStatus.progress,
                status_message="Processing...",
            )
            self.output_message.content.append(video_content)

            self.videodb_tool = VideoDBTool(collection_id=collection_id)

            video = self.videodb_tool.get_video(video_id)
            if not video:
                raise Exception(f"Video {video_id} not found")

            if engine not in SUPPORTED_ENGINES:
                raise Exception(f"{engine} not supported")

            self.output_message.actions.append("Downloading video")
            self.output_message.push_update()

            download_response = self.videodb_tool.download(video["stream_url"])

            os.makedirs(DOWNLOADS_PATH, exist_ok=True)
            dubbed_file_path = os.path.join(DOWNLOADS_PATH, f"{video_id}_dubbed.mp4")

            if engine == "elevenlabs":
                self._dub_with_elevenlabs(
                    source_url=download_response["download_url"],
                    target_language_code=target_language_code,
                    output_path=dubbed_file_path,
                )

            dubbed_name = f"[Dubbed in {target_language}] {video['name']}"
            self.output_message.actions.append(
                f"Uploading dubbed video to VideoDB as '{dubbed_name}'"
            )
            self.output_message.push_update()

            dubbed_video = self.videodb_tool.upload(
                dubbed_file_path,
                source_type="file_path",
                media_type="video",
                name=dubbed_name,
            )

            video_content.video = VideoData(stream_url=dubbed_video["stream_url"])
            video_content.status = MsgStatus.success
            video_content.status_message = f"Dubbed video in {target_language} has been successfully added to your video. Here is your stream."
            self.output_message.publish()

            return AgentResponse(
                status=AgentStatus.SUCCESS,
                message=f"Successfully dubbed video '{video['name']}' to {target_language}",
                data={
                    "stream_url": dubbed_video["stream_url"],
                    "video_id": dubbed_video["id"],
                },
            )

        except Exception as e:
            # Log first so the traceback is recorded even if publishing fails.
            logger.exception(f"Error in {self.agent_name} agent: {str(e)}")
            if video_content is not None:
                video_content.status = MsgStatus.error
                video_content.status_message = (
                    "An error occurred while dubbing the video."
                )
            self.output_message.publish()
            return AgentResponse(
                status=AgentStatus.ERROR, message=f"Failed to dub video: {str(e)}"
            )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,3 +26,5 @@ class EnvPrefix(str, Enum): | |
|
||
OPENAI_ = "OPENAI_" | ||
ANTHROPIC_ = "ANTHROPIC_" | ||
|
||
# Directory for locally stored media files. The path is relative, so it
# resolves against the working directory of the running process.
DOWNLOADS_PATH = "director/downloads"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import base64
import json
import logging
import os
import time
import traceback
from typing import Optional, Union

from elevenlabs import VoiceSettings
from elevenlabs.client import ElevenLabs
|
||
|
||
class ElevenLabsTool:
    """Thin wrapper around the ElevenLabs client for dubbing jobs.

    The methods never raise: failures are reported through their return
    values (an ``{"error": ...}`` dict, ``False`` or ``None``), so callers
    must inspect the result of every call.
    """

    _logger = logging.getLogger(__name__)

    def __init__(self, api_key: str):
        """Create the ElevenLabs client.

        :param api_key: ElevenLabs API key. When empty, no client is created
            and every later call reports an error through its return value.
        """
        # Always define the attributes so the instance is never left
        # half-initialised when the key is missing.
        self.client = None
        self.voice_settings = None
        if api_key:
            self.client = ElevenLabs(api_key=api_key)
            self.voice_settings = VoiceSettings(
                stability=0.0, similarity_boost=1.0, style=0.0, use_speaker_boost=True
            )

    def _require_client(self):
        """Return the client, raising a clear error when it was never created."""
        if getattr(self, "client", None) is None:
            raise RuntimeError(
                "ElevenLabs client is not configured (missing API key)"
            )
        return self.client

    def create_dub_job(
        self,
        source_url: str,
        target_language: str,
    ) -> Union[str, dict]:
        """
        Start a dubbing job for the media at ``source_url``.

        Args:
            source_url: URL of the audio or video file to dub
            target_language: Target language code (e.g. "es")
        Returns:
            The dubbing job ID if the job was created, otherwise a dict of
            the form ``{"error": <message>}``
        """
        try:
            response = self._require_client().dubbing.dub_a_video_or_an_audio_file(
                source_url=source_url,
                target_lang=target_language,
            )
            return response.dubbing_id
        except Exception as e:
            self._logger.exception("Error creating dubbing job: %s", e)
            return {"error": str(e)}

    def wait_for_dub_job(
        self,
        dubbing_id: str,
        max_attempts: int = 120,
        check_interval: float = 30,
    ) -> bool:
        """Poll a dubbing job until it finishes.

        Args:
            dubbing_id: ID returned by ``create_dub_job``
            max_attempts: Maximum number of status checks
            check_interval: Seconds to sleep between checks while the job
                is still running
        Returns:
            True once the status is "dubbed"; False if the job reports any
            status other than "dubbing"/"dubbed", if a status check raises,
            or if the attempts are exhausted
        """
        for _ in range(max_attempts):
            try:
                metadata = self._require_client().dubbing.get_dubbing_project_metadata(
                    dubbing_id
                )
            except Exception as e:
                self._logger.exception("Error checking dubbing status: %s", e)
                return False

            self._logger.debug("Dubbing job %s metadata: %s", dubbing_id, metadata)
            if metadata.status == "dubbed":
                return True
            if metadata.status != "dubbing":
                return False
            time.sleep(check_interval)
        return False

    def download_dub_file(
        self, dubbing_id: str, language_code: str, output_path: str
    ) -> Optional[str]:
        """Download the dubbed file.

        Args:
            dubbing_id: ID of a finished dubbing job
            language_code: Language code of the dubbed track to fetch
            output_path: Local path the file is written to
        Returns:
            ``output_path`` on success, None on failure. A file that was
            only partially written is removed before returning None.
        """
        file_opened = False
        try:
            # Request the stream before touching the filesystem so a failed
            # request does not leave an empty file behind.
            chunks = self._require_client().dubbing.get_dubbed_file(
                dubbing_id, language_code
            )
            with open(output_path, "wb") as file:
                file_opened = True
                for chunk in chunks:
                    file.write(chunk)
            return output_path
        except Exception as e:
            self._logger.exception("Error downloading dubbed file: %s", e)
            if file_opened:
                # Do not leave a truncated file that could be mistaken for
                # a complete download.
                try:
                    os.remove(output_path)
                except OSError:
                    pass
            return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
-e . | ||
anthropic==0.37.1 | ||
elevenlabs==1.9.0 | ||
Flask==3.0.3 | ||
Flask-SocketIO==5.3.6 | ||
Flask-Cors==4.0.1 | ||
|