Skip to content

Commit

Permalink
add dubbing agent with elevenlabs tools (#59)
Browse files Browse the repository at this point in the history
* add dubbing agent & elevenlabs tools
  • Loading branch information
0xrohitgarg authored Nov 13, 2024
1 parent d9e6b1f commit ad39c79
Show file tree
Hide file tree
Showing 7 changed files with 266 additions and 2 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ venv
*.egg-info
package-lock.json
*.mjs
site/*
site/*
backend/director/downloads
5 changes: 4 additions & 1 deletion backend/.env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,7 @@ BEEP_AUDIO_ID=

# Slack Agent
SLACK_CHANNEL_NAME=
SLACK_BOT_TOKEN=
SLACK_BOT_TOKEN=

# Dubbing Agent
ELEVENLABS_API_KEY=
172 changes: 172 additions & 0 deletions backend/director/agents/dubbing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import logging
import os

from director.constants import DOWNLOADS_PATH

from director.agents.base import BaseAgent, AgentResponse, AgentStatus
from director.core.session import Session, VideoContent, MsgStatus, VideoData
from director.tools.videodb_tool import VideoDBTool
from director.tools.elevenlabs import ElevenLabsTool

logger = logging.getLogger(__name__)

# Dubbing backends that DubbingAgent.run accepts for its `engine` argument.
SUPPORTED_ENGINES = ["elevenlabs"]

# JSON-schema style description of the arguments DubbingAgent.run takes.
DUBBING_AGENT_PARAMETERS = {
    "type": "object",
    "properties": {
        "video_id": {
            "type": "string",
            "description": "The unique identifier of the video that needs to be dubbed. This ID is used to retrieve the video from the VideoDB collection.",
        },
        "target_language": {
            "type": "string",
            "description": "The target language for dubbing (e.g. 'Spanish', 'French', 'German'). The video's audio will be translated and dubbed into this language.",
        },
        "target_language_code": {
            "type": "string",
            # Fixed a stray quote after "German" that unbalanced the example list.
            "description": "The target language code for dubbing (e.g. 'es' for Spanish, 'fr' for French, 'de' for German).",
        },
        "collection_id": {
            "type": "string",
            "description": "The unique identifier of the VideoDB collection containing the video. Required to locate and access the correct video library.",
        },
        "engine": {
            "type": "string",
            "description": "The dubbing engine to use. Default is 'elevenlabs'. Possible values include 'elevenlabs'.",
            "default": "elevenlabs",
            # Keep the advertised values in sync with what run() validates against.
            "enum": SUPPORTED_ENGINES,
        },
        "engine_params": {
            "type": "object",
            "description": "Optional parameters for the dubbing engine.",
        },
    },
    "required": [
        "video_id",
        "target_language",
        "target_language_code",
        "collection_id",
        "engine",
    ],
}


class DubbingAgent(BaseAgent):
    """Agent that dubs a VideoDB video into a target language and uploads the result."""

    def __init__(self, session: Session, **kwargs):
        """Register the agent's name, description and parameter schema."""
        self.agent_name = "dubbing"
        self.description = (
            "This is an agent to dub the given video into a target language"
        )
        self.parameters = DUBBING_AGENT_PARAMETERS
        super().__init__(session=session, **kwargs)

    def _push_action(self, action: str) -> None:
        """Append a progress action to the output message and push the update."""
        self.output_message.actions.append(action)
        self.output_message.push_update()

    def _dub_with_elevenlabs(
        self, source_url: str, target_language_code: str, output_path: str
    ) -> None:
        """Create an ElevenLabs dub job, wait for it and save the result to output_path.

        :param str source_url: URL of the media to dub.
        :param str target_language_code: Target language code (e.g. es).
        :param str output_path: Local path the dubbed file is written to.
        :raises Exception: If the API key is missing or any dubbing step fails.
        """
        api_key = os.getenv("ELEVENLABS_API_KEY")
        if not api_key:
            raise Exception("Elevenlabs API key not present in .env")
        elevenlabs_tool = ElevenLabsTool(api_key=api_key)

        job_id = elevenlabs_tool.create_dub_job(
            source_url=source_url,
            target_language=target_language_code,
        )
        # The tool reports failure through its return value (an error dict or
        # None) rather than by raising, so a non-string id means no job exists.
        if not isinstance(job_id, str) or not job_id:
            reason = job_id.get("error") if isinstance(job_id, dict) else job_id
            raise Exception(f"Failed to create dubbing job: {reason}")
        self._push_action(f"Dubbing job initiated with Job ID: {job_id}")

        self._push_action("Waiting for dubbing process to complete..")
        # wait_for_dub_job returns False on failure or timeout; do not go on to
        # download a dub that never finished.
        if not elevenlabs_tool.wait_for_dub_job(job_id):
            raise Exception(f"Dubbing job {job_id} did not complete successfully")

        self._push_action("Downloading dubbed video")
        downloaded_path = elevenlabs_tool.download_dub_file(
            job_id,
            target_language_code,
            output_path,
        )
        # download_dub_file returns None when the download failed.
        if not downloaded_path:
            raise Exception(f"Failed to download dubbed file for job {job_id}")

    def run(
        self,
        video_id: str,
        target_language: str,
        target_language_code: str,
        collection_id: str,
        engine: str,
        engine_params: dict = None,
        *args,
        **kwargs,
    ) -> AgentResponse:
        """
        Process the video dubbing based on the given video ID.
        :param str video_id: The ID of the video to process.
        :param str target_language: The target language name for dubbing (e.g. Spanish).
        :param str target_language_code: The target language code for dubbing (e.g. es).
        :param str collection_id: The ID of the collection to process.
        :param str engine: The dubbing engine to use; must be in SUPPORTED_ENGINES.
        :param dict engine_params: Optional parameters for the dubbing engine
            (accepted but not used by this method).
        :param args: Additional positional arguments.
        :param kwargs: Additional keyword arguments.
        :return: The response containing information about the dubbing operation.
            Failures are caught and returned as an ERROR response, not raised.
        :rtype: AgentResponse
        """
        # Bound before the try so the error handler can tell whether the
        # progress content was ever created (early failures happen before it).
        video_content = None
        try:
            self.videodb_tool = VideoDBTool(collection_id=collection_id)

            # Get video audio file
            video = self.videodb_tool.get_video(video_id)
            if not video:
                raise Exception(f"Video {video_id} not found")

            if engine not in SUPPORTED_ENGINES:
                raise Exception(f"{engine} not supported")

            video_content = VideoContent(
                agent_name=self.agent_name,
                status=MsgStatus.progress,
                status_message="Processing...",
            )
            self.output_message.content.append(video_content)
            self._push_action("Downloading video")

            download_response = self.videodb_tool.download(video["stream_url"])

            os.makedirs(DOWNLOADS_PATH, exist_ok=True)
            dubbed_file_path = f"{DOWNLOADS_PATH}/{video_id}_dubbed.mp4"

            if engine == "elevenlabs":
                self._dub_with_elevenlabs(
                    source_url=download_response["download_url"],
                    target_language_code=target_language_code,
                    output_path=dubbed_file_path,
                )

            dubbed_name = f"[Dubbed in {target_language}] {video['name']}"
            self._push_action(f"Uploading dubbed video to VideoDB as '{dubbed_name}'")

            dubbed_video = self.videodb_tool.upload(
                dubbed_file_path,
                source_type="file_path",
                media_type="video",
                name=dubbed_name,
            )

            video_content.video = VideoData(stream_url=dubbed_video["stream_url"])
            video_content.status = MsgStatus.success
            video_content.status_message = f"Dubbed video in {target_language} has been successfully added to your video. Here is your stream."
            self.output_message.publish()

            return AgentResponse(
                status=AgentStatus.SUCCESS,
                message=f"Successfully dubbed video '{video['name']}' to {target_language}",
                data={
                    "stream_url": dubbed_video["stream_url"],
                    "video_id": dubbed_video["id"],
                },
            )

        except Exception as e:
            # Only mark the content as failed if it exists; otherwise this
            # handler would itself raise and hide the original error.
            if video_content is not None:
                video_content.status = MsgStatus.error
                video_content.status_message = (
                    "An error occurred while dubbing the video."
                )
            self.output_message.publish()
            logger.exception(f"Error in {self.agent_name} agent: {str(e)}")
            return AgentResponse(
                status=AgentStatus.ERROR, message=f"Failed to dub video: {str(e)}"
            )
2 changes: 2 additions & 0 deletions backend/director/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ class EnvPrefix(str, Enum):

OPENAI_ = "OPENAI_"
ANTHROPIC_ = "ANTHROPIC_"

# Directory that downloaded/dubbed media files are written to. It is a relative
# path, so it resolves against the process's current working directory.
DOWNLOADS_PATH="director/downloads"
2 changes: 2 additions & 0 deletions backend/director/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from director.agents.stream_video import StreamVideoAgent
from director.agents.subtitle import SubtitleAgent
from director.agents.slack_agent import SlackAgent
from director.agents.dubbing import DubbingAgent


from director.core.session import Session, InputMessage, MsgStatus
Expand Down Expand Up @@ -48,6 +49,7 @@ def __init__(self, db, **kwargs):
StreamVideoAgent,
SubtitleAgent,
SlackAgent,
DubbingAgent,
]

def add_videodb_state(self, session):
Expand Down
83 changes: 83 additions & 0 deletions backend/director/tools/elevenlabs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import base64
import json
import logging
import os
import time
import traceback
from typing import Optional

from elevenlabs import VoiceSettings
from elevenlabs.client import ElevenLabs


logger = logging.getLogger(__name__)


class ElevenLabsTool:
    """Wrapper around the ElevenLabs client for creating, polling and downloading dubs."""

    def __init__(self, api_key: str):
        """Create the ElevenLabs client and default voice settings.

        Args:
            api_key: ElevenLabs API key.
        Raises:
            ValueError: If api_key is empty. Without this check the instance
                would have no `client` attribute and every method would fail
                later with an unrelated AttributeError.
        """
        if not api_key:
            raise ValueError("ElevenLabs API key is required")
        self.client = ElevenLabs(api_key=api_key)
        self.voice_settings = VoiceSettings(
            stability=0.0, similarity_boost=1.0, style=0.0, use_speaker_boost=True
        )

    def create_dub_job(
        self,
        source_url: str,
        target_language: str,
    ) -> Optional[str]:
        """
        Start a dubbing job for the media at source_url.
        Args:
            source_url: URL of the audio or video to dub
            target_language: Target language code (e.g. "es")
        Returns:
            The dubbing job ID if the job was created, None if the request failed
        """
        try:
            response = self.client.dubbing.dub_a_video_or_an_audio_file(
                source_url=source_url,
                target_lang=target_language,
            )
            return response.dubbing_id
        except Exception as e:
            # Honour the Optional[str] contract: signal failure with None
            # instead of returning an error dict that looks like a job ID.
            logger.exception(f"Error creating dubbing job: {str(e)}")
            return None

    def wait_for_dub_job(
        self,
        dubbing_id: str,
        max_attempts: int = 120,
        check_interval: float = 30,
    ) -> bool:
        """Poll the dubbing job until it finishes.

        Args:
            dubbing_id: ID returned by create_dub_job
            max_attempts: Maximum number of status checks before giving up
            check_interval: Seconds to sleep between status checks
        Returns:
            True once the status is "dubbed"; False if the status is anything
            other than "dubbing"/"dubbed", if a status check raises, or if
            max_attempts is exhausted
        """
        for attempt in range(max_attempts):
            try:
                metadata = self.client.dubbing.get_dubbing_project_metadata(dubbing_id)
                status = metadata.status
            except Exception as e:
                logger.exception(f"Error checking dubbing status: {str(e)}")
                return False

            logger.debug(f"Dubbing job {dubbing_id} status: {status}")
            if status == "dubbed":
                return True
            if status != "dubbing":
                logger.error(f"Dubbing job {dubbing_id} ended with status: {status}")
                return False
            # No point sleeping after the final check; the loop is about to end.
            if attempt < max_attempts - 1:
                time.sleep(check_interval)

        logger.error(
            f"Dubbing job {dubbing_id} still in progress after {max_attempts} checks"
        )
        return False

    def download_dub_file(
        self, dubbing_id: str, language_code: str, output_path: str
    ) -> Optional[str]:
        """Download the dubbed file to output_path.

        Args:
            dubbing_id: ID of the finished dubbing job
            language_code: Language code of the dub to fetch (e.g. "es")
            output_path: Local path the file is written to
        Returns:
            output_path on success, None if the download failed
        """
        try:
            with open(output_path, "wb") as file:
                for chunk in self.client.dubbing.get_dubbed_file(
                    dubbing_id, language_code
                ):
                    file.write(chunk)
            return output_path
        except Exception as e:
            logger.exception(f"Error downloading dubbed file: {str(e)}")
            # Do not leave an empty or truncated file behind that a caller
            # could mistake for a finished download.
            try:
                os.remove(output_path)
            except (OSError, TypeError, ValueError):
                pass
            return None
1 change: 1 addition & 0 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
-e .
anthropic==0.37.1
elevenlabs==1.9.0
Flask==3.0.3
Flask-SocketIO==5.3.6
Flask-Cors==4.0.1
Expand Down

0 comments on commit ad39c79

Please sign in to comment.