diff --git a/backend/director/agents/composio.py b/backend/director/agents/composio.py index 9c94d6a..5b43097 100644 --- a/backend/director/agents/composio.py +++ b/backend/director/agents/composio.py @@ -12,7 +12,7 @@ ) from director.tools.composio_tool import composio_tool -from director.llm.openai import OpenAI +from director.llm import get_default_llm from director.llm.base import LLMResponseStatus logger = logging.getLogger(__name__) @@ -34,7 +34,7 @@ def __init__(self, session: Session, **kwargs): self.agent_name = "composio" self.description = f'The Composio agent is used to run tasks related to apps like {os.getenv("COMPOSIO_APPS")} ' self.parameters = COMPOSIO_PARAMETERS - self.llm = OpenAI() + self.llm = get_default_llm() super().__init__(session=session, **kwargs) def run(self, task: str, *args, **kwargs) -> AgentResponse: diff --git a/backend/director/agents/meme_maker.py b/backend/director/agents/meme_maker.py index c5493d5..a7b8f3e 100644 --- a/backend/director/agents/meme_maker.py +++ b/backend/director/agents/meme_maker.py @@ -12,7 +12,7 @@ VideoData, ) from director.tools.videodb_tool import VideoDBTool -from director.llm.openai import OpenAI +from director.llm import get_default_llm logger = logging.getLogger(__name__) @@ -41,7 +41,7 @@ def __init__(self, session: Session, **kwargs): self.agent_name = "meme_maker" self.description = "Generates meme clips and images based on user prompts. This agent usages LLM to analyze the transcript and visual content of the video to generate memes." 
self.parameters = MEMEMAKER_PARAMETERS - self.llm = OpenAI() + self.llm = get_default_llm() super().__init__(session=session, **kwargs) def _chunk_docs(self, docs, chunk_size): diff --git a/backend/director/agents/pricing.py b/backend/director/agents/pricing.py index 0761528..d6e9d8a 100644 --- a/backend/director/agents/pricing.py +++ b/backend/director/agents/pricing.py @@ -8,7 +8,7 @@ RoleTypes, TextContent, ) -from director.llm.openai import OpenAI +from director.llm import get_default_llm logger = logging.getLogger(__name__) @@ -94,7 +94,7 @@ def __init__(self, session: Session, **kwargs): self.agent_name = "pricing" self.description = "Agent to get information about the pricing and usage of VideoDB, it is also helpful for running scenarios to get the estimates." self.parameters = self.get_parameters() - self.llm = OpenAI() + self.llm = get_default_llm() super().__init__(session=session, **kwargs) def run(self, query: str, *args, **kwargs) -> AgentResponse: diff --git a/backend/director/agents/profanity_remover.py b/backend/director/agents/profanity_remover.py index c4f2464..37b0922 100644 --- a/backend/director/agents/profanity_remover.py +++ b/backend/director/agents/profanity_remover.py @@ -13,7 +13,7 @@ ContextMessage, RoleTypes, ) -from director.llm.openai import OpenAI +from director.llm import get_default_llm from director.tools.videodb_tool import VideoDBTool logger = logging.getLogger(__name__) @@ -33,7 +33,7 @@ def __init__(self, session: Session, **kwargs): "if user has not given those optional param of beep_audio_id always try with sending it as None so that defaults are picked from env" ) self.parameters = self.get_parameters() - self.llm = OpenAI() + self.llm = get_default_llm() super().__init__(session=session, **kwargs) def add_beep(self, videodb_tool, video_id, beep_audio_id, timestamps): diff --git a/backend/director/agents/prompt_clip.py b/backend/director/agents/prompt_clip.py index 9b14320..9c9816b 100644 --- 
a/backend/director/agents/prompt_clip.py +++ b/backend/director/agents/prompt_clip.py @@ -12,7 +12,7 @@ VideoData, ) from director.tools.videodb_tool import VideoDBTool -from director.llm.openai import OpenAI +from director.llm import get_default_llm logger = logging.getLogger(__name__) @@ -46,7 +46,7 @@ def __init__(self, session: Session, **kwargs): self.agent_name = "prompt_clip" self.description = "Generates video clips based on user prompts. This agent uses AI to analyze the text of a video transcript and identify sentences relevant to the user prompt for making clips. It then generates video clips based on the identified sentences. Use this tool to create clips based on specific themes or topics from a video." self.parameters = PROMPTCLIP_AGENT_PARAMETERS - self.llm = OpenAI() + self.llm = get_default_llm() super().__init__(session=session, **kwargs) def _chunk_docs(self, docs, chunk_size): diff --git a/backend/director/agents/search.py b/backend/director/agents/search.py index f4e7c69..312783f 100644 --- a/backend/director/agents/search.py +++ b/backend/director/agents/search.py @@ -1,7 +1,7 @@ import logging from director.agents.base import BaseAgent, AgentResponse, AgentStatus -from director.llm.openai import OpenAI +from director.llm import get_default_llm from director.core.session import ( Session, MsgStatus, @@ -61,7 +61,7 @@ class SearchAgent(BaseAgent): def __init__(self, session: Session, **kwargs): self.agent_name = "search" self.description = "Agent to retreive data from VideoDB collections and videos." 
- self.llm = OpenAI() + self.llm = get_default_llm() self.parameters = SEARCH_AGENT_PARAMETERS super().__init__(session=session, **kwargs) diff --git a/backend/director/agents/slack_agent.py b/backend/director/agents/slack_agent.py index 706457c..bbd1598 100644 --- a/backend/director/agents/slack_agent.py +++ b/backend/director/agents/slack_agent.py @@ -10,7 +10,7 @@ RoleTypes, ) from director.tools.slack import send_message_to_channel -from director.llm.openai import OpenAI +from director.llm import get_default_llm from director.llm.base import LLMResponseStatus logger = logging.getLogger(__name__) @@ -29,7 +29,7 @@ def __init__(self, session: Session, **kwargs): self.agent_name = "slack" self.description = "Messages to a slack channel" self.parameters = self.get_parameters() - self.llm = OpenAI() + self.llm = get_default_llm() super().__init__(session=session, **kwargs) def run(self, message: str, *args, **kwargs) -> AgentResponse: diff --git a/backend/director/agents/subtitle.py b/backend/director/agents/subtitle.py index 6e25651..7025e81 100644 --- a/backend/director/agents/subtitle.py +++ b/backend/director/agents/subtitle.py @@ -12,7 +12,7 @@ MsgStatus, ) from director.tools.videodb_tool import VideoDBTool -from director.llm.openai import OpenAI, OpenaiConfig +from director.llm import get_default_llm from videodb.asset import VideoAsset, TextAsset, TextStyle @@ -109,7 +109,7 @@ class SubtitleAgent(BaseAgent): def __init__(self, session: Session, **kwargs): self.agent_name = "subtitle" self.description = "An agent designed to add different languages subtitles to a specified video within VideoDB." 
- self.llm = OpenAI(config=OpenaiConfig(timeout=120)) + self.llm = get_default_llm() self.parameters = SUBTITLE_AGENT_PARAMETERS super().__init__(session=session, **kwargs) diff --git a/backend/director/agents/video_summary.py b/backend/director/agents/video_summary.py index 7c67cb9..9c08b6f 100644 --- a/backend/director/agents/video_summary.py +++ b/backend/director/agents/video_summary.py @@ -2,7 +2,7 @@ from director.agents.base import BaseAgent, AgentResponse, AgentStatus from director.core.session import ContextMessage, RoleTypes, TextContent, MsgStatus -from director.llm.openai import OpenAI +from director.llm import get_default_llm from director.tools.videodb_tool import VideoDBTool logger = logging.getLogger(__name__) @@ -12,7 +12,7 @@ class VideoSummaryAgent(BaseAgent): def __init__(self, session=None, **kwargs): self.agent_name = "video_summary" self.description = "This is an agent to summarize the given video of VideoDB, if the user wants a certain kind of summary the prompt is required." 
- self.llm = OpenAI() + self.llm = get_default_llm() self.parameters = self.get_parameters() super().__init__(session=session, **kwargs) diff --git a/backend/director/constants.py b/backend/director/constants.py index 782e746..f370aac 100644 --- a/backend/director/constants.py +++ b/backend/director/constants.py @@ -19,6 +19,7 @@ class LLMType(str, Enum): OPENAI = "openai" ANTHROPIC = "anthropic" + VIDEODB_PROXY = "videodb_proxy" class EnvPrefix(str, Enum): diff --git a/backend/director/core/reasoning.py b/backend/director/core/reasoning.py index 9d6283c..fa26536 100644 --- a/backend/director/core/reasoning.py +++ b/backend/director/core/reasoning.py @@ -13,7 +13,7 @@ MsgStatus, ) from director.llm.base import LLMResponse -from director.llm.openai import OpenAI +from director.llm import get_default_llm logger = logging.getLogger(__name__) @@ -64,7 +64,7 @@ def __init__( self.session = session self.system_prompt = REASONING_SYSTEM_PROMPT self.max_iterations = 10 - self.llm = OpenAI() + self.llm = get_default_llm() self.agents: List[BaseAgent] = [] self.stop_flag = False self.output_message: OutputMessage = self.session.output_message diff --git a/backend/director/handler.py b/backend/director/handler.py index 91ae8ac..3d9bdbd 100644 --- a/backend/director/handler.py +++ b/backend/director/handler.py @@ -150,16 +150,11 @@ class ConfigHandler: def check(self): """Check the configuration of the server.""" videodb_configured = True if os.getenv("VIDEO_DB_API_KEY") else False - openai_key_configured = True if os.getenv("OPENAI_API_KEY") else False - - llm_configured = False - if openai_key_configured: - llm_configured = True db = load_db(os.getenv("SERVER_DB_TYPE", "sqlite")) db_configured = db.health_check() return { "videodb_configured": videodb_configured, - "llm_configured": llm_configured, + "llm_configured": True, "db_configured": db_configured, } diff --git a/backend/director/llm/__init__.py b/backend/director/llm/__init__.py index e69de29..b909bb3 100644 --- 
a/backend/director/llm/__init__.py +++ b/backend/director/llm/__init__.py @@ -0,0 +1,23 @@ +import os + +from director.constants import LLMType + +from director.llm.openai import OpenAI +from director.llm.anthropic import AnthropicAI +from director.llm.videodb_proxy import VideoDBProxy + + +def get_default_llm(): + """Get default LLM""" + + openai = True if os.getenv("OPENAI_API_KEY") else False + anthropic = True if os.getenv("ANTHROPIC_API_KEY") else False + + default_llm = os.getenv("DEFAULT_LLM") + + if default_llm == LLMType.OPENAI or (not default_llm and openai): + return OpenAI() + elif default_llm == LLMType.ANTHROPIC or (not default_llm and anthropic): + return AnthropicAI() + else: + return VideoDBProxy() diff --git a/backend/director/llm/videodb_proxy.py b/backend/director/llm/videodb_proxy.py new file mode 100644 index 0000000..6330711 --- /dev/null +++ b/backend/director/llm/videodb_proxy.py @@ -0,0 +1,170 @@ +import os +import json +from enum import Enum + +from pydantic import Field, field_validator, FieldValidationInfo + + +from director.llm.base import BaseLLM, BaseLLMConfig, LLMResponse, LLMResponseStatus +from director.constants import ( + LLMType, +) + + +class OpenAIChatModel(str, Enum): + """Enum for OpenAI Chat models""" + + GPT4o = "gpt-4o-2024-11-20" + + +class VideoDBProxyConfig(BaseLLMConfig): + """VideoDB Proxy Config""" + + llm_type: str = LLMType.VIDEODB_PROXY + api_key: str = os.getenv("VIDEO_DB_API_KEY") + api_base: str = os.getenv("VIDEO_DB_BASE_URL", "https://api.videodb.io") + chat_model: str = Field(default=OpenAIChatModel.GPT4o) + max_tokens: int = 4096 + + @field_validator("api_key") + @classmethod + def validate_non_empty(cls, v, info: FieldValidationInfo): + if not v: + raise ValueError("Please set VIDEO_DB_API_KEY environment variable.") + return v + + +class VideoDBProxy(BaseLLM): + def __init__(self, config: VideoDBProxyConfig = None): + """ + :param config: VideoDB Proxy Config + """ + if config is None: + config = VideoDBProxyConfig() + super().__init__(config=config) + try: +
import openai + except ImportError: + raise ImportError("Please install OpenAI python library.") + + self.client = openai.OpenAI(api_key=self.api_key, base_url=f"{self.api_base}") + + def _format_messages(self, messages: list): + """Format the messages to the format that OpenAI expects.""" + formatted_messages = [] + for message in messages: + if message["role"] == "assistant" and message.get("tool_calls"): + formatted_messages.append( + { + "role": message["role"], + "content": message["content"], + "tool_calls": [ + { + "id": tool_call["id"], + "function": { + "name": tool_call["tool"]["name"], + "arguments": json.dumps( + tool_call["tool"]["arguments"] + ), + }, + "type": tool_call["type"], + } + for tool_call in message["tool_calls"] + ], + } + ) + else: + formatted_messages.append(message) + return formatted_messages + + def _format_tools(self, tools: list): + """Format the tools to the format that OpenAI expects. + + **Example**:: + + [ + { + "type": "function", + "function": { + "name": "get_delivery_date", + "description": "Get the delivery date for a customer's order.", + "parameters": { + "type": "object", + "properties": { + "order_id": { + "type": "string", + "description": "The customer's order ID." + } + }, + "required": ["order_id"], + "additionalProperties": False + } + } + } + ] + """ + formatted_tools = [] + for tool in tools: + formatted_tools.append( + { + "type": "function", + "function": { + "name": tool["name"], + "description": tool["description"], + "parameters": tool["parameters"], + }, + "strict": True, + } + ) + return formatted_tools + + def chat_completions( + self, messages: list, tools: list = [], stop=None, response_format=None + ): + """Get completions for chat. 
+ + docs: https://platform.openai.com/docs/guides/function-calling + """ + params = { + "model": self.chat_model, + "messages": self._format_messages(messages), + "temperature": self.temperature, + "max_tokens": self.max_tokens, + "top_p": self.top_p, + "stop": stop, + "timeout": self.timeout, + } + if tools: + params["tools"] = self._format_tools(tools) + params["tool_choice"] = "auto" + + if response_format: + params["response_format"] = response_format + + try: + response = self.client.chat.completions.create(**params) + except Exception as e: + print(f"Error: {e}") + return LLMResponse(content=f"Error: {e}") + + return LLMResponse( + content=response.choices[0].message.content or "", + tool_calls=[ + { + "id": tool_call.id, + "tool": { + "name": tool_call.function.name, + "arguments": json.loads(tool_call.function.arguments), + }, + "type": tool_call.type, + } + for tool_call in response.choices[0].message.tool_calls + ] + if response.choices[0].message.tool_calls + else [], + finish_reason=response.choices[0].finish_reason, + send_tokens=response.usage.prompt_tokens, + recv_tokens=response.usage.completion_tokens, + total_tokens=response.usage.total_tokens, + status=LLMResponseStatus.SUCCESS, + )