improved prompts of the system and agents

video-db · Dec 3, 2024 · 64bf907 · 64bf907
1 parent 2e5edd9
commit 64bf907
Show file tree

Hide file tree

Showing 8 changed files with 63 additions and 23 deletions.
diff --git a/backend/director/agents/editing.py b/backend/director/agents/editing.py
@@ -75,7 +75,7 @@
 class EditingAgent(BaseAgent):
     def __init__(self, session: Session, **kwargs):
         self.agent_name = "editing"
-        self.description = "An agent designed to edit and combine videos and audio files within VideoDB."
+        self.description = "An agent designed to edit and combine videos and audio files uploaded on VideoDB."
         self.parameters = EDITING_AGENT_PARAMETERS
         super().__init__(session=session, **kwargs)
         self.timeline = None

diff --git a/backend/director/agents/image_generation.py b/backend/director/agents/image_generation.py
@@ -10,7 +10,7 @@
 class ImageGenerationAgent(BaseAgent):
     def __init__(self, session: Session, **kwargs):
         self.agent_name = "image_generation"
-        self.description = "Agent for image generation using Gen AI models on given prompt and configurations."
+        self.description = "Agent for image generation using GenAI models on given prompt and configurations."
         self.parameters = self.get_parameters()
         super().__init__(session=session, **kwargs)
 

diff --git a/backend/director/agents/pricing.py b/backend/director/agents/pricing.py
@@ -13,7 +13,10 @@
 logger = logging.getLogger(__name__)
 
 PRICING_AGENT_PROMPT = """
-    You are a brilliant pricing analyst working for VideoDB, a video database for AI apps. You can access information from internet and also reference this sheet to provide answers to to the task your user (executive) asks. VideoDB brings storage, index, retrieval and streaming at one place. Programatic streams can be generated form any segment of the video and to find the right segment, indexing of content is necessary. 
+    You are a brilliant pricing analyst working for VideoDB, a video database for AI apps. 
+    You can access information from the internet and also reference this sheet to provide answers to the task your user (executive) asks. 
+    VideoDB brings storage, index, retrieval and streaming in one place. 
+    Programmatic streams can be generated from any segment of the video and to find the right segment, indexing of content is necessary. 
 
     Here's the workflow:
     - Any file that gets uploaded remain in storage. 
@@ -92,7 +95,7 @@
 class PricingAgent(BaseAgent):
     def __init__(self, session: Session, **kwargs):
         self.agent_name = "pricing"
-        self.description = "Agent to get information about the pricing and usage of VideoDB, it is also helpful for running scenarios to get the estimates."
+        self.description = "Agent to get information about the pricing and usage of VideoDB, helpful for running scenarios to get the estimates."
         self.parameters = self.get_parameters()
         self.llm = get_default_llm()
         super().__init__(session=session, **kwargs)

diff --git a/backend/director/agents/profanity_remover.py b/backend/director/agents/profanity_remover.py
@@ -29,8 +29,8 @@ class ProfanityRemoverAgent(BaseAgent):
     def __init__(self, session: Session, **kwargs):
         self.agent_name = "profanity_remover"
         self.description = (
-            "Agent to beep the profanities in the given video and return the clean stream."
-            "if user has not given those optional param of beep_audio_id always try with sending it as None so that defaults are picked from env"
+            "This agent beeps the profanities in the given video and returns the updated video stream. "
+            "If the user has not given the optional param beep_audio_id, send it as `None` so defaults are picked from env."
         )
         self.parameters = self.get_parameters()
         self.llm = get_default_llm()

diff --git a/backend/director/agents/prompt_clip.py b/backend/director/agents/prompt_clip.py
@@ -44,6 +44,7 @@
 class PromptClipAgent(BaseAgent):
     def __init__(self, session: Session, **kwargs):
         self.agent_name = "prompt_clip"
+        # TODO: Improve this agent description
         self.description = "Generates video clips based on user prompts. This agent uses AI to analyze the text of a video transcript and identify sentences relevant to the user prompt for making clips. It then generates video clips based on the identified sentences. Use this tool to create clips based on specific themes or topics from a video."
         self.parameters = PROMPTCLIP_AGENT_PARAMETERS
         self.llm = get_default_llm()

diff --git a/backend/director/agents/search.py b/backend/director/agents/search.py
@@ -60,7 +60,7 @@
 class SearchAgent(BaseAgent):
     def __init__(self, session: Session, **kwargs):
         self.agent_name = "search"
-        self.description = "Agent to retreive data from VideoDB collections and videos."
+        self.description = "Agent to search information from VideoDB collections. Mainly used with a collection of videos."
         self.llm = get_default_llm()
         self.parameters = SEARCH_AGENT_PARAMETERS
         super().__init__(session=session, **kwargs)

diff --git a/backend/director/agents/stream_video.py b/backend/director/agents/stream_video.py
@@ -11,7 +11,7 @@ class StreamVideoAgent(BaseAgent):
     def __init__(self, session: Session, **kwargs):
         self.agent_name = "stream_video"
         self.description = (
-            "Agent to get the video player of the existing video or given m3u8 stream_url"
+            "Agent to play the requested video or given m3u8 stream_url by getting the video player"
         )
         self.parameters = self.get_parameters()
         super().__init__(session=session, **kwargs)

diff --git a/backend/director/core/reasoning.py b/backend/director/core/reasoning.py
@@ -20,24 +20,60 @@
 
 
 REASONING_SYSTEM_PROMPT = """
-    Act as a reasoning engine. You can reason the messages and take actions using the agents. also provide instructions for the agents.
-
-    To respond to the user's request, follow these steps:
-    1. Consider the available agents and their capabilities to complete user request to the user's message. 
-    2. Provide the instructions to the agents to complete the user request.
-    3. Use the agents to complete the user request.
-    4. Generate the response to the user's message based on the agents' output and the user's message.
-    5. Repeat the process until the user request is completed.
-    6. User stop to end the conversation.
-    7. If some agent requires video_id which is not available but user is asking to perform some action on some clip or generated stream.
-       - 7.1. Download the stream first using download agent
-       - 7.2. Upload that downloaded stream to VideoDB to get video id.
-       - 7.3. Perform the initial action which required video id.
+SYSTEM PROMPT: The Director (v1.2)
+
+1. **Task Handling**:
+   - Identify and select agents based on user input and context.
+   - Provide actionable instructions to agents to complete tasks.
+   - Combine agent outputs with user input to generate meaningful responses.
+   - Iterate until the request is fully addressed or the user specifies "stop."
+
+2. **Fallback Behavior**:
+   - If `video_id` is unavailable:
+     - Use the `download` agent to retrieve the stream.
+     - Upload the stream to VideoDB to generate a `video_id`.
+     - Proceed with the original request.
+
+3. **Identity**:
+   - Respond to identity-related queries with: "I am The Director, your AI assistant for video workflows and management."
+   - Provide descriptions of all the agents.
+
+4. **Agent Usage**:
+   - Prefer `summary` agent for single-video context unless `search` is explicitly requested.
+   - Use `stream_video` agent for video playback requests.
+
+5. **Clarity and Safety**:
+   - Confirm with the user if a request is ambiguous.
+   - Avoid sharing technical details (e.g., code, video IDs, collection IDs) unless explicitly requested.
+   - Keep the tone friendly and vibrant.
+
+6. **LLM Knowledge Usage**:
+   - Do not use knowledge from the LLM's training data unless the user explicitly requests it.
+   - If the information is unavailable in the video or context:
+     - Inform the user: "The requested information is not available in the current video or context."
+     - Ask the user: "Would you like me to answer using knowledge from my training data?"
+
+7. **Agent Descriptions**:
+   - When asked, describe an agent's purpose, and provide an example query (use contextual video data when available).
+
+8. **Context Awareness**:
+   - Adapt responses based on conversation context to maintain relevance.
     """.strip()
 
 SUMMARIZATION_PROMPT = """
-Generate succinct summary for the user stating what all happened with agents on basis of above responses by agents.
-Agent responses are already displayed to the user until specified explicitly in which case include the responses in the summary.
+FINAL CUT PROMPT: Generate a concise summary of the actions performed by the agents based on their responses.
+
+1. Provide an overview of the tasks completed by each agent, listing the actions taken and their outcomes.
+2. Exclude individual agent responses from the summary unless explicitly specified to include them.
+3. Ensure the summary is user-friendly, succinct, and avoids technical jargon unless requested by the user.
+4. If there were any errors, incomplete tasks, or user confirmations required:
+   - Clearly mention the issue in the summary.
+   - Politely inform the user: "If you encountered any issues or have further questions, please don't hesitate to reach out to our team on [Discord](https://discord.com/invite/py9P639jGz). We're here to help!"
+5. If the user seems dissatisfied or expresses unhappiness:
+   - Acknowledge their concerns in a respectful and empathetic tone.
+   - Include the same invitation to reach out on Discord for further assistance.
+6. End the summary by inviting the user to ask further questions or clarify additional needs.
+
 """