diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml index bb63bc6659c..0a678fd9ed1 100644 --- a/.github/workflows/openai.yml +++ b/.github/workflows/openai.yml @@ -53,6 +53,9 @@ jobs: if: matrix.python-version == '3.9' run: | pip install -e .[retrievechat] + - name: Install packages for Teachable when needed + run: | + pip install -e .[teachable] - name: Coverage if: matrix.python-version == '3.9' env: diff --git a/.gitignore b/.gitignore index 98517f9d690..47917823422 100644 --- a/.gitignore +++ b/.gitignore @@ -164,3 +164,6 @@ key_openai.txt key_aoai.txt base_aoai.txt wolfram.txt + +# DB on disk for TeachableAgent +tmp/ diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py new file mode 100644 index 00000000000..8db5b699ea2 --- /dev/null +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -0,0 +1,425 @@ +import os +from autogen import oai +from autogen.agentchat.agent import Agent +from autogen.agentchat.assistant_agent import ConversableAgent +from autogen.agentchat.contrib.text_analyzer_agent import TextAnalyzerAgent +from typing import Callable, Dict, Optional, Union, List, Tuple, Any +import chromadb +from chromadb.config import Settings +import pickle + + +try: + from termcolor import colored +except ImportError: + + def colored(x, *args, **kwargs): + return x + + +class TeachableAgent(ConversableAgent): + """Teachable Agent, a subclass of ConversableAgent using a vector database to remember user teachings. + In this class, the term 'user' refers to any caller (human or not) sending messages to this agent. 
+ Not yet tested in the group-chat setting.""" + + def __init__( + self, + name="teachableagent", + system_message: Optional[ + str + ] = "You are a helpful AI assistant that remembers user teachings from prior chats.", + human_input_mode: Optional[str] = "NEVER", + llm_config: Optional[Union[Dict, bool]] = None, + analyzer_llm_config: Optional[Union[Dict, bool]] = None, + teach_config: Optional[Dict] = None, + **kwargs, + ): + """ + Args: + name (str): name of the agent. + system_message (str): system message for the ChatCompletion inference. + human_input_mode (str): This agent should NEVER prompt the human for input. + llm_config (dict or False): llm inference configuration. + Please refer to [Completion.create](/docs/reference/oai/completion#create) + for available options. + To disable llm-based auto reply, set to False. + analyzer_llm_config (dict or False): llm inference configuration passed to TextAnalyzerAgent. + Given the default setting of None, TeachableAgent passes its own llm_config to TextAnalyzerAgent. + teach_config (dict or None): Additional parameters used by TeachableAgent. + To use default config, set to None. Otherwise, set to a dictionary with any of the following keys: + - verbosity (Optional, int): # 0 (default) for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. + - reset_db (Optional, bool): True to clear the DB before starting. Default False. + - path_to_db_dir (Optional, str): path to the directory where the DB is stored. Default "./tmp/teachable_agent_db" + - prepopulate (Optional, int): True (default) to prepopulate the DB with a set of input-output pairs. + - recall_threshold (Optional, float): The maximum distance for retrieved memos, where 0.0 is exact match. Default 1.5. Larger values allow more (but less relevant) memos to be recalled. + - max_num_retrievals (Optional, int): The maximum number of memos to retrieve from the DB. Default 10. 
+ **kwargs (dict): other kwargs in [ConversableAgent](../conversable_agent#__init__). + """ + super().__init__( + name=name, + system_message=system_message, + human_input_mode=human_input_mode, + llm_config=llm_config, + **kwargs, + ) + # Register a custom reply function. + self.register_reply(Agent, TeachableAgent._generate_teachable_assistant_reply, 1) + + # Assemble the parameter settings. + self._teach_config = {} if teach_config is None else teach_config + self.verbosity = self._teach_config.get("verbosity", 0) + self.reset_db = self._teach_config.get("reset_db", False) + self.path_to_db_dir = self._teach_config.get("path_to_db_dir", "./tmp/teachable_agent_db") + self.prepopulate = self._teach_config.get("prepopulate", True) + self.recall_threshold = self._teach_config.get("recall_threshold", 1.5) + self.max_num_retrievals = self._teach_config.get("max_num_retrievals", 10) + + # Create the analyzer. + if analyzer_llm_config is None: + analyzer_llm_config = llm_config + self.analyzer = TextAnalyzerAgent(llm_config=analyzer_llm_config) + + # Create the memo store. + self.memo_store = MemoStore(self.verbosity, self.reset_db, self.path_to_db_dir) + self.user_comments = [] # Stores user comments until the end of each chat. + + def close_db(self): + """Cleanly closes the memo store.""" + self.memo_store.close() + + def prepopulate_db(self): + """Adds a few arbitrary memos to the DB.""" + self.memo_store.prepopulate() + + def _generate_teachable_assistant_reply( + self, + messages: Optional[List[Dict]] = None, + sender: Optional[Agent] = None, + config: Optional[Any] = None, # Persistent state. + ) -> Tuple[bool, Union[str, Dict, None]]: + """ + Generates a reply to the last user message, after querying the memo store for relevant information. + Uses TextAnalyzerAgent to make decisions about memo storage and retrieval. 
+ """ + if self.llm_config is False: + raise ValueError("TeachableAgent requires self.llm_config to be set in its base class.") + if messages is None: + messages = self._oai_messages[sender] # In case of a direct call. + + # Get the last user turn. + last_message = messages[-1] + user_text = last_message["content"] + if (not isinstance(user_text, str)) or ("context" in last_message): + raise ValueError( + "TeachableAgent currently assumes that the message content is a simple string. This error serves to flag a test case for relaxing this assumption." + ) + + # Keep track of this user turn as a potential source of memos later. + self.user_comments.append(user_text) + + # Consider whether to retrieve something from the DB. + if self.memo_store.last_memo_id > 0: + new_user_text = self.consider_memo_retrieval(user_text) + if new_user_text != user_text: + # Make a copy of the message list, and replace the last user message with the new one. + messages = messages.copy() + messages[-1]["content"] = new_user_text + + # Generate a response. + msgs = self._oai_system_message + messages + response = oai.ChatCompletion.create(messages=msgs, **self.llm_config) + response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] + return True, response_text + + def learn_from_user_feedback(self): + """Reviews the user comments from the last chat, and decides what teachings to store as memos.""" + print(colored("\nREVIEWING CHAT FOR USER TEACHINGS TO REMEMBER", "light_yellow")) + # Look at each user turn. + if len(self.user_comments) > 0: + for comment in self.user_comments: + # Consider whether to store something from this user turn in the DB. + self.consider_memo_storage(comment) + self.user_comments = [] + + def consider_memo_storage(self, comment): + """Decides whether to store something from one user comment in the DB.""" + # Check for a problem-solution pair. 
+ response = self.analyze( + comment, + "Does any part of the TEXT ask the agent to perform a task or solve a problem? Answer with just one word, yes or no.", + ) + if "yes" in response.lower(): + # Can we extract advice? + advice = self.analyze( + comment, + "Briefly copy any advice from the TEXT that may be useful for a similar but different task in the future. But if no advice is present, just respond with 'none'.", + ) + if "none" not in advice.lower(): + # Yes. Extract the task. + task = self.analyze( + comment, + "Briefly copy just the task from the TEXT, then stop. Don't solve it, and don't include any advice.", + ) + # Generalize the task. + general_task = self.analyze( + task, + "Summarize very briefly, in general terms, the type of task described in the TEXT. Leave out details that might not appear in a similar problem.", + ) + # Add the task-advice (problem-solution) pair to the vector DB. + if self.verbosity >= 1: + print(colored("\nREMEMBER THIS TASK-ADVICE PAIR", "light_yellow")) + self.memo_store.add_input_output_pair(general_task, advice) + + # Check for information to be learned. + response = self.analyze( + comment, + "Does the TEXT contain information that could be committed to memory? Answer with just one word, yes or no.", + ) + if "yes" in response.lower(): + # Yes. What question would this information answer? + question = self.analyze( + comment, + "Imagine that the user forgot this information in the TEXT. How would they ask you for this information? Include no other text in your response.", + ) + # Extract the information. + answer = self.analyze( + comment, "Copy the information from the TEXT that should be committed to memory. Add no explanation." + ) + # Add the question-answer pair to the vector DB. 
+ if self.verbosity >= 1: + print(colored("\nREMEMBER THIS QUESTION-ANSWER PAIR", "light_yellow")) + self.memo_store.add_input_output_pair(question, answer) + + def consider_memo_retrieval(self, comment): + """Decides whether to retrieve memos from the DB, and add them to the chat context.""" + + # First, use the user comment directly as the lookup key. + if self.verbosity >= 1: + print(colored("\nLOOK FOR RELEVANT MEMOS, AS QUESTION-ANSWER PAIRS", "light_yellow")) + memo_list = self.retrieve_relevant_memos(comment) + + # Next, if the comment involves a task, then extract and generalize the task before using it as the lookup key. + response = self.analyze( + comment, + "Does any part of the TEXT ask the agent to perform a task or solve a problem? Answer with just one word, yes or no.", + ) + if "yes" in response.lower(): + if self.verbosity >= 1: + print(colored("\nLOOK FOR RELEVANT MEMOS, AS TASK-ADVICE PAIRS", "light_yellow")) + # Extract the task. + task = self.analyze( + comment, "Copy just the task from the TEXT, then stop. Don't solve it, and don't include any advice." + ) + # Generalize the task. + general_task = self.analyze( + task, + "Summarize very briefly, in general terms, the type of task described in the TEXT. Leave out details that might not appear in a similar problem.", + ) + # Append any relevant memos. + memo_list.extend(self.retrieve_relevant_memos(general_task)) + + # De-duplicate the memo list. + memo_list = list(set(memo_list)) + + # Append the memos to the last user message. + return comment + self.concatenate_memo_texts(memo_list) + + def retrieve_relevant_memos(self, input_text): + """Returns semantically related memos from the DB.""" + memo_list = self.memo_store.get_related_memos( + input_text, n_results=self.max_num_retrievals, threshold=self.recall_threshold + ) + + if self.verbosity >= 1: + # Was anything retrieved? + if len(memo_list) == 0: + # No. Look at the closest memo. 
+ print(colored("\nTHE CLOSEST MEMO IS BEYOND THE THRESHOLD:", "light_yellow")) + self.memo_store.get_nearest_memo(input_text) + print() # Print a blank line. The memo details were printed by get_nearest_memo(). + + # Create a list of just the memo output_text strings. + memo_list = [memo[1] for memo in memo_list] + return memo_list + + def concatenate_memo_texts(self, memo_list): + """Concatenates the memo texts into a single string for inclusion in the chat context.""" + memo_texts = "" + if len(memo_list) > 0: + info = "\n# Memories that might help\n" + for memo in memo_list: + info = info + "- " + memo + "\n" + if self.verbosity >= 1: + print(colored("\nMEMOS APPENDED TO LAST USER MESSAGE...\n" + info + "\n", "light_yellow")) + memo_texts = memo_texts + "\n" + info + return memo_texts + + def analyze(self, text_to_analyze, analysis_instructions): + """Asks TextAnalyzerAgent to analyze the given text according to specific instructions.""" + if self.verbosity >= 2: + # Use the messaging mechanism so that the analyzer's messages are included in the printed chat. + self.analyzer.reset() # Clear the analyzer's list of messages. + self.send( + recipient=self.analyzer, message=text_to_analyze, request_reply=False + ) # Put the message in the analyzer's list. + self.send(recipient=self.analyzer, message=analysis_instructions, request_reply=True) # Request the reply. + return self.last_message(self.analyzer)["content"] + else: + # Use the analyzer's method directly, to leave analyzer message out of the printed chat. + return self.analyzer.analyze_text(text_to_analyze, analysis_instructions) + + +class MemoStore: + """ + Provides memory storage and retrieval for a TeachableAgent, using a vector database. + Each DB entry (called a memo) is a pair of strings: an input text and an output text. + The input text might be a question, or a task to perform. + The output text might be an answer to the question, or advice on how to perform the task. 
+ Vector embeddings are currently supplied by Chroma's default Sentence Transformers. + """ + + def __init__(self, verbosity, reset, path_to_db_dir): + """ + Args: + - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print memo lists. + - path_to_db_dir (Optional, str): path to the directory where the DB is stored. + """ + self.verbosity = verbosity + self.reset = reset + self.path_to_db_dir = path_to_db_dir + + # Load or create the vector DB on disk. + settings = Settings( + anonymized_telemetry=False, allow_reset=True, is_persistent=True, persist_directory=path_to_db_dir + ) + self.db_client = chromadb.Client(settings) + self.vec_db = self.db_client.create_collection("memos", get_or_create=True) # The collection is the DB. + if reset: + self.reset_db() + + # Load or create the associated memo dict on disk. + self.path_to_dict = os.path.join(path_to_db_dir, "uid_text_dict.pkl") + self.uid_text_dict = {} + self.last_memo_id = 0 + if (not reset) and os.path.exists(self.path_to_dict): + print(colored("\nLOADING MEMORY FROM DISK", "light_green")) + print(colored(" Location = {}".format(self.path_to_dict), "light_green")) + with open(self.path_to_dict, "rb") as f: + self.uid_text_dict = pickle.load(f) + self.last_memo_id = len(self.uid_text_dict) + if self.verbosity >= 3: + self.list_memos() + + def list_memos(self): + """Prints the contents of MemoStore.""" + print(colored("LIST OF MEMOS", "light_green")) + for uid, text in self.uid_text_dict.items(): + input_text, output_text = text + print( + colored( + " ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text), + "light_green", + ) + ) + + def close(self): + """Saves self.uid_text_dict to disk.""" + print(colored("\nSAVING MEMORY TO DISK", "light_green")) + print(colored(" Location = {}".format(self.path_to_dict), "light_green")) + with open(self.path_to_dict, "wb") as file: + pickle.dump(self.uid_text_dict, file) + + def reset_db(self): + """Forces immediate 
deletion of the DB's contents, in memory and on disk.""" + print(colored("\nCLEARING MEMORY", "light_green")) + self.db_client.delete_collection("memos") + self.vec_db = self.db_client.create_collection("memos") + self.uid_text_dict = {} + + def add_input_output_pair(self, input_text, output_text): + """Adds an input-output pair to the vector DB.""" + self.last_memo_id += 1 + self.vec_db.add(documents=[input_text], ids=[str(self.last_memo_id)]) + self.uid_text_dict[str(self.last_memo_id)] = input_text, output_text + if self.verbosity >= 1: + print( + colored( + "\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}".format( + self.last_memo_id, input_text, output_text + ), + "light_green", + ) + ) + if self.verbosity >= 3: + self.list_memos() + + def get_nearest_memo(self, query_text): + """Retrieves the nearest memo to the given query text.""" + results = self.vec_db.query(query_texts=[query_text], n_results=1) + uid, input_text, distance = results["ids"][0][0], results["documents"][0][0], results["distances"][0][0] + input_text_2, output_text = self.uid_text_dict[uid] + assert input_text == input_text_2 + if self.verbosity >= 1: + print( + colored( + "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( + input_text, output_text, distance + ), + "light_green", + ) + ) + return input_text, output_text, distance + + def get_related_memos(self, query_text, n_results, threshold): + """Retrieves memos that are related to the given query text within the specified distance threshold.""" + if n_results > len(self.uid_text_dict): + n_results = len(self.uid_text_dict) + results = self.vec_db.query(query_texts=[query_text], n_results=n_results) + memos = [] + num_results = len(results["ids"][0]) + for i in range(num_results): + uid, input_text, distance = results["ids"][0][i], results["documents"][0][i], results["distances"][0][i] + if distance < threshold: + input_text_2, output_text = 
self.uid_text_dict[uid] + assert input_text == input_text_2 + if self.verbosity >= 1: + print( + colored( + "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( + input_text, output_text, distance + ), + "light_green", + ) + ) + memos.append((input_text, output_text, distance)) + return memos + + def prepopulate(self): + """Adds a few arbitrary examples to the vector DB, just to make retrieval less trivial.""" + if self.verbosity >= 1: + print(colored("\nPREPOPULATING MEMORY", "light_green")) + examples = [] + examples.append({"text": "When I say papers I mean research papers, which are typically pdfs.", "label": "yes"}) + examples.append({"text": "Please verify that each paper you listed actually uses langchain.", "label": "no"}) + examples.append({"text": "Tell gpt the output should still be latex code.", "label": "no"}) + examples.append({"text": "Hint: convert pdfs to text and then answer questions based on them.", "label": "yes"}) + examples.append( + {"text": "To create a good PPT, include enough content to make it interesting.", "label": "yes"} + ) + examples.append( + { + "text": "No, for this case the columns should be aspects and the rows should be frameworks.", + "label": "no", + } + ) + examples.append({"text": "When writing code, remember to include any libraries that are used.", "label": "yes"}) + examples.append({"text": "Please summarize the papers by Eric Horvitz on bounded rationality.", "label": "no"}) + examples.append({"text": "Compare the h-index of Daniel Weld and Oren Etzioni.", "label": "no"}) + examples.append( + { + "text": "Double check to be sure that the columns in a table correspond to what was asked for.", + "label": "yes", + } + ) + for example in examples: + self.add_input_output_pair(example["text"], example["label"]) diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py new file mode 100644 index 
00000000000..8cf88eba6ae --- /dev/null +++ b/autogen/agentchat/contrib/text_analyzer_agent.py @@ -0,0 +1,82 @@ +from autogen import oai +from autogen.agentchat.agent import Agent +from autogen.agentchat.assistant_agent import ConversableAgent +from typing import Callable, Dict, Optional, Union, List, Tuple, Any + +system_message = """You are an expert in text analysis. +The user will give you TEXT to analyze. +The user will give you analysis INSTRUCTIONS copied twice, at both the beginning and the end. +You will follow these INSTRUCTIONS in analyzing the TEXT, then give the results of your expert analysis in the format requested.""" + + +class TextAnalyzerAgent(ConversableAgent): + """Text Analysis agent, a subclass of ConversableAgent designed to analyze text as instructed.""" + + def __init__( + self, + name="analyzer", + system_message: Optional[str] = system_message, + human_input_mode: Optional[str] = "NEVER", + llm_config: Optional[Union[Dict, bool]] = None, + **kwargs, + ): + """ + Args: + name (str): name of the agent. + system_message (str): system message for the ChatCompletion inference. + human_input_mode (str): This agent should NEVER prompt the human for input. + llm_config (dict or False): llm inference configuration. + Please refer to [Completion.create](/docs/reference/oai/completion#create) + for available options. + To disable llm-based auto reply, set to False. + teach_config (dict or None): Additional parameters used by TeachableAgent. + To use default config, set to None. Otherwise, set to a dictionary with any of the following keys: + - verbosity (Optional, int): # 0 (default) for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. + - reset_db (Optional, bool): True to clear the DB before starting. Default False. + - path_to_db_dir (Optional, str): path to the directory where the DB is stored. 
Default "./tmp/teachable_agent_db" + - prepopulate (Optional, int): True (default) to prepopulate the DB with a set of input-output pairs. + - recall_threshold (Optional, float): The maximum distance for retrieved memos, where 0.0 is exact match. Default 1.5. Larger values allow more (but less relevant) memos to be recalled. + - max_num_retrievals (Optional, int): The maximum number of memos to retrieve from the DB. Default 10. + **kwargs (dict): other kwargs in [ConversableAgent](../conversable_agent#__init__). + """ + super().__init__( + name=name, + system_message=system_message, + human_input_mode=human_input_mode, + llm_config=llm_config, + **kwargs, + ) + self.register_reply(Agent, TextAnalyzerAgent._analyze_in_reply, 1) + + def _analyze_in_reply( + self, + messages: Optional[List[Dict]] = None, + sender: Optional[Agent] = None, + config: Optional[Any] = None, + ) -> Tuple[bool, Union[str, Dict, None]]: + """Analyzes the given text as instructed, and returns the analysis as a message. + Assumes exactly two messages containing the text to analyze and the analysis instructions. + See TeachableAgent.analyze for an example of how to use this method.""" + if self.llm_config is False: + raise ValueError("TextAnalyzerAgent requires self.llm_config to be set in its base class.") + if messages is None: + messages = self._oai_messages[sender] # In case of a direct call. + assert len(messages) == 2 + + # Delegate to the analysis method. + return True, self.analyze_text(messages[0]["content"], messages[1]["content"]) + + def analyze_text(self, text_to_analyze, analysis_instructions): + """Analyzes the given text as instructed, and returns the analysis.""" + # Assemble the message. + text_to_analyze = "# TEXT\n" + text_to_analyze + "\n" + analysis_instructions = "# INSTRUCTIONS\n" + analysis_instructions + "\n" + msg_text = "\n".join( + [analysis_instructions, text_to_analyze, analysis_instructions] + ) # Repeat the instructions. 
+ messages = self._oai_system_message + [{"role": "user", "content": msg_text}] + + # Generate and return the analysis string. + response = oai.ChatCompletion.create(context=None, messages=messages, **self.llm_config) + output_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] + return output_text diff --git a/notebook/agentchat_teachability.ipynb b/notebook/agentchat_teachability.ipynb new file mode 100644 index 00000000000..54f73fbcb83 --- /dev/null +++ b/notebook/agentchat_teachability.ipynb @@ -0,0 +1,791 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Chatting with TeachableAgent\n", + "\n", + "Conversational assistants based on LLMs can remember the current chat with the user, and can even demonstrate in-context learning of things that the user teaches the assistant during the chat. But these memories and learnings are lost once the chat is over, or when a single chat grows too long for the LLM to handle effectively. In subsequent chats, the user is forced to repeat any necessary instructions over and over.\n", + "\n", + "`TeachableAgent` addresses these limitations by persisting user teachings across chat boundaries in long-term memory (a vector database). Memory is saved to disk at the end of each chat, then loaded from disk at the start of the next. Instead of copying all of memory into the context window, which would eat up valuable space, individual memories (called memos) are retrieved into context as needed. This allows the user to teach frequently used facts and skills to the teachable agent just once, and have it remember them in later chats.\n", + "\n", + "In making decisions about memo storage and retrieval, `TeachableAgent` calls an instance of `TextAnalyzerAgent` to analyze pieces of text in several different ways. 
This adds extra LLM calls involving a relatively small number of tokens. These calls can add a few seconds to the time a user waits for a response.\n", + "\n", + "This notebook demonstrates how `TeachableAgent` can learn facts, preferences, and skills from users. To chat with `TeachableAgent` yourself, run [chat_with_teachable_agent.py](../test/agentchat/chat_with_teachable_agent.py).\n", + "\n", + "## Requirements\n", + "\n", + "AutoGen requires `Python>=3.8`. To run this notebook example, please install the [teachable] option.\n", + "```bash\n", + "pip install \"pyautogen[teachable]\"\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture --no-stderr\n", + "# %pip install \"pyautogen[teachable]\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set your API Endpoint\n", + "\n", + "The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a JSON file." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gpt-4\n" + ] + } + ], + "source": [ + "import autogen\n", + "\n", + "config_list = autogen.config_list_from_json(\n", + " env_or_file=\"OAI_CONFIG_LIST\",\n", + " file_location=\".\",\n", + " filter_dict={\n", + " \"model\": [\"gpt-4\", \"gpt4\", \"gpt-4-32k\"],\n", + " },\n", + ")\n", + "\n", + "print(config_list[0][\"model\"])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It first looks for the environment variable \"OAI_CONFIG_LIST\", which needs to be a valid JSON string. If that variable is not found, it then looks for a JSON file named \"OAI_CONFIG_LIST\". It filters the configs by models (you can filter by other keys as well). 
After application of this particular filter, only the gpt-4 models are kept.\n", + "\n", + "The config list looks like the following:\n", + "```python\n", + "config_list = [\n", + " {\n", + " 'model': 'gpt-4',\n", + " 'api_key': '',\n", + " },\n", + " {\n", + " 'model': 'gpt-4',\n", + " 'api_key': '',\n", + " 'api_base': '',\n", + " 'api_type': 'azure',\n", + " 'api_version': '2023-06-01-preview',\n", + " },\n", + " {\n", + " 'model': 'gpt-4-32k',\n", + " 'api_key': '',\n", + " 'api_base': '',\n", + " 'api_type': 'azure',\n", + " 'api_version': '2023-06-01-preview',\n", + " },\n", + "]\n", + "```\n", + "\n", + "If you open this notebook in Colab, you can upload your files by clicking the file icon on the left panel and then choosing the \"upload file\" icon.\n", + "\n", + "You can set the value of config_list in other ways if you prefer, e.g., loading from a YAML file." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Construct Agents\n", + "For this walkthrough, we start by resetting the teachable agent's memory store. This deletes any memories from prior conversations that may be stored on disk." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[92m\n", + "CLEARING MEMORY\u001b[0m\n" + ] + } + ], + "source": [ + "from autogen.agentchat.contrib.teachable_agent import TeachableAgent\n", + "from autogen import UserProxyAgent\n", + "\n", + "llm_config = {\n", + " \"request_timeout\": 60,\n", + " \"config_list\": config_list,\n", + " \"use_cache\": True, # Use False to explore LLM non-determinism.\n", + "}\n", + "\n", + "teach_config={\n", + " \"verbosity\": 0, # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.\n", + " \"reset_db\": True, # Set to True to start over with an empty database.\n", + " \"path_to_db_dir\": \"./tmp/notebook/teachable_agent_db\", # Path to the directory where the database will be stored.\n", + " \"recall_threshold\": 1.5, # Higher numbers allow more (but less relevant) memos to be recalled.\n", + "}\n", + "\n", + "try:\n", + " from termcolor import colored\n", + "except ImportError:\n", + " def colored(x, *args, **kwargs):\n", + " return x\n", + " \n", + "autogen.ChatCompletion.start_logging()\n", + "\n", + "teachable_agent = TeachableAgent(\n", + " name=\"teachableagent\",\n", + " llm_config=llm_config,\n", + " teach_config=teach_config)\n", + "\n", + "user = UserProxyAgent(\n", + " name=\"user\",\n", + " human_input_mode=\"NEVER\",\n", + " is_termination_msg=lambda x: True if \"TERMINATE\" in x.get(\"content\") else False,\n", + " max_consecutive_auto_reply=0,\n", + ")\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning new facts\n", + "Let's teach the agent some facts it doesn't already know, since they are more recent than GPT-4's training data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to teachableagent):\n", + "\n", + "What is the Vicuna model?\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", + "\n", + "I'm sorry, but I don't have information about the \"Vicuna model\" in my current database. Could you please provide some context, description, or details about this model so I can better assist you?\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"What is the Vicuna model?\"\n", + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to teachableagent):\n", + "\n", + "Vicuna is a 13B-parameter language model released by Meta.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", + "\n", + "Thank you for providing the information. The Vicuna model is a large-scale language model developed by Meta. It consists of 13 billion parameters, which is what makes it able to understand and generate human-like text across various topics. Parameters in a neural network are the weights and biases assigned to each connection between the neurons, and having more parameters enables the model to learn and capture more complex patterns and relationships in data. 
As an AI language model, Vicuna can be utilized in various language-related tasks, such as text completion, translation, summarization, and more.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"Vicuna is a 13B-parameter language model released by Meta.\"\n", + "user.initiate_chat(teachable_agent, message=text, clear_history=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to teachableagent):\n", + "\n", + "What is the Orca model?\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", + "\n", + "The Orca model is a yet-to-be-released language model by Meta, referring to the Research Preview API release of Meta's advanced language model. While I don't have specific details about the Orca model at this time, it is expected that the model would have an even larger number of parameters to further enhance its capabilities compared to its predecessor, Vicuna.\n", + "\n", + "As AI language models evolve, they generally become more efficient at understanding and generating human-like text. 
This makes them more effective in completing a wide range of tasks, including text completion, translation, summarization, and Q&A, as well as providing more accurate and coherent results in various applications.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"What is the Orca model?\"\n", + "user.initiate_chat(teachable_agent, message=text, clear_history=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to teachableagent):\n", + "\n", + "Orca is a 13B-parameter language model released by Microsoft. It outperforms Vicuna on most tasks.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", + "\n", + "Thank you for the clarification. Orca is a 13-billion-parameter language model developed by Microsoft. It is designed to outperform Vicuna, the language model released by Meta. With a large number of parameters, Orca is capable of understanding and generating human-like text across diverse subjects, making it highly useful in various applications.\n", + "\n", + "Orca's advanced capabilities allow it to excel in tasks such as text completion, translation, summarization, Q&A, and more. Its performance improvement over Vicuna highlights the rapid advancements in natural language processing (NLP) research and the development of increasingly capable AI language models.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"Orca is a 13B-parameter language model released by Microsoft. 
It outperforms Vicuna on most tasks.\"\n", + "user.initiate_chat(teachable_agent, message=text, clear_history=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's end our first chat here. The following function needs to be called at the end of each chat, so that `TeachableAgent` can store what the user has taught it." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93m\n", + "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n" + ] + } + ], + "source": [ + "teachable_agent.learn_from_user_feedback()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's start a new chat by clearing the previous chat's history. At this point, common LLM-based assistants would forget everything from the last chat. But `TeachableAgent` can retrieve memories from its vector DB as needed, allowing it to recall and reason over facts that the user taught it in earlier conversations." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to teachableagent):\n", + "\n", + "How does the Vicuna model compare to the Orca model?\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", + "\n", + "The Vicuna and Orca models are both 13B-parameter language models released by Meta and Microsoft, respectively. In terms of performance, Orca has been reported to outperform Vicuna on most tasks. However, without detailed information about specific tasks and benchmarks, it is difficult to provide a more comprehensive comparison. 
Generally speaking, both models are advanced language models that aim to provide high-quality natural language understanding and generation, but Orca appears to have an edge in terms of overall performance.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"How does the Vicuna model compare to the Orca model?\"\n", + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning user preferences\n", + "Now let's teach the agent some of our preferences. Suppose that we frequently post short summaries of new papers for our team to read, and we want the teachable agent to help us do this faster." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to teachableagent):\n", + "\n", + "Please summarize this abstract.\n", + "\n", + "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", + "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", + "AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. 
Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", + "\n", + "AutoGen is an open-source framework designed to enable developers to create LLM applications with multiple customizable agents that can converse with each other to complete tasks. These agents can operate using different combinations of LLMs, human inputs, and tools, allowing developers to define agent interaction behaviors flexibly. AutoGen supports programming flexible conversation patterns using both natural language and code, making it suitable for building diverse applications with varying complexities and LLM capacities. Its effectiveness has been demonstrated through empirical studies across various domains including mathematics, coding, operations research, decision-making, and entertainment.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"\"\"Please summarize this abstract.\n", + "\n", + "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", + "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", + "AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. 
Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", + "\"\"\"\n", + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "But that's unstructured. So let's teach the agent our preference." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to teachableagent):\n", + "\n", + "Please summarize this abstract. \n", + "When I'm summarizing an abstract, I try to make the summary contain just three short bullet points: the title, the innovation, and the key empirical results.\n", + "\n", + "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", + "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", + "AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. 
AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", + "\n", + "- Title: AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", + "- Innovation: Open-source framework for creating customizable LLM applications through agent conversations, supporting various modes and interaction behaviors.\n", + "- Key Empirical Results: Demonstrated effectiveness across diverse application domains, including mathematics, coding, question answering, and more.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"\"\"Please summarize this abstract. \n", + "When I'm summarizing an abstract, I try to make the summary contain just three short bullet points: the title, the innovation, and the key empirical results.\n", + "\n", + "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", + "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", + "AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. 
Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", + "\"\"\"\n", + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That's much better, but will the teachable agent remember these preferences in the future, for a different paper? Let's start a new chat to find out!" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93m\n", + "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", + "\n", + "Please summarize this abstract.\n", + "\n", + "Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", + "Sébastien Bubeck, Varun Chandrasekaran, Ronen Eldan, Johannes Gehrke, Eric Horvitz, Ece Kamar, Peter Lee, Yin Tat Lee, Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, Yi Zhang\n", + "Artificial intelligence (AI) researchers have been developing and refining large language models (LLMs) that exhibit remarkable capabilities across a variety of domains and tasks, challenging our understanding of learning and cognition. The latest model developed by OpenAI, GPT-4, was trained using an unprecedented scale of compute and data. In this paper, we report on our investigation of an early version of GPT-4, when it was still in active development by OpenAI. 
We contend that (this early version of) GPT-4 is part of a new cohort of LLMs (along with ChatGPT and Google's PaLM for example) that exhibit more general intelligence than previous AI models. We discuss the rising capabilities and implications of these models. We demonstrate that, beyond its mastery of language, GPT-4 can solve novel and difficult tasks that span mathematics, coding, vision, medicine, law, psychology and more, without needing any special prompting. Moreover, in all of these tasks, GPT-4's performance is strikingly close to human-level performance, and often vastly surpasses prior models such as ChatGPT. Given the breadth and depth of GPT-4's capabilities, we believe that it could reasonably be viewed as an early (yet still incomplete) version of an artificial general intelligence (AGI) system. In our exploration of GPT-4, we put special emphasis on discovering its limitations, and we discuss the challenges ahead for advancing towards deeper and more comprehensive versions of AGI, including the possible need for pursuing a new paradigm that moves beyond next-word prediction. 
We conclude with reflections on societal influences of the recent technological leap and future research directions.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", + "\n", + "- Title: Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", + "- Innovation: GPT-4, an LLM with remarkable capabilities, demonstrates human-level performance across various domains, like math, coding, vision, medicine, law, and psychology.\n", + "- Key results: GPT-4 significantly surpasses prior models, suggesting it may be an early version of AGI; limitations and challenges toward deeper AGI are also discussed.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "teachable_agent.learn_from_user_feedback()\n", + "\n", + "text = \"\"\"Please summarize this abstract.\n", + "\n", + "Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", + "Sébastien Bubeck, Varun Chandrasekaran, Ronen Eldan, Johannes Gehrke, Eric Horvitz, Ece Kamar, Peter Lee, Yin Tat Lee, Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, Yi Zhang\n", + "Artificial intelligence (AI) researchers have been developing and refining large language models (LLMs) that exhibit remarkable capabilities across a variety of domains and tasks, challenging our understanding of learning and cognition. The latest model developed by OpenAI, GPT-4, was trained using an unprecedented scale of compute and data. In this paper, we report on our investigation of an early version of GPT-4, when it was still in active development by OpenAI. We contend that (this early version of) GPT-4 is part of a new cohort of LLMs (along with ChatGPT and Google's PaLM for example) that exhibit more general intelligence than previous AI models. We discuss the rising capabilities and implications of these models. 
We demonstrate that, beyond its mastery of language, GPT-4 can solve novel and difficult tasks that span mathematics, coding, vision, medicine, law, psychology and more, without needing any special prompting. Moreover, in all of these tasks, GPT-4's performance is strikingly close to human-level performance, and often vastly surpasses prior models such as ChatGPT. Given the breadth and depth of GPT-4's capabilities, we believe that it could reasonably be viewed as an early (yet still incomplete) version of an artificial general intelligence (AGI) system. In our exploration of GPT-4, we put special emphasis on discovering its limitations, and we discuss the challenges ahead for advancing towards deeper and more comprehensive versions of AGI, including the possible need for pursuing a new paradigm that moves beyond next-word prediction. We conclude with reflections on societal influences of the recent technological leap and future research directions.\"\"\"\n", + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning new skills\n", + "Finally, let's extend the teachable agent's capabilities by teaching it a new skill for accomplishing a challenging type of task. \n", + "\n", + "The [Sparks of AGI](https://arxiv.org/abs/2303.12712) paper evaluated GPT-4 on math problems like the following, which it could only solve 32% of the time." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to teachableagent):\n", + "\n", + "Consider the identity: \n", + "9 * 4 + 6 * 6 = 72\n", + "Can you modify exactly one integer (and not more than that!) 
on the left hand side of the equation so the right hand side becomes 99?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", + "\n", + "Step 1: Identify the current equation and the target value on the right-hand side.\n", + "Current equation: 9 * 4 + 6 * 6 = 72\n", + "Target value: 99\n", + "\n", + "Step 2: Determine what numbers can be changed on the left-hand side.\n", + "Possible changes: 9, 4, 6 (first occurrence), and 6 (second occurrence)\n", + "\n", + "Step 3: Evaluate whether each change in the numbers on the left-hand side can result in the target value.\n", + "9 -> If changed, it would create a larger product.\n", + "4 -> If changed, the product would increase or decrease by more than the amount needed to reach 99.\n", + "6 (first occurrence) -> If increased to 7, then the equation would be 9 * 4 + 7 * 6 = 94, which is still not 99.\n", + "6 (second occurrence) -> If increased to 7, then the equation would be 9 * 4 + 6 * 7 = 99.\n", + "\n", + "Step 4: Write down the solution.\n", + "The solution is: 9 * 4 + 6 * 7\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"\"\"Consider the identity: \n", + "9 * 4 + 6 * 6 = 72\n", + "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 99?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\"\"\"\n", + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This time, let's teach the agent a reliable strategy for solving such tasks." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to teachableagent):\n", + "\n", + "Consider the identity: \n", + "9 * 4 + 6 * 6 = 72\n", + "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 99?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\n", + "Here's some advice that may help:\n", + "1. Let E denote the original number on the right.\n", + "2. Let F denote the final number on the right.\n", + "3. Calculate the difference between the two, G = F - E.\n", + "4. Examine the numbers on the left one by one until finding one that divides evenly into G, where negative integers are allowed.\n", + "5. Calculate J = G / H. This is the number of times that H divides into G.\n", + "6. Verify that J is an integer, and that H * J = G.\n", + "7. Find the number on the left which is multiplied by H, and call it K.\n", + "8. Change K to K + J.\n", + "9. 
Recompute the value on the left, and verify that it equals F.\n", + "Finally, write down your solution as: \"The solution is: A * B + C * D\".\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", + "\n", + "Step 1: Identify the original number (E) and the final number (F) on the right-hand side.\n", + "E = 72\n", + "F = 99\n", + "\n", + "Step 2: Calculate the difference between the two, G = F - E.\n", + "G = 99 - 72 = 27\n", + "\n", + "Step 3: Examine the numbers on the left one by one until finding one that divides evenly into G, where negative integers are allowed.\n", + "9 (divides evenly into 27)\n", + "4 (does not)\n", + "6 (first occurrence, does not)\n", + "6 (second occurrence, does not)\n", + "\n", + "Step 4: Calculate J = G / H. This is the number of times that H divides into G.\n", + "H = 9 (since 9 divides evenly into 27)\n", + "J = 27 / 9 = 3\n", + "\n", + "Step 5: Verify that J is an integer, and that H * J = G.\n", + "H * J = 9 * 3 = 27\n", + "J = 3 (which is an integer)\n", + "\n", + "Step 6: Find the number on the left which is multiplied by H, and call it K.\n", + "Number on the leftside of the equation that is multiplied by 9: 4\n", + "Therefore, K = 4\n", + "\n", + "Step 7: Change K to K + J.\n", + "K + J = 4 + 3 = 7\n", + "\n", + "Step 8: Recompute the value on the left, and verify that it equals F.\n", + "9 * 7 + 6 * 6 = 63 + 36 = 99 (which is equal to F)\n", + "\n", + "Step 9: Write down the solution.\n", + "The solution is: 9 * 7 + 6 * 6\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"\"\"Consider the identity: \n", + "9 * 4 + 6 * 6 = 72\n", + "Can you modify exactly one integer (and not more than that!) 
on the left hand side of the equation so the right hand side becomes 99?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\n", + "Here's some advice that may help:\n", + "1. Let E denote the original number on the right.\n", + "2. Let F denote the final number on the right.\n", + "3. Calculate the difference between the two, G = F - E.\n", + "4. Examine the numbers on the left one by one until finding one that divides evenly into G, where negative integers are allowed.\n", + "5. Calculate J = G / H. This is the number of times that H divides into G.\n", + "6. Verify that J is an integer, and that H * J = G.\n", + "7. Find the number on the left which is multiplied by H, and call it K.\n", + "8. Change K to K + J.\n", + "9. Recompute the value on the left, and verify that it equals F.\n", + "Finally, write down your solution as: \"The solution is: A * B + C * D\".\n", + "\"\"\"\n", + "user.initiate_chat(teachable_agent, message=text, clear_history=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When given this advice, GPT-4 can solve such problems over 95% of the time. But can the teachable agent remember the strategy so the user doesn't have to explain it over and over? As before, let's start a new chat to find out." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93m\n", + "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", + "\n", + "Consider the identity: \n", + "9 * 4 + 6 * 6 = 72\n", + "Can you modify exactly one integer (and not more than that!) 
on the left hand side of the equation so the right hand side becomes 99?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", + "\n", + "Let's follow the advice to modify one integer on the left hand side of the equation to make the right hand side become 99:\n", + "\n", + "1. Let E denote the original number on the right: E = 72.\n", + "2. Let F denote the final number on the right: F = 99.\n", + "3. Calculate the difference between the two, G = F - E: G = 99 - 72 = 27.\n", + "4. Examine the numbers on the left one by one to find one that divides evenly into G: 9, 4, 6. Since 9 divides 27 evenly, we will work with 9 (H = 9).\n", + "5. Calculate J = G / H: J = 27 / 9 = 3.\n", + "6. Verify that J is an integer, and that H * J = G: J is an integer, and 9 * 3 = 27.\n", + "7. Find the number on the left which is multiplied by H (9), and call it K: K = 4.\n", + "8. Change K to K + J: K_new = 4 + 3 = 7.\n", + "9. Recompute the value on the left, and verify that it equals F: (9 * 7) + (6 * 6) = 63 + 36 = 99.\n", + "\n", + "The solution is: 9 * 7 + 6 * 6\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "teachable_agent.learn_from_user_feedback()\n", + "\n", + "text = \"\"\"Consider the identity: \n", + "9 * 4 + 6 * 6 = 72\n", + "Can you modify exactly one integer (and not more than that!) 
on the left hand side of the equation so the right hand side becomes 99?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\"\"\"\n", + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As a final check, let's test the teachable agent's newly learned skill on a separate instance of the task." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to teachableagent):\n", + "\n", + "Consider the identity: \n", + "8 * 3 + 7 * 9 = 87\n", + "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 59?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", + "\n", + "Let's follow the advice to modify one integer on the left hand side of the equation to make the right hand side become 59:\n", + "\n", + "1. Let E denote the original number on the right: E = 87.\n", + "2. Let F denote the final number on the right: F = 59.\n", + "3. Calculate the difference between the two, G = F - E: G = 59 - 87 = -28.\n", + "4. Examine the numbers on the left one by one to find one that divides evenly into G: 8, 3, 7, 9. Since 7 divides -28 evenly, we will work with 7 (H = 7).\n", + "5. Calculate J = G / H: J = -28 / 7 = -4.\n", + "6. Verify that J is an integer, and that H * J = G: J is an integer, and 7 * (-4) = -28.\n", + "7. Find the number on the left which is multiplied by H (7), and call it K: K = 9.\n", + "8. Change K to K + J: K_new = 9 + (-4) = 5.\n", + "9. 
Recompute the value on the left, and verify that it equals F: (8 * 3) + (7 * 5) = 24 + 35 = 59.\n", + "\n", + "The solution is: 8 * 3 + 7 * 5\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"\"\"Consider the identity: \n", + "8 * 3 + 7 * 9 = 87\n", + "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 59?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\"\"\"\n", + "user.initiate_chat(teachable_agent, message=text, clear_history=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "flaml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/setup.py b/setup.py index a42432eb033..05cabc3ef67 100644 --- a/setup.py +++ b/setup.py @@ -57,6 +57,7 @@ "blendsearch": ["flaml[blendsearch]"], "mathchat": ["sympy", "pydantic==1.10.9", "wolframalpha"], "retrievechat": ["chromadb", "tiktoken", "sentence_transformers", "pypdf"], + "teachable": ["chromadb"], }, classifiers=[ "Programming Language :: Python :: 3", diff --git a/test/agentchat/chat_with_teachable_agent.py b/test/agentchat/chat_with_teachable_agent.py new file mode 100644 index 00000000000..211ebe59097 --- /dev/null +++ b/test/agentchat/chat_with_teachable_agent.py @@ -0,0 +1,60 @@ +from autogen import UserProxyAgent, config_list_from_json +from autogen.agentchat.contrib.teachable_agent import TeachableAgent + + +try: + from termcolor import colored +except ImportError: + + def colored(x, *args, **kwargs): + return x + + +verbosity = 0 # 0 
# verbosity levels: 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
recall_threshold = 1.5  # Higher numbers allow more (but less relevant) memos to be recalled.
use_cache = False  # If True, cached LLM calls will be skipped and responses pulled from cache. False exposes LLM non-determinism.

# Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input.
filter_dict = {"model": ["gpt-4"]}


def create_teachable_agent(reset_db=False):
    """Instantiates a TeachableAgent using the settings from the top of this file.

    Args:
        reset_db (bool): forwarded as ``teach_config["reset_db"]``; True asks the
            agent to clear its DB before starting.

    Returns:
        The newly constructed TeachableAgent.
    """
    # Load LLM inference endpoints from an env variable or a file
    # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
    # and OAI_CONFIG_LIST_sample
    config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict)
    llm_config = {"config_list": config_list, "request_timeout": 120, "use_cache": use_cache}
    teach_config = {
        "verbosity": verbosity,
        "reset_db": reset_db,
        "path_to_db_dir": "./tmp/interactive/teachable_agent_db",
        "recall_threshold": recall_threshold,
    }
    return TeachableAgent(name="teachableagent", llm_config=llm_config, teach_config=teach_config)


def interact_freely_with_user():
    """Starts a free-form chat between the user and TeachableAgent.

    The agent's DB is closed on every exit path, including an exception or a
    keyboard interrupt during the interactive chat.
    """
    # Create the teachable agent, reusing whatever memory is already on disk.
    print(colored("\nLoading previous memory (if any) from disk.", "light_cyan"))
    teachable_agent = create_teachable_agent(reset_db=False)

    try:
        user = UserProxyAgent("user", human_input_mode="ALWAYS")

        # Start the chat.
        teachable_agent.initiate_chat(user, message="Greetings, I'm a teachable user assistant! What's on your mind today?")

        # Let the teachable agent remember things that should be learned from this chat.
        teachable_agent.learn_from_user_feedback()
    finally:
        # Wrap up. Runs even when the chat is aborted, so the DB is never left open.
        teachable_agent.close_db()


if __name__ == "__main__":
    # Lets the user test TeachableAgent interactively.
    interact_freely_with_user()
import sys

import pytest

from autogen import ConversableAgent, config_list_from_json

# TeachableAgent needs the optional "teachable" extra (chromadb). Probe for it
# together with openai so that a missing optional dependency skips the test
# instead of failing at collection time.
try:
    import openai  # noqa: F401  (imported only to check availability)
    from autogen.agentchat.contrib.teachable_agent import TeachableAgent
except ImportError:
    skip = True
else:
    skip = False

try:
    from termcolor import colored
except ImportError:

    def colored(x, *args, **kwargs):
        return x


# Set verbosity levels to maximize code coverage.
qa_verbosity = 0  # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
skill_verbosity = 3  # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.

assert_on_error = False  # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable.
recall_threshold = 1.5  # Higher numbers allow more (but less relevant) memos to be recalled.
use_cache = False  # If True, cached LLM calls will be skipped and responses pulled from cache. False exposes LLM non-determinism.

# Specify the model to use by uncommenting one of the following lines.
# filter_dict={"model": ["gpt-4-0613"]}
# filter_dict={"model": ["gpt-3.5-turbo-0613"]}
# filter_dict={"model": ["gpt-4"]}
filter_dict = {"model": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}


def create_teachable_agent(reset_db=False, verbosity=0):
    """Instantiates a TeachableAgent using the settings from the top of this file.

    Args:
        reset_db (bool): forwarded as ``teach_config["reset_db"]``.
        verbosity (int): forwarded as ``teach_config["verbosity"]``.

    Returns:
        The newly constructed TeachableAgent.
    """
    # Load LLM inference endpoints from an env variable or a file
    # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
    # and OAI_CONFIG_LIST_sample
    config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict)
    teachable_agent = TeachableAgent(
        name="teachableagent",
        llm_config={"config_list": config_list, "request_timeout": 120, "use_cache": use_cache},
        teach_config={
            "verbosity": verbosity,
            "reset_db": reset_db,
            "path_to_db_dir": "./tmp/teachable_agent_db",
            "recall_threshold": recall_threshold,
        },
    )
    return teachable_agent


def check_agent_response(teachable_agent, user, correct_answer):
    """Checks whether the agent's response contains the correct answer, and returns the number of errors (1 or 0).

    Args:
        teachable_agent: the agent whose last message to ``user`` is inspected.
        user: the agent that received the response.
        correct_answer (str): substring expected in the response text.

    Raises:
        AssertionError: if the answer is missing and ``assert_on_error`` is True.
    """
    agent_response = user.last_message(teachable_agent)["content"]
    if correct_answer in agent_response:
        print(colored(f"\nTEST PASSED: EXPECTED ANSWER {correct_answer} FOUND IN AGENT RESPONSE", "light_cyan"))
        return 0

    print(colored(f"\nTEST FAILED: EXPECTED ANSWER {correct_answer} NOT FOUND IN AGENT RESPONSE", "light_red"))
    if assert_on_error:
        # Known to be false on this path; turns the soft failure into a hard one.
        assert correct_answer in agent_response
    return 1


def use_question_answer_phrasing():
    """Tests whether the teachable agent can answer a question after being taught the answer in a previous chat.

    Returns:
        (num_errors, num_tests) for this scenario.
    """
    print(colored("\nTEST QUESTION-ANSWER PHRASING", "light_cyan"))
    num_errors, num_tests = 0, 0
    # For a clean test, clear the agent's memory.
    teachable_agent = create_teachable_agent(reset_db=True, verbosity=qa_verbosity)
    try:
        user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER")

        # Prepopulate memory with a few arbitrary memos, just to make retrieval less trivial.
        teachable_agent.prepopulate_db()

        # Ask the teachable agent to do something using terminology it doesn't understand.
        user.initiate_chat(recipient=teachable_agent, message="What is the twist of 5 and 7?")

        # Explain the terminology to the teachable agent.
        user.send(
            recipient=teachable_agent,
            message="Actually, the twist of two or more numbers is their product minus their sum. Try again.",
        )
        num_errors += check_agent_response(teachable_agent, user, "23")
        num_tests += 1

        # Let the teachable agent remember things that should be learned from this chat.
        teachable_agent.learn_from_user_feedback()

        # Now start a new chat to clear the context, and require the teachable agent to use its new knowledge.
        print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", "light_cyan"))
        user.initiate_chat(recipient=teachable_agent, message="What's the twist of 8 and 3 and 2?")
        num_errors += check_agent_response(teachable_agent, user, "35")
        num_tests += 1
    finally:
        # Wrap up, even if one of the chat calls above raised.
        teachable_agent.close_db()
    return num_errors, num_tests


def use_task_advice_pair_phrasing():
    """Tests whether the teachable agent can demonstrate a new skill after being taught a task-advice pair in a previous chat.

    Returns:
        (num_errors, num_tests) for this scenario.
    """
    print(colored("\nTEST TASK-ADVICE PHRASING", "light_cyan"))
    num_errors, num_tests = 0, 0
    # For a clean test, clear the teachable agent's memory.
    teachable_agent = create_teachable_agent(reset_db=True, verbosity=skill_verbosity)
    try:
        user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER")

        # Prepopulate memory with a few arbitrary memos, just to make retrieval less trivial.
        teachable_agent.prepopulate_db()

        # Ask the teachable agent to do something, and provide some helpful advice.
        user.initiate_chat(
            recipient=teachable_agent,
            message="Compute the twist of 5 and 7. Here's a hint: The twist of two or more numbers is their product minus their sum.",
        )
        num_errors += check_agent_response(teachable_agent, user, "23")
        num_tests += 1

        # Let the teachable agent remember things that should be learned from this chat.
        teachable_agent.learn_from_user_feedback()

        # Now start a new chat to clear the context, and require the teachable agent to use its new knowledge.
        print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", "light_cyan"))
        user.initiate_chat(recipient=teachable_agent, message="Please calculate the twist of 8 and 3 and 2.")
        num_errors += check_agent_response(teachable_agent, user, "35")
        num_tests += 1
    finally:
        # Wrap up, even if one of the chat calls above raised.
        teachable_agent.close_db()
    return num_errors, num_tests


@pytest.mark.skipif(
    skip or not sys.version.startswith("3.9"),
    reason="do not run if openai is not installed or py!=3.9",
)
def test_all():
    """Runs this file's unit tests and prints an error summary."""
    total_num_errors, total_num_tests = 0, 0

    num_trials = 1  # Set to a higher number to get a more accurate error rate.
    for trial in range(num_trials):
        for scenario in (use_question_answer_phrasing, use_task_advice_pair_phrasing):
            num_errors, num_tests = scenario()
            total_num_errors += num_errors
            total_num_tests += num_tests

        print(colored(f"\nTRIAL {trial + 1} OF {num_trials} FINISHED", "light_cyan"))

    if total_num_errors == 0:
        print(colored("\nTEACHABLE AGENT TESTS FINISHED WITH ZERO ERRORS", "light_cyan"))
    else:
        # total_num_tests is non-zero here: an error can only be counted alongside a test.
        print(
            colored(
                f"\nTEACHABLE AGENT TESTS FINISHED WITH {total_num_errors} / {total_num_tests} TOTAL ERRORS ({100.0 * total_num_errors / total_num_tests}%)",
                "light_red",
            )
        )


if __name__ == "__main__":
    # Runs this file's unit tests from the command line.
    test_all()
agent)](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_groupchat_RAG.ipynb) diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md index a98080c1820..4267e85fd65 100644 --- a/website/docs/Use-Cases/agent_chat.md +++ b/website/docs/Use-Cases/agent_chat.md @@ -110,6 +110,7 @@ The figure below shows six examples of applications built using AutoGen. 5. **Agent Teaching and Learning** - Teach Agents New Skills & Reuse via Automated Chat - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_teaching.ipynb) + - Teach Agents New Facts, User Preferences and Skills Beyond Coding - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_teachability.ipynb) ## For Further Reading