From 93ee755af54c16612c43a33e8b9fd0ea33c3a1a5 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Wed, 20 Sep 2023 17:04:18 -0700 Subject: [PATCH 01/50] Initial commit. --- autogen/agentchat/contrib/teachable_agent.py | 218 +++++++++++++++++++ autogen/agentchat/contrib/text_analyzer.py | 21 ++ test/agentchat/test_teachable_agent.py | 10 + 3 files changed, 249 insertions(+) create mode 100644 autogen/agentchat/contrib/teachable_agent.py create mode 100644 autogen/agentchat/contrib/text_analyzer.py create mode 100644 test/agentchat/test_teachable_agent.py diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py new file mode 100644 index 00000000000..512597b038b --- /dev/null +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -0,0 +1,218 @@ +from autogen import oai +from autogen.agentchat.agent import Agent +from autogen.agentchat.assistant_agent import ConversableAgent +from autogen.agentchat.contrib.text_analyzer import TextAnalyzer +from typing import Callable, Dict, Optional, Union, List, Tuple, Any +import chromadb +from chromadb.config import Settings + + +class TeachableAgent(ConversableAgent): + """(Ongoing research) Teachable Assistant agent, using a vector database as a memory store. + """ + def __init__( + self, + name: str, + system_message: Optional[str] = "You are a helpful AI assistant.", + llm_config: Optional[Union[Dict, bool]] = None, + is_termination_msg: Optional[Callable[[Dict], bool]] = None, + max_consecutive_auto_reply: Optional[int] = None, + human_input_mode: Optional[str] = "NEVER", + code_execution_config: Optional[Union[Dict, bool]] = False, + **kwargs, + ): + super().__init__( + name, + system_message, + is_termination_msg, + max_consecutive_auto_reply, + human_input_mode, + code_execution_config=code_execution_config, + llm_config=llm_config, + **kwargs, + ) + # super().__init__(*args, **kwargs) + self.register_reply(Agent, TeachableAgent._generate_teachable_assistant_reply) + + self.verbosity = 1 # 1 to print DB operations, 2 to add caller details. + self.db_method = 1 # 0=none, 1=Both tasks & facts + self.prepopulate = 1 # 1 to prepopulate the DB with a set of input-output pairs. + + self.text_analyzer = TextAnalyzer() + + if self.db_method > 0: + self.memo_store = MemoStore(self.verbosity) + self.memo_store.prepopulate() + self.user_comments = [] # Stores user comments until the end of the chat. + + def _generate_teachable_assistant_reply( + self, + messages: Optional[List[Dict]] = None, + sender: Optional[Agent] = None, + config: Optional[Any] = None, + ) -> Tuple[bool, Union[str, Dict, None]]: + llm_config = self.llm_config if config is None else config + if llm_config is False: + return False, None + if messages is None: + messages = self._oai_messages[sender] + + # messages contains the previous chat history, excluding the system message. + + # Get the last user message. + user_text = messages[-1]['content'] + + # To support quick and dirty tests of memory, clear the chat history if the user says "new chat". + if user_text == 'new chat': + self.clear_history() + print('\n\033[92m\033[0m ') + if self.db_method > 0: + # Save each user turn to the vector DB. + if len(self.user_comments) > 0: + for comment in self.user_comments: + # Consider whether to store something in the DB. + self.consider_memo_storage(comment, llm_config) + self.user_comments = [] + return True, 'New chat started.' + + if self.db_method > 0: + # This is a normal user turn. Keep track of it for potential storage later. 
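+            # (These buffered comments are written to the vector DB in one pass
+            #  later, when the user starts a new chat, rather than on every turn.)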
+ self.user_comments.append(user_text) + + if self.memo_store.num_memos > 0: + # Consider whether to retrieve something from the DB. + new_user_text = self.consider_memo_retrieval(user_text, llm_config) + if new_user_text != user_text: + # Make a copy of the message list, and replace the last user message with the new one. + messages = messages.copy() + messages[-1]['content'] = new_user_text + + ctxt = messages[-1].pop("context", None) # This peels off any "context" message from the list. + msgs = self._oai_system_message + messages + response = oai.ChatCompletion.create(context=ctxt, messages=msgs, **llm_config) + + return True, oai.ChatCompletion.extract_text_or_function_call(response)[0] + + def consider_memo_storage(self, comment, llm_config): + # Check for a problem-solution pair. + response = self.text_analyzer.analyze(llm_config, comment, + "Does the last user comment contain a task or problem to solve? Answer with just one word, yes or no.") + if 'yes' in response.lower(): + # Can we extract advice? + advice = self.text_analyzer.analyze(llm_config, comment, + "Copy any advice from the last user comment that may be useful for a similar but different task in the future. But if no advice is present, just respond with \'none\'.") + if 'none' not in advice.lower(): + # Yes. Extract the task. + task = self.text_analyzer.analyze(llm_config, comment, + "Copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.") + # Generalize the task. + general_task = self.text_analyzer.analyze(llm_config, task, + "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.") + # Add the task-advice (problem-solution) pair to the vector DB. + if self.verbosity >= 1: + print("\n\033[92m\033[0m ") + self.memo_store.add_input_output_pair(general_task, advice) + return + + # Check for a simple question. + response = self.text_analyzer.analyze(llm_config, comment, + "Does the last user comment contain a simple question? Answer with just one word, yes or no.") + if 'yes' in response.lower(): + # Ignore it. + return + + # Check for information to be learned. + response = self.text_analyzer.analyze(llm_config, comment, + "Does the last user comment contain information that might be useful later? Answer with just one word, yes or no.") + if 'yes' in response.lower(): + # Yes. What question would this information answer? + question = self.text_analyzer.analyze(llm_config, comment, + "Imagine that the user forgot this information in their last comment. How would they ask you for this information? Include no other text in your response.") + # Extract the information. + answer = self.text_analyzer.analyze(llm_config, comment, + "Copy the information from the last user comment that may be useful later.") + # Add the question-answer pair to the vector DB. + if self.verbosity >= 1: + print("\n\033[92m\033[0m ") + self.memo_store.add_input_output_pair(question, answer) + + def consider_memo_retrieval(self, comment, llm_config): + # Check for a question or task. + response = self.text_analyzer.analyze(llm_config, comment, + "Does the last user comment contain a question, task, or problem to solve? Answer with just one word, yes or no.") + if 'yes' in response.lower(): + # Distinguish between a question and a task. + response = self.text_analyzer.analyze(llm_config, comment, + "Would the last user comment be best described as a simple question question, or a complex task? 
Answer with just one word, question or task.") + if 'question' in response.lower(): + # Retrieve the answer. + uid, info = self.memo_store.get_nearest_memo(comment) + answer = self.memo_store.info_dict[uid] + info = "(Here is some information that might help answer the question:\n" + answer + ")" + if self.verbosity >= 1: + print('\n' + info) + user_text = comment + '\n' + info + return user_text + elif 'task' in response.lower(): + # Extract the task. + task = self.text_analyzer.analyze(llm_config, comment, + "Copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.") + # Generalize the task. + general_task = self.text_analyzer.analyze(llm_config, task, + "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.") + # Retrieve the advice. + uid, info = self.memo_store.get_nearest_memo(general_task) + advice = self.memo_store.info_dict[uid] + info = "(Here is some advice that might help:\n" + advice + ")" + if self.verbosity >= 1: + print('\n' + info) + user_text = comment + '\n' + info + return user_text + + # For anything else, just return the user comment. + return comment + + +class MemoStore(): + def __init__(self, verbosity): + self.verbosity = verbosity + self.db_client = chromadb.Client(Settings(anonymized_telemetry=False)) # In-memory by default. + self.vec_db = self.db_client.create_collection("memos") # The collection is the DB. + self.next_uid = 0 # Unique ID for each memo. Also serves as a count of total memos added. + self.num_memos = 0 + self.info_dict = {} # Maps a memo uid to information like answers or advice. + + def add_memo(self, text): + self.next_uid += 1 + self.num_memos += 1 + self.vec_db.add(documents=[text], ids=[str(self.next_uid)]) + if self.verbosity >= 1: + print("\n\033[92m\033[0m ".format(text)) + + def add_input_output_pair(self, input_text, output_text): + self.next_uid += 1 + self.num_memos += 1 + self.vec_db.add(documents=[input_text], ids=[str(self.next_uid)]) + self.info_dict[str(self.next_uid)] = output_text + if self.verbosity >= 1: + print("\n\033[92m\033[0m ".format(input_text, output_text)) + + def get_nearest_memo(self, query_text): + results = self.vec_db.query(query_texts=[query_text], n_results=1) + return results['ids'][0][0], results['documents'][0][0] + + def prepopulate(self): + # Add some random examples to the vector DB, just to make retrieval less trivial. 
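+        # (Without these distractors, any query would trivially return the one
+        #  memo stored during a test, so retrieval quality would go unexercised.)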
+        examples = []
+        examples.append({'text': 'When I say papers I mean research papers, which are typically pdfs.', 'label': 'yes'})
+        examples.append({'text': 'Please verify that each paper you listed actually uses langchain.', 'label': 'no'})
+        examples.append({'text': 'Tell gpt the output should still be latex code.', 'label': 'no'})
+        examples.append({'text': 'Hint: convert pdfs to text and then answer questions based on them.', 'label': 'yes'})
+        examples.append({'text': 'To create a good PPT, include enough content to make it interesting.', 'label': 'yes'})
+        examples.append({'text': 'No, for this case the columns should be aspects and the rows should be frameworks.', 'label': 'no'})
+        examples.append({'text': 'When writing code, remember to include any libraries that are used.', 'label': 'yes'})
+        examples.append({'text': 'Please summarize the papers by Eric Horvitz on bounded rationality.', 'label': 'no'})
+        examples.append({'text': 'Compare the h-index of Daniel Weld and Oren Etzioni.', 'label': 'no'})
+        examples.append({'text': 'Double check to be sure that the columns in a table correspond to what was asked for.', 'label': 'yes'})
+        for example in examples:
+            self.add_input_output_pair(example['text'], example['label'])
diff --git a/autogen/agentchat/contrib/text_analyzer.py b/autogen/agentchat/contrib/text_analyzer.py
new file mode 100644
index 00000000000..652ecad16b2
--- /dev/null
+++ b/autogen/agentchat/contrib/text_analyzer.py
@@ -0,0 +1,21 @@
+from autogen import oai
+
+
+class TextAnalyzer():
+    """ Analyzes the content of text as instructed in each call. """
+    def __init__(self):
+        # Prepare the system prompt.
+        system_message_text = """You are a helpful assistant specializing in content analysis."""
+        system_message = {"role": "system", "content": system_message_text}
+        self.base_messages = [system_message]
+
+    def analyze(self, llm_config, text_to_analyze, analysis_instructions):
+        # Assemble the messages.
+        messages = self.base_messages.copy()
+        messages.append({"role": "user", "content": text_to_analyze})
+        messages.append({"role": "user", "content": analysis_instructions})
+
+        # Get the response.
+        response = oai.ChatCompletion.create(context=None, messages=messages, **llm_config)
+        response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0]
+        return response_text
diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py
new file mode 100644
index 00000000000..68375520c9d
--- /dev/null
+++ b/test/agentchat/test_teachable_agent.py
@@ -0,0 +1,10 @@
+import os
+
+os.environ["OPENAI_API_KEY"] = ""
+
+from autogen import UserProxyAgent
+from autogen.agentchat.contrib.teachable_agent import TeachableAgent
+
+assistant = TeachableAgent("assistant")
+user_proxy = UserProxyAgent("user_proxy")
+user_proxy.initiate_chat(assistant, message="Hi")

From 4d2ccf62dbd687526442ea5c9a93329ae477f8ac Mon Sep 17 00:00:00 2001
From: rickyloynd-microsoft
Date: Tue, 26 Sep 2023 17:27:48 -0700
Subject: [PATCH 02/50] Disable LLM response caching.
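Caching is disabled because a TeachableAgent exercises memory: the same short
analyzer prompts recur across turns, and the legacy `autogen.oai` wrapper would
otherwise replay disk-cached completions instead of calling the model. Below is
a minimal sketch of the call pattern this patch threads through both the agent
and the analyzer, assuming a `config_list` loaded as in the test:

```python
from autogen import oai, config_list_from_json

config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")

# use_cache=False forces a fresh LLM call on every request instead of
# replaying a previously cached response to an identical prompt.
response = oai.ChatCompletion.create(
    context=None,
    messages=[{"role": "user", "content": "Answer with just one word: yes or no."}],
    use_cache=False,
    config_list=config_list,
)
print(oai.ChatCompletion.extract_text_or_function_call(response)[0])
```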
--- autogen/agentchat/contrib/teachable_agent.py | 5 +++-- autogen/agentchat/contrib/text_analyzer.py | 6 ++++-- test/agentchat/test_teachable_agent.py | 4 ---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 512597b038b..47c213dd94d 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -37,8 +37,9 @@ def __init__( self.verbosity = 1 # 1 to print DB operations, 2 to add caller details. self.db_method = 1 # 0=none, 1=Both tasks & facts self.prepopulate = 1 # 1 to prepopulate the DB with a set of input-output pairs. + self.use_cache = 0 # 1 to skip LLM calls made previously by relying on cached responses. - self.text_analyzer = TextAnalyzer() + self.text_analyzer = TextAnalyzer(self.use_cache) if self.db_method > 0: self.memo_store = MemoStore(self.verbosity) @@ -89,7 +90,7 @@ def _generate_teachable_assistant_reply( ctxt = messages[-1].pop("context", None) # This peels off any "context" message from the list. msgs = self._oai_system_message + messages - response = oai.ChatCompletion.create(context=ctxt, messages=msgs, **llm_config) + response = oai.ChatCompletion.create(context=ctxt, messages=msgs, use_cache=self.use_cache, **llm_config) return True, oai.ChatCompletion.extract_text_or_function_call(response)[0] diff --git a/autogen/agentchat/contrib/text_analyzer.py b/autogen/agentchat/contrib/text_analyzer.py index 652ecad16b2..dad66a3b19f 100644 --- a/autogen/agentchat/contrib/text_analyzer.py +++ b/autogen/agentchat/contrib/text_analyzer.py @@ -3,7 +3,9 @@ class TextAnalyzer(): """ Analyzes the content of text as instructed in each call. """ - def __init__(self): + def __init__(self, use_cache): + self.use_cache = use_cache + # Prepare the system prompt. system_message_text = """You are a helpful assistant specializing in content analysis.""" system_message = {"role": "system", "content": system_message_text} @@ -16,6 +18,6 @@ def analyze(self, llm_config, text_to_analyze, analysis_instructions): messages.append({"role": "user", "content": analysis_instructions}) # Get the response. 
- response = oai.ChatCompletion.create(context=None, messages=messages, **llm_config) + response = oai.ChatCompletion.create(context=None, messages=messages, use_cache=self.use_cache, **llm_config) response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] return response_text diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 68375520c9d..bd2e8ff96da 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -1,7 +1,3 @@ -import os - -os.environ["OPENAI_API_KEY"] = "" - from autogen import UserProxyAgent from autogen.agentchat.contrib.teachable_agent import TeachableAgent From fa033c0e6153e51eeb1a21d732dac18fa073cb85 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Tue, 3 Oct 2023 12:58:47 -0700 Subject: [PATCH 03/50] Add teachability option to setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index b9ec7af33d7..86834ac0726 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,7 @@ "blendsearch": ["flaml[blendsearch]"], "mathchat": ["sympy", "pydantic==1.10.9", "wolframalpha"], "retrievechat": ["chromadb", "tiktoken", "sentence_transformers", "pypdf"], + "teachability": ["chromadb"], }, classifiers=[ "Programming Language :: Python :: 3", From 165d5cef8c34da81f52b84a250048bbd87476f05 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Tue, 3 Oct 2023 12:59:48 -0700 Subject: [PATCH 04/50] Modify test to use OAI_CONFIG_LIST as suggested in the docs. --- test/agentchat/test_teachable_agent.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index bd2e8ff96da..1e7a75797fb 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -1,6 +1,14 @@ -from autogen import UserProxyAgent +from autogen import UserProxyAgent, config_list_from_json from autogen.agentchat.contrib.teachable_agent import TeachableAgent -assistant = TeachableAgent("assistant") +# Load LLM inference endpoints from an env variable or a file +# See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints +# and OAI_CONFIG_LIST_sample +config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST") + +# Create the agents. +assistant = TeachableAgent("assistant", llm_config={"config_list": config_list}) user_proxy = UserProxyAgent("user_proxy") + +# Start the chat. user_proxy.initiate_chat(assistant, message="Hi") From 295f9e087f8ab052c865297a6f9da965a0ca4ca0 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Tue, 3 Oct 2023 18:35:00 -0700 Subject: [PATCH 05/50] Expand unit test. --- autogen/agentchat/contrib/teachable_agent.py | 21 ++++--- test/agentchat/test_teachable_agent.py | 61 ++++++++++++++++---- 2 files changed, 63 insertions(+), 19 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 47c213dd94d..44640ec9021 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -34,7 +34,7 @@ def __init__( # super().__init__(*args, **kwargs) self.register_reply(Agent, TeachableAgent._generate_teachable_assistant_reply) - self.verbosity = 1 # 1 to print DB operations, 2 to add caller details. + self.verbosity = 0 # 1 to print DB operations, 2 to add caller details. 
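+        # (At verbosity 0, the DB-operation prints gated on `self.verbosity`
+        #  stay silent, which keeps the expanded test's output quieter.)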
self.db_method = 1 # 0=none, 1=Both tasks & facts self.prepopulate = 1 # 1 to prepopulate the DB with a set of input-output pairs. self.use_cache = 0 # 1 to skip LLM calls made previously by relying on cached responses. @@ -66,14 +66,8 @@ def _generate_teachable_assistant_reply( # To support quick and dirty tests of memory, clear the chat history if the user says "new chat". if user_text == 'new chat': self.clear_history() - print('\n\033[92m\033[0m ') - if self.db_method > 0: - # Save each user turn to the vector DB. - if len(self.user_comments) > 0: - for comment in self.user_comments: - # Consider whether to store something in the DB. - self.consider_memo_storage(comment, llm_config) - self.user_comments = [] + print('\n\033[92m\033[0m ') + self.learn_from_recent_user_comments() return True, 'New chat started.' if self.db_method > 0: @@ -94,6 +88,15 @@ def _generate_teachable_assistant_reply( return True, oai.ChatCompletion.extract_text_or_function_call(response)[0] + def learn_from_recent_user_comments(self): + if self.db_method > 0: + # Look at each user turn. + if len(self.user_comments) > 0: + for comment in self.user_comments: + # Consider whether to store something from this user turn in the DB. + self.consider_memo_storage(comment, self.llm_config) + self.user_comments = [] + def consider_memo_storage(self, comment, llm_config): # Check for a problem-solution pair. response = self.text_analyzer.analyze(llm_config, comment, diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 1e7a75797fb..08c54625adf 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -1,14 +1,55 @@ -from autogen import UserProxyAgent, config_list_from_json +import sys +from autogen import ConversableAgent, UserProxyAgent, config_list_from_json from autogen.agentchat.contrib.teachable_agent import TeachableAgent -# Load LLM inference endpoints from an env variable or a file -# See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints -# and OAI_CONFIG_LIST_sample -config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST") -# Create the agents. -assistant = TeachableAgent("assistant", llm_config={"config_list": config_list}) -user_proxy = UserProxyAgent("user_proxy") +def interact_freely_with_user(): + # Load LLM inference endpoints from an env variable or a file + # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints + # and OAI_CONFIG_LIST_sample + config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST") -# Start the chat. -user_proxy.initiate_chat(assistant, message="Hi") + # Create the agents. + assistant = TeachableAgent("assistant", llm_config={"config_list": config_list}) + user_proxy = UserProxyAgent("user_proxy", human_input_mode="ALWAYS") + + # Start the chat. + print("\n\033[92mTo clear the context and start a new chat, type 'new chat'\033[0m\n") + user_proxy.initiate_chat(assistant, message="Hi") + + +def test_question_answer_pair(): + # Load LLM inference endpoints from an env variable or a file + # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints + # and OAI_CONFIG_LIST_sample + config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST") + + # Create the agents. + agent = TeachableAgent("agent", llm_config={"config_list": config_list}) + user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") + + # Ask the agent to do something using terminology it doesn't understand. 
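+    # ("Twist" is an invented term, defined below as product minus sum:
+    #  twist(5, 7) = 35 - 12 = 23, and twist(8, 3, 2) = 48 - 13 = 35,
+    #  which is what the assertions in this test expect.)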
+ user.initiate_chat(recipient=agent, message="What is the twist of 5 and 7?") + + # Explain the terminology to the agent. + user.send(recipient=agent, message="The twist of two or more numbers is their product minus their sum.") + agent_response = user.last_message(agent) + assert '23' in agent_response["content"] # GPT-4 usually gets the right answer here, which is 23. + + # Let the agent remember things that should be learned from this chat. + agent.learn_from_recent_user_comments() + + # Now start a new chat to clear the context, and require the agent to use its new knowledge. + print('\n\033[92m\033[0m ') + user.initiate_chat(recipient=agent, message="What's the twist of 8 and 3 and 2?") + agent_response = user.last_message(agent) + assert '35' in agent_response["content"] # GPT-4 usually gets the right answer here, which is 35. + + +if __name__ == "__main__": + if len(sys.argv) > 1: + if sys.argv[1].startswith('i'): + interact_freely_with_user() + exit() + + test_question_answer_pair() From 515f2d9210c80c60c669cf06401def612f996126 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Wed, 4 Oct 2023 12:57:38 -0700 Subject: [PATCH 06/50] Complete unit test. --- autogen/agentchat/contrib/teachable_agent.py | 5 +- test/agentchat/test_teachable_agent.py | 52 +++++++++++++++++++- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 44640ec9021..dd35e13f507 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -46,6 +46,9 @@ def __init__( self.memo_store.prepopulate() self.user_comments = [] # Stores user comments until the end of the chat. + def delete_db(self): + self.memo_store.db_client.reset() + def _generate_teachable_assistant_reply( self, messages: Optional[List[Dict]] = None, @@ -180,7 +183,7 @@ def consider_memo_retrieval(self, comment, llm_config): class MemoStore(): def __init__(self, verbosity): self.verbosity = verbosity - self.db_client = chromadb.Client(Settings(anonymized_telemetry=False)) # In-memory by default. + self.db_client = chromadb.Client(Settings(anonymized_telemetry=False, allow_reset=True)) # In-memory by default. self.vec_db = self.db_client.create_collection("memos") # The collection is the DB. self.next_uid = 0 # Unique ID for each memo. Also serves as a count of total memos added. self.num_memos = 0 diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 08c54625adf..a080674a634 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -3,6 +3,19 @@ from autogen.agentchat.contrib.teachable_agent import TeachableAgent +def in_color(text, color): + # Available colors: + # 90 = grey + # 91 = red + # 92 = green + # 93 = yellow + # 94 = blue + # 95 = magenta + # 96 = cyan + # 97 = white + return "\033[{}m".format(color) + text + "\033[0m" + + def interact_freely_with_user(): # Load LLM inference endpoints from an env variable or a file # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints @@ -14,7 +27,7 @@ def interact_freely_with_user(): user_proxy = UserProxyAgent("user_proxy", human_input_mode="ALWAYS") # Start the chat. 
- print("\n\033[92mTo clear the context and start a new chat, type 'new chat'\033[0m\n") + print(in_color("\nTo clear the context and start a new chat, type 'new chat'.", 93)) user_proxy.initiate_chat(assistant, message="Hi") @@ -40,11 +53,44 @@ def test_question_answer_pair(): agent.learn_from_recent_user_comments() # Now start a new chat to clear the context, and require the agent to use its new knowledge. - print('\n\033[92m\033[0m ') + print(in_color("", 92)) user.initiate_chat(recipient=agent, message="What's the twist of 8 and 3 and 2?") agent_response = user.last_message(agent) assert '35' in agent_response["content"] # GPT-4 usually gets the right answer here, which is 35. + # End of test + agent.delete_db() + print(in_color("", 92)) + + +def test_task_advice_pair(): + # Load LLM inference endpoints from an env variable or a file + # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints + # and OAI_CONFIG_LIST_sample + config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST") + + # Create the agents. + agent = TeachableAgent("agent", llm_config={"config_list": config_list}) + user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") + + # Ask the agent to do something, and provide some helpful advice. + user.initiate_chat(recipient=agent, message="Compute the twist of 5 and 7. Here's a hint: The twist of two or more numbers is their product minus their sum.") + agent_response = user.last_message(agent) + assert '23' in agent_response["content"] # GPT-4 usually gets the right answer here, which is 23. + + # Let the agent remember things that should be learned from this chat. + agent.learn_from_recent_user_comments() + + # Now start a new chat to clear the context, and require the agent to use its new knowledge. + print(in_color("", 92)) + user.initiate_chat(recipient=agent, message="What's the twist of 8 and 3 and 2?") + agent_response = user.last_message(agent) + assert '35' in agent_response["content"] # GPT-4 usually gets the right answer here, which is 35. + + # End of test + agent.delete_db() # Delete the DB now, instead of waiting for garbage collection to do it. + print(in_color("", 92)) + if __name__ == "__main__": if len(sys.argv) > 1: @@ -53,3 +99,5 @@ def test_question_answer_pair(): exit() test_question_answer_pair() + test_task_advice_pair() + print(in_color("\n", 92)) From 3443e572ec6bf4cdf4f7c2f167d1e5376d41c44b Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 5 Oct 2023 17:57:51 -0700 Subject: [PATCH 07/50] Add filter_dict --- test/agentchat/test_teachable_agent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index a080674a634..be108078316 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -20,7 +20,7 @@ def interact_freely_with_user(): # Load LLM inference endpoints from an env variable or a file # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints # and OAI_CONFIG_LIST_sample - config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST") + config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]}) # Create the agents. 
assistant = TeachableAgent("assistant", llm_config={"config_list": config_list}) @@ -35,7 +35,7 @@ def test_question_answer_pair(): # Load LLM inference endpoints from an env variable or a file # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints # and OAI_CONFIG_LIST_sample - config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST") + config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]}) # Create the agents. agent = TeachableAgent("agent", llm_config={"config_list": config_list}) @@ -67,7 +67,7 @@ def test_task_advice_pair(): # Load LLM inference endpoints from an env variable or a file # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints # and OAI_CONFIG_LIST_sample - config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST") + config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]}) # Create the agents. agent = TeachableAgent("agent", llm_config={"config_list": config_list}) From dfe47a544f330f9c52b22aec195159e3c6a01cac Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 5 Oct 2023 18:44:32 -0700 Subject: [PATCH 08/50] details --- autogen/agentchat/contrib/teachable_agent.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index dd35e13f507..6e29bf023de 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -37,7 +37,7 @@ def __init__( self.verbosity = 0 # 1 to print DB operations, 2 to add caller details. self.db_method = 1 # 0=none, 1=Both tasks & facts self.prepopulate = 1 # 1 to prepopulate the DB with a set of input-output pairs. - self.use_cache = 0 # 1 to skip LLM calls made previously by relying on cached responses. + self.use_cache = False # 1 to skip LLM calls made previously by relying on cached responses. self.text_analyzer = TextAnalyzer(self.use_cache) @@ -66,7 +66,7 @@ def _generate_teachable_assistant_reply( # Get the last user message. user_text = messages[-1]['content'] - # To support quick and dirty tests of memory, clear the chat history if the user says "new chat". + # To let an interactive user test memory, clear the chat history if the user says "new chat". if user_text == 'new chat': self.clear_history() print('\n\033[92m\033[0m ') @@ -88,8 +88,9 @@ def _generate_teachable_assistant_reply( ctxt = messages[-1].pop("context", None) # This peels off any "context" message from the list. 
msgs = self._oai_system_message + messages response = oai.ChatCompletion.create(context=ctxt, messages=msgs, use_cache=self.use_cache, **llm_config) + response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] - return True, oai.ChatCompletion.extract_text_or_function_call(response)[0] + return True, response_text def learn_from_recent_user_comments(self): if self.db_method > 0: From c61d69c9fc53a5e03f5b37e0bd042dac47e75e75 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Fri, 6 Oct 2023 18:49:21 -0700 Subject: [PATCH 09/50] AnalysisAgent --- autogen/agentchat/contrib/analysis_agent.py | 63 ++++++++++++++++++++ autogen/agentchat/contrib/teachable_agent.py | 45 +++++++++----- 2 files changed, 94 insertions(+), 14 deletions(-) create mode 100644 autogen/agentchat/contrib/analysis_agent.py diff --git a/autogen/agentchat/contrib/analysis_agent.py b/autogen/agentchat/contrib/analysis_agent.py new file mode 100644 index 00000000000..bdb49d5c448 --- /dev/null +++ b/autogen/agentchat/contrib/analysis_agent.py @@ -0,0 +1,63 @@ +from autogen import oai +from autogen.agentchat.agent import Agent +from autogen.agentchat.assistant_agent import ConversableAgent +from typing import Callable, Dict, Optional, Union, List, Tuple, Any + + +class AnalysisAgent(ConversableAgent): + """(Ongoing research) Text Analysis agent. + """ + def __init__( + self, + name: str, + system_message: Optional[str] = "You are a helpful assistant specializing in content analysis.", + llm_config: Optional[Union[Dict, bool]] = None, + is_termination_msg: Optional[Callable[[Dict], bool]] = None, + max_consecutive_auto_reply: Optional[int] = None, + human_input_mode: Optional[str] = "NEVER", + code_execution_config: Optional[Union[Dict, bool]] = False, + **kwargs, + ): + super().__init__( + name, + system_message, + is_termination_msg, + max_consecutive_auto_reply, + human_input_mode, + code_execution_config=code_execution_config, + llm_config=llm_config, + **kwargs, + ) + self.register_reply(Agent, AnalysisAgent._generate_analysis) + + self.verbosity = 0 # 1 to print DB operations, 2 to add caller details. + self.use_cache = False # 1 to skip LLM calls made previously by relying on cached responses. + + def _generate_analysis( + self, + messages: Optional[List[Dict]] = None, + sender: Optional[Agent] = None, + config: Optional[Any] = None, + ) -> Tuple[bool, Union[str, Dict, None]]: + llm_config = self.llm_config if config is None else config + if llm_config is False: + return False, None + if messages is None: + messages = self._oai_messages[sender] + + # messages contains the previous chat history, excluding the system message. + + # Get the last user message. 
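+        # (The split('\n') below assumes the text to analyze contains no newline
+        #  of its own; multi-line text would make the two-way unpacking fail,
+        #  which is why the TODO asks for a different separator.)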
+ user_text = messages[-1]['content'] + text_to_analyze, analysis_instructions = user_text.split('\n') + + messages = [] + messages.append({"role": "user", "content": text_to_analyze}) + messages.append({"role": "user", "content": analysis_instructions}) + + msgs = self._oai_system_message + messages + + response = oai.ChatCompletion.create(context=None, messages=msgs, use_cache=self.use_cache, **llm_config) + response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] + + return True, response_text diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 6e29bf023de..6814dedd880 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -2,6 +2,7 @@ from autogen.agentchat.agent import Agent from autogen.agentchat.assistant_agent import ConversableAgent from autogen.agentchat.contrib.text_analyzer import TextAnalyzer +from autogen.agentchat.contrib.analysis_agent import AnalysisAgent from typing import Callable, Dict, Optional, Union, List, Tuple, Any import chromadb from chromadb.config import Settings @@ -31,15 +32,18 @@ def __init__( llm_config=llm_config, **kwargs, ) - # super().__init__(*args, **kwargs) self.register_reply(Agent, TeachableAgent._generate_teachable_assistant_reply) self.verbosity = 0 # 1 to print DB operations, 2 to add caller details. self.db_method = 1 # 0=none, 1=Both tasks & facts self.prepopulate = 1 # 1 to prepopulate the DB with a set of input-output pairs. self.use_cache = False # 1 to skip LLM calls made previously by relying on cached responses. + self.use_analyzer_agent = 1 # 1 to use the new analysis agent, 0 to use the old text analyzer. - self.text_analyzer = TextAnalyzer(self.use_cache) + if self.use_analyzer_agent: + self.analyzer = AnalysisAgent("analyzer", llm_config=llm_config) + else: + self.text_analyzer = TextAnalyzer(self.use_cache) if self.db_method > 0: self.memo_store = MemoStore(self.verbosity) @@ -55,6 +59,10 @@ def _generate_teachable_assistant_reply( sender: Optional[Agent] = None, config: Optional[Any] = None, ) -> Tuple[bool, Union[str, Dict, None]]: + if self.use_analyzer_agent and (sender == self.analyzer): + # This is a response from the text analyzer. Don't reply to it. + return True, None + llm_config = self.llm_config if config is None else config if llm_config is False: return False, None @@ -103,18 +111,18 @@ def learn_from_recent_user_comments(self): def consider_memo_storage(self, comment, llm_config): # Check for a problem-solution pair. - response = self.text_analyzer.analyze(llm_config, comment, + response = self.analyze(llm_config, comment, "Does the last user comment contain a task or problem to solve? Answer with just one word, yes or no.") if 'yes' in response.lower(): # Can we extract advice? - advice = self.text_analyzer.analyze(llm_config, comment, + advice = self.analyze(llm_config, comment, "Copy any advice from the last user comment that may be useful for a similar but different task in the future. But if no advice is present, just respond with \'none\'.") if 'none' not in advice.lower(): # Yes. Extract the task. - task = self.text_analyzer.analyze(llm_config, comment, + task = self.analyze(llm_config, comment, "Copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.") # Generalize the task. 
- general_task = self.text_analyzer.analyze(llm_config, task, + general_task = self.analyze(llm_config, task, "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.") # Add the task-advice (problem-solution) pair to the vector DB. if self.verbosity >= 1: @@ -123,21 +131,21 @@ def consider_memo_storage(self, comment, llm_config): return # Check for a simple question. - response = self.text_analyzer.analyze(llm_config, comment, + response = self.analyze(llm_config, comment, "Does the last user comment contain a simple question? Answer with just one word, yes or no.") if 'yes' in response.lower(): # Ignore it. return # Check for information to be learned. - response = self.text_analyzer.analyze(llm_config, comment, + response = self.analyze(llm_config, comment, "Does the last user comment contain information that might be useful later? Answer with just one word, yes or no.") if 'yes' in response.lower(): # Yes. What question would this information answer? - question = self.text_analyzer.analyze(llm_config, comment, + question = self.analyze(llm_config, comment, "Imagine that the user forgot this information in their last comment. How would they ask you for this information? Include no other text in your response.") # Extract the information. - answer = self.text_analyzer.analyze(llm_config, comment, + answer = self.analyze(llm_config, comment, "Copy the information from the last user comment that may be useful later.") # Add the question-answer pair to the vector DB. if self.verbosity >= 1: @@ -146,11 +154,11 @@ def consider_memo_storage(self, comment, llm_config): def consider_memo_retrieval(self, comment, llm_config): # Check for a question or task. - response = self.text_analyzer.analyze(llm_config, comment, + response = self.analyze(llm_config, comment, "Does the last user comment contain a question, task, or problem to solve? Answer with just one word, yes or no.") if 'yes' in response.lower(): # Distinguish between a question and a task. - response = self.text_analyzer.analyze(llm_config, comment, + response = self.analyze(llm_config, comment, "Would the last user comment be best described as a simple question question, or a complex task? Answer with just one word, question or task.") if 'question' in response.lower(): # Retrieve the answer. @@ -163,10 +171,10 @@ def consider_memo_retrieval(self, comment, llm_config): return user_text elif 'task' in response.lower(): # Extract the task. - task = self.text_analyzer.analyze(llm_config, comment, + task = self.analyze(llm_config, comment, "Copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.") # Generalize the task. - general_task = self.text_analyzer.analyze(llm_config, task, + general_task = self.analyze(llm_config, task, "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.") # Retrieve the advice. uid, info = self.memo_store.get_nearest_memo(general_task) @@ -180,6 +188,15 @@ def consider_memo_retrieval(self, comment, llm_config): # For anything else, just return the user comment. 
return comment + def analyze(self, llm_config, text_to_analyze, analysis_instructions): + if self.use_analyzer_agent: + message_text = '\n'.join([text_to_analyze, analysis_instructions]) + self.initiate_chat(recipient=self.analyzer, message=message_text) + response_text = self.last_message(self.analyzer)["content"] + else: + response_text = self.text_analyzer.analyze(llm_config, text_to_analyze, analysis_instructions) + return response_text + class MemoStore(): def __init__(self, verbosity): From 748ffcd13b48e20cd94f3bdc11549f13f959976f Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Mon, 9 Oct 2023 15:43:50 -0700 Subject: [PATCH 10/50] details --- autogen/agentchat/contrib/analysis_agent.py | 4 ++-- autogen/agentchat/contrib/teachable_agent.py | 2 ++ test/agentchat/test_teachable_agent.py | 6 +++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/autogen/agentchat/contrib/analysis_agent.py b/autogen/agentchat/contrib/analysis_agent.py index bdb49d5c448..9a953189fde 100644 --- a/autogen/agentchat/contrib/analysis_agent.py +++ b/autogen/agentchat/contrib/analysis_agent.py @@ -30,7 +30,6 @@ def __init__( ) self.register_reply(Agent, AnalysisAgent._generate_analysis) - self.verbosity = 0 # 1 to print DB operations, 2 to add caller details. self.use_cache = False # 1 to skip LLM calls made previously by relying on cached responses. def _generate_analysis( @@ -39,6 +38,7 @@ def _generate_analysis( sender: Optional[Agent] = None, config: Optional[Any] = None, ) -> Tuple[bool, Union[str, Dict, None]]: + # Are the following tests necessary? llm_config = self.llm_config if config is None else config if llm_config is False: return False, None @@ -49,7 +49,7 @@ def _generate_analysis( # Get the last user message. user_text = messages[-1]['content'] - text_to_analyze, analysis_instructions = user_text.split('\n') + text_to_analyze, analysis_instructions = user_text.split('\n') # TODO: Use a different separator. messages = [] messages.append({"role": "user", "content": text_to_analyze}) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 6814dedd880..be2549d8ea6 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -63,6 +63,7 @@ def _generate_teachable_assistant_reply( # This is a response from the text analyzer. Don't reply to it. return True, None + # Are the following tests necessary? llm_config = self.llm_config if config is None else config if llm_config is False: return False, None @@ -201,6 +202,7 @@ def analyze(self, llm_config, text_to_analyze, analysis_instructions): class MemoStore(): def __init__(self, verbosity): self.verbosity = verbosity + # TODO: Expose an option to persist the DB to a file on disk. self.db_client = chromadb.Client(Settings(anonymized_telemetry=False, allow_reset=True)) # In-memory by default. self.vec_db = self.db_client.create_collection("memos") # The collection is the DB. self.next_uid = 0 # Unique ID for each memo. Also serves as a count of total memos added. 
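The TODO added above asks for optional on-disk persistence of the memo store.
A possible shape for that, sketched here under the assumption of chromadb 0.4+
(the directory path is hypothetical, and this is not what the patch itself does):

```python
import chromadb
from chromadb.config import Settings

# A PersistentClient keeps the collection under `path`, so stored memos
# would survive process restarts, unlike the in-memory client used above.
db_client = chromadb.PersistentClient(
    path="./tmp/teachable_agent_db",  # hypothetical location
    settings=Settings(anonymized_telemetry=False, allow_reset=True),
)
vec_db = db_client.get_or_create_collection("memos")
vec_db.add(documents=["example memo"], ids=["1"])
```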
diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index be108078316..9819078a7cd 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -32,6 +32,8 @@ def interact_freely_with_user(): def test_question_answer_pair(): + print(in_color("\n", 92)) + # Load LLM inference endpoints from an env variable or a file # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints # and OAI_CONFIG_LIST_sample @@ -64,6 +66,8 @@ def test_question_answer_pair(): def test_task_advice_pair(): + print(in_color("\n", 92)) + # Load LLM inference endpoints from an env variable or a file # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints # and OAI_CONFIG_LIST_sample @@ -100,4 +104,4 @@ def test_task_advice_pair(): test_question_answer_pair() test_task_advice_pair() - print(in_color("\n", 92)) + print(in_color("\n", 92)) From 25faf14a9c419c714542546007b3b3bd2eb119fd Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Tue, 10 Oct 2023 14:31:43 -0700 Subject: [PATCH 11/50] More documentation and debug output. --- autogen/agentchat/contrib/analysis_agent.py | 18 ++++-- autogen/agentchat/contrib/teachable_agent.py | 62 ++++++++++++++------ test/agentchat/test_teachable_agent.py | 2 +- 3 files changed, 58 insertions(+), 24 deletions(-) diff --git a/autogen/agentchat/contrib/analysis_agent.py b/autogen/agentchat/contrib/analysis_agent.py index 9a953189fde..cf8a94556aa 100644 --- a/autogen/agentchat/contrib/analysis_agent.py +++ b/autogen/agentchat/contrib/analysis_agent.py @@ -38,22 +38,28 @@ def _generate_analysis( sender: Optional[Agent] = None, config: Optional[Any] = None, ) -> Tuple[bool, Union[str, Dict, None]]: + """Analyzes the given text as instructed, and returns the analysis.""" # Are the following tests necessary? + assert config is None # TODO: Remove this line. llm_config = self.llm_config if config is None else config + + assert llm_config is not False # TODO: Remove this line. if llm_config is False: return False, None + + assert messages is not None # TODO: Remove this line. if messages is None: messages = self._oai_messages[sender] - # messages contains the previous chat history, excluding the system message. - - # Get the last user message. + # Extract the text and instructions from the last user message. user_text = messages[-1]['content'] text_to_analyze, analysis_instructions = user_text.split('\n') # TODO: Use a different separator. - messages = [] - messages.append({"role": "user", "content": text_to_analyze}) - messages.append({"role": "user", "content": analysis_instructions}) + # Assemble the messages. + messages = [ + {"role": "user", "content": text_to_analyze}, + {"role": "user", "content": analysis_instructions} + ] msgs = self._oai_system_message + messages diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index be2549d8ea6..d9ccb7197dd 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -8,6 +8,19 @@ from chromadb.config import Settings +def in_color(text, color): + # Available colors: + # 90 = grey + # 91 = red + # 92 = green + # 93 = yellow + # 94 = blue + # 95 = magenta + # 96 = cyan + # 97 = white + return "\033[{}m".format(color) + text + "\033[0m" + + class TeachableAgent(ConversableAgent): """(Ongoing research) Teachable Assistant agent, using a vector database as a memory store. 
""" @@ -34,7 +47,7 @@ def __init__( ) self.register_reply(Agent, TeachableAgent._generate_teachable_assistant_reply) - self.verbosity = 0 # 1 to print DB operations, 2 to add caller details. + self.verbosity = 2 # 1 to print DB operations, 2 to add caller details. self.db_method = 1 # 0=none, 1=Both tasks & facts self.prepopulate = 1 # 1 to prepopulate the DB with a set of input-output pairs. self.use_cache = False # 1 to skip LLM calls made previously by relying on cached responses. @@ -59,14 +72,23 @@ def _generate_teachable_assistant_reply( sender: Optional[Agent] = None, config: Optional[Any] = None, ) -> Tuple[bool, Union[str, Dict, None]]: + """ + Generates a reply to the last user message, after querying the memo store for relevant information. + Uses self.analyzer to make decisions about memo storage and retrieval. + """ if self.use_analyzer_agent and (sender == self.analyzer): # This is a response from the text analyzer. Don't reply to it. return True, None - # Are the following tests necessary? + # Are the following checks needed? + assert config is None # TODO: Remove this line. llm_config = self.llm_config if config is None else config + + assert llm_config is not False # TODO: Remove this line. if llm_config is False: return False, None + + assert messages is not None # TODO: Remove this line. if messages is None: messages = self._oai_messages[sender] @@ -111,17 +133,18 @@ def learn_from_recent_user_comments(self): self.user_comments = [] def consider_memo_storage(self, comment, llm_config): + """Decides whether to store something from this user turn in the DB.""" # Check for a problem-solution pair. response = self.analyze(llm_config, comment, "Does the last user comment contain a task or problem to solve? Answer with just one word, yes or no.") if 'yes' in response.lower(): # Can we extract advice? advice = self.analyze(llm_config, comment, - "Copy any advice from the last user comment that may be useful for a similar but different task in the future. But if no advice is present, just respond with \'none\'.") + "Briefly copy any advice from the last user comment that may be useful for a similar but different task in the future. But if no advice is present, just respond with \'none\'.") if 'none' not in advice.lower(): # Yes. Extract the task. task = self.analyze(llm_config, comment, - "Copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.") + "Briefly copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.") # Generalize the task. general_task = self.analyze(llm_config, task, "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.") @@ -147,27 +170,28 @@ def consider_memo_storage(self, comment, llm_config): "Imagine that the user forgot this information in their last comment. How would they ask you for this information? Include no other text in your response.") # Extract the information. answer = self.analyze(llm_config, comment, - "Copy the information from the last user comment that may be useful later.") + "Briefly copy the information from the last user comment that may be useful later.") # Add the question-answer pair to the vector DB. 
if self.verbosity >= 1: print("\n\033[92m\033[0m ") self.memo_store.add_input_output_pair(question, answer) def consider_memo_retrieval(self, comment, llm_config): + """Decides whether to retrieve something from the DB.""" # Check for a question or task. response = self.analyze(llm_config, comment, "Does the last user comment contain a question, task, or problem to solve? Answer with just one word, yes or no.") if 'yes' in response.lower(): # Distinguish between a question and a task. response = self.analyze(llm_config, comment, - "Would the last user comment be best described as a simple question question, or a complex task? Answer with just one word, question or task.") + "Would the last user comment be best described as a simple question, or some kind of task? Answer with just one word, question or task.") if 'question' in response.lower(): - # Retrieve the answer. - uid, info = self.memo_store.get_nearest_memo(comment) - answer = self.memo_store.info_dict[uid] - info = "(Here is some information that might help answer the question:\n" + answer + ")" + # Retrieve the best-matching memo. + # TODO: A more sophisticated memo filtering & thresholding process is needed here. + input_text, output_text = self.memo_store.get_nearest_memo(comment) + info = "(Here is some information that might help answer the question:\n" + output_text + ")" if self.verbosity >= 1: - print('\n' + info) + print(in_color('\nAppended to last user message...\n' + info + '\n', 93)) user_text = comment + '\n' + info return user_text elif 'task' in response.lower(): @@ -178,11 +202,10 @@ def consider_memo_retrieval(self, comment, llm_config): general_task = self.analyze(llm_config, task, "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.") # Retrieve the advice. - uid, info = self.memo_store.get_nearest_memo(general_task) - advice = self.memo_store.info_dict[uid] - info = "(Here is some advice that might help:\n" + advice + ")" + input_text, output_text = self.memo_store.get_nearest_memo(general_task) + info = "(Here is some advice that might help:\n" + output_text + ")" if self.verbosity >= 1: - print('\n' + info) + print(in_color('\nAppended to last user message...\n' + info + '\n', 93)) user_text = comment + '\n' + info return user_text @@ -190,6 +213,7 @@ def consider_memo_retrieval(self, comment, llm_config): return comment def analyze(self, llm_config, text_to_analyze, analysis_instructions): + ### Calls either the AnalysisAgent or the older TextAnalyzer. """ if self.use_analyzer_agent: message_text = '\n'.join([text_to_analyze, analysis_instructions]) self.initiate_chat(recipient=self.analyzer, message=message_text) @@ -226,10 +250,14 @@ def add_input_output_pair(self, input_text, output_text): def get_nearest_memo(self, query_text): results = self.vec_db.query(query_texts=[query_text], n_results=1) - return results['ids'][0][0], results['documents'][0][0] + uid, input_text = results['ids'][0][0], results['documents'][0][0] + output_text = self.info_dict[uid] + if self.verbosity >= 1: + print("\n\033[92m\033[0m ".format(input_text, output_text)) + return input_text, output_text def prepopulate(self): - # Add some random examples to the vector DB, just to make retrieval less trivial. + """ Adds arbitrary examples to the vector DB, just to make retrieval less trivial. 
""" examples = [] examples.append({'text': 'When I say papers I mean research papers, which are typically pdfs.', 'label': 'yes'}) examples.append({'text': 'Please verify that each paper you listed actually uses langchain.', 'label': 'no'}) diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 9819078a7cd..92584d09838 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -87,7 +87,7 @@ def test_task_advice_pair(): # Now start a new chat to clear the context, and require the agent to use its new knowledge. print(in_color("", 92)) - user.initiate_chat(recipient=agent, message="What's the twist of 8 and 3 and 2?") + user.initiate_chat(recipient=agent, message="Please calculate the twist of 8 and 3 and 2.") agent_response = user.last_message(agent) assert '35' in agent_response["content"] # GPT-4 usually gets the right answer here, which is 35. From 66f4c61d34ed7fd5c2ab7a4c30634d887272cd79 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Tue, 10 Oct 2023 16:21:17 -0700 Subject: [PATCH 12/50] Support retrieval of any number of relevant memos, including zero. --- autogen/agentchat/contrib/teachable_agent.py | 77 ++++++++++++-------- test/agentchat/test_teachable_agent.py | 16 ++-- 2 files changed, 54 insertions(+), 39 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index d9ccb7197dd..6385afbd4df 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -100,8 +100,8 @@ def _generate_teachable_assistant_reply( # To let an interactive user test memory, clear the chat history if the user says "new chat". if user_text == 'new chat': self.clear_history() - print('\n\033[92m\033[0m ') self.learn_from_recent_user_comments() + print(in_color("\n", 96)) return True, 'New chat started.' if self.db_method > 0: @@ -124,6 +124,7 @@ def _generate_teachable_assistant_reply( return True, response_text def learn_from_recent_user_comments(self): + print(in_color("\nREVIEW CHAT FOR ITEMS TO REMEMBER", 96)) if self.db_method > 0: # Look at each user turn. if len(self.user_comments) > 0: @@ -150,7 +151,7 @@ def consider_memo_storage(self, comment, llm_config): "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.") # Add the task-advice (problem-solution) pair to the vector DB. if self.verbosity >= 1: - print("\n\033[92m\033[0m ") + print(in_color("\nFOUND TASK-ADVICE PAIR", 92)) self.memo_store.add_input_output_pair(general_task, advice) return @@ -173,7 +174,7 @@ def consider_memo_storage(self, comment, llm_config): "Briefly copy the information from the last user comment that may be useful later.") # Add the question-answer pair to the vector DB. if self.verbosity >= 1: - print("\n\033[92m\033[0m ") + print(in_color("\nFOUND QUESTION-ANSWER PAIR", 92)) self.memo_store.add_input_output_pair(question, answer) def consider_memo_retrieval(self, comment, llm_config): @@ -183,35 +184,37 @@ def consider_memo_retrieval(self, comment, llm_config): "Does the last user comment contain a question, task, or problem to solve? Answer with just one word, yes or no.") if 'yes' in response.lower(): # Distinguish between a question and a task. + memo_lookup_key = comment response = self.analyze(llm_config, comment, "Would the last user comment be best described as a simple question, or some kind of task? 
From 66f4c61d34ed7fd5c2ab7a4c30634d887272cd79 Mon Sep 17 00:00:00 2001
From: rickyloynd-microsoft
Date: Tue, 10 Oct 2023 16:21:17 -0700
Subject: [PATCH 12/50] Support retrieval of any number of relevant memos,
 including zero.

---
 autogen/agentchat/contrib/teachable_agent.py | 77 ++++++++++--------
 test/agentchat/test_teachable_agent.py       | 16 ++--
 2 files changed, 54 insertions(+), 39 deletions(-)

diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py
index d9ccb7197dd..6385afbd4df 100644
--- a/autogen/agentchat/contrib/teachable_agent.py
+++ b/autogen/agentchat/contrib/teachable_agent.py
@@ -100,8 +100,8 @@ def _generate_teachable_assistant_reply(
         # To let an interactive user test memory, clear the chat history if the user says "new chat".
         if user_text == 'new chat':
             self.clear_history()
-            print('\n\033[92m\033[0m ')
             self.learn_from_recent_user_comments()
+            print(in_color("\n", 96))
             return True, 'New chat started.'
 
@@ -124,6 +124,7 @@ def _generate_teachable_assistant_reply(
         return True, response_text
 
     def learn_from_recent_user_comments(self):
+        print(in_color("\nREVIEW CHAT FOR ITEMS TO REMEMBER", 96))
         if self.db_method > 0:
             # Look at each user turn.
             if len(self.user_comments) > 0:
@@ -150,7 +151,7 @@ def consider_memo_storage(self, comment, llm_config):
                     "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.")
                 # Add the task-advice (problem-solution) pair to the vector DB.
                 if self.verbosity >= 1:
-                    print("\n\033[92m\033[0m ")
+                    print(in_color("\nFOUND TASK-ADVICE PAIR", 92))
                 self.memo_store.add_input_output_pair(general_task, advice)
                 return
 
@@ -173,7 +174,7 @@ def consider_memo_storage(self, comment, llm_config):
                 "Briefly copy the information from the last user comment that may be useful later.")
             # Add the question-answer pair to the vector DB.
             if self.verbosity >= 1:
-                print("\n\033[92m\033[0m ")
+                print(in_color("\nFOUND QUESTION-ANSWER PAIR", 92))
             self.memo_store.add_input_output_pair(question, answer)
 
@@ -183,35 +184,37 @@ def consider_memo_retrieval(self, comment, llm_config):
             "Does the last user comment contain a question, task, or problem to solve? Answer with just one word, yes or no.")
         if 'yes' in response.lower():
             # Distinguish between a question and a task.
+            memo_lookup_key = comment
             response = self.analyze(llm_config, comment,
                 "Would the last user comment be best described as a simple question, or some kind of task? Answer with just one word, question or task.")
-            if 'question' in response.lower():
-                # Retrieve the best-matching memo.
-                # TODO: A more sophisticated memo filtering & thresholding process is needed here.
-                input_text, output_text = self.memo_store.get_nearest_memo(comment)
-                info = "(Here is some information that might help answer the question:\n" + output_text + ")"
-                if self.verbosity >= 1:
-                    print(in_color('\nAppended to last user message...\n' + info + '\n', 93))
-                user_text = comment + '\n' + info
-                return user_text
-            elif 'task' in response.lower():
+            if 'task' in response.lower():
                 # Extract the task.
                 task = self.analyze(llm_config, comment,
                     "Copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.")
                 # Generalize the task.
                 general_task = self.analyze(llm_config, task,
                     "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.")
-                # Retrieve the advice.
-                input_text, output_text = self.memo_store.get_nearest_memo(general_task)
-                info = "(Here is some advice that might help:\n" + output_text + ")"
-                if self.verbosity >= 1:
-                    print(in_color('\nAppended to last user message...\n' + info + '\n', 93))
-                user_text = comment + '\n' + info
-                return user_text
+                # Use the generalized task as the lookup key.
+                memo_lookup_key = general_task
+
+            # Append any relevant memos.
+            return comment + self.retrieve_relevant_memos(memo_lookup_key)
 
         # For anything else, just return the user comment.
         return comment
 
+    def retrieve_relevant_memos(self, input_text):
+        if self.verbosity >= 1:
+            print(in_color('\nLOOK FOR RELEVANT MEMOS', 93))
+        memo_texts = ''
+        memos = self.memo_store.get_related_memos(input_text)
+        for memo in memos:
+            info = "(Here is some information that might help:\n" + memo[1] + ")"
+            if self.verbosity >= 1:
+                print(in_color('\nMEMO APPENDED TO LAST USER MESSAGE...\n' + info + '\n', 93))
+            memo_texts = memo_texts + '\n' + info
+        return memo_texts
+
     def analyze(self, llm_config, text_to_analyze, analysis_instructions):
         ### Calls either the AnalysisAgent or the older TextAnalyzer. """
         if self.use_analyzer_agent:
@@ -233,28 +236,40 @@ def __init__(self, verbosity):
         self.num_memos = 0
         self.info_dict = {}  # Maps a memo uid to information like answers or advice.
 
-    def add_memo(self, text):
-        self.next_uid += 1
-        self.num_memos += 1
-        self.vec_db.add(documents=[text], ids=[str(self.next_uid)])
-        if self.verbosity >= 1:
-            print("\n\033[92m\033[0m ".format(text))
-
     def add_input_output_pair(self, input_text, output_text):
+        """ Adds an input-output pair to the vector DB. """
         self.next_uid += 1
         self.num_memos += 1
         self.vec_db.add(documents=[input_text], ids=[str(self.next_uid)])
         self.info_dict[str(self.next_uid)] = output_text
         if self.verbosity >= 1:
-            print("\n\033[92m\033[0m ".format(input_text, output_text))
+            print(in_color("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n  INPUT\n    {}\n  OUTPUT\n    {}".format(
+                input_text, output_text), 92))
 
     def get_nearest_memo(self, query_text):
+        """ Retrieves the nearest memo to the given query text. """
         results = self.vec_db.query(query_texts=[query_text], n_results=1)
-        uid, input_text = results['ids'][0][0], results['documents'][0][0]
+        uid, input_text, distance = results['ids'][0][0], results['documents'][0][0], results['distances'][0][0]
         output_text = self.info_dict[uid]
         if self.verbosity >= 1:
-            print("\n\033[92m\033[0m ".format(input_text, output_text))
-        return input_text, output_text
+            print(in_color("\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n  INPUT\n    {}\n  OUTPUT\n    {}\n  DISTANCE\n    {}".format(
+                input_text, output_text, distance), 92))
+        return input_text, output_text, distance
+
+    def get_related_memos(self, query_text, threshold=1.0):
+        """ Retrieves memos that are related to the given query text with the threshold. """
+        results = self.vec_db.query(query_texts=[query_text], n_results=4)
+        memos = []
+        for i in range(len(results['ids'])):
+            uid, input_text, distance = results['ids'][i][0], results['documents'][i][0], results['distances'][i][0]
+            if distance < threshold:
+                output_text = self.info_dict[uid]
+                if self.verbosity >= 1:
+                    print(in_color(
+                        "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n  INPUT\n    {}\n  OUTPUT\n    {}\n  DISTANCE\n    {}".format(
+                            input_text, output_text, distance), 92))
+                memos.append((input_text, output_text, distance))
+        return memos
 
     def prepopulate(self):
         """ Adds arbitrary examples to the vector DB, just to make retrieval less trivial. """
diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py
index 92584d09838..07503898ac4 100644
--- a/test/agentchat/test_teachable_agent.py
+++ b/test/agentchat/test_teachable_agent.py
@@ -27,12 +27,12 @@ def interact_freely_with_user():
     user_proxy = UserProxyAgent("user_proxy", human_input_mode="ALWAYS")
 
     # Start the chat.
-    print(in_color("\nTo clear the context and start a new chat, type 'new chat'.", 93))
+    print(in_color("\nTo clear the context and start a new chat, type 'new chat'.", 96))
     user_proxy.initiate_chat(assistant, message="Hi")
 
 
 def test_question_answer_pair():
-    print(in_color("\n", 92))
+    print(in_color("\n", 96))
 
     # Load LLM inference endpoints from an env variable or a file
     # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
@@ -55,18 +55,18 @@ def test_question_answer_pair():
     agent.learn_from_recent_user_comments()
 
     # Now start a new chat to clear the context, and require the agent to use its new knowledge.
-    print(in_color("", 92))
+    print(in_color("\n", 96))
     user.initiate_chat(recipient=agent, message="What's the twist of 8 and 3 and 2?")
     agent_response = user.last_message(agent)
     assert '35' in agent_response["content"]  # GPT-4 usually gets the right answer here, which is 35.
 
     # End of test
     agent.delete_db()
-    print(in_color("", 92))
+    print(in_color("", 96))
 
 
 def test_task_advice_pair():
-    print(in_color("\n", 92))
+    print(in_color("\n", 96))
 
     # Load LLM inference endpoints from an env variable or a file
     # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
@@ -86,14 +86,14 @@ def test_task_advice_pair():
     agent.learn_from_recent_user_comments()
 
     # Now start a new chat to clear the context, and require the agent to use its new knowledge.
-    print(in_color("", 92))
+    print(in_color("\n", 96))
     user.initiate_chat(recipient=agent, message="Please calculate the twist of 8 and 3 and 2.")
     agent_response = user.last_message(agent)
     assert '35' in agent_response["content"]  # GPT-4 usually gets the right answer here, which is 35.
 
     # End of test
     agent.delete_db()  # Delete the DB now, instead of waiting for garbage collection to do it.
-    print(in_color("", 92))
+    print(in_color("", 96))
 
 
 if __name__ == "__main__":
@@ -104,4 +104,4 @@ def test_task_advice_pair():
     test_question_answer_pair()
     test_task_advice_pair()
-    print(in_color("\n", 92))
+    print(in_color("\n", 96))
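Note: the key move in the patch above is that retrieval now returns a filtered list rather than exactly one memo. A sketch of the thresholding logic in isolation, with hypothetical values; only candidates closer than the distance threshold survive, so an empty result is a normal outcome.

def filter_memos_by_distance(uids, documents, distances, info_dict, threshold=1.0):
    # Keep (input, output, distance) triples only for sufficiently close matches.
    memos = []
    for uid, input_text, distance in zip(uids, documents, distances):
        if distance < threshold:
            memos.append((input_text, info_dict[uid], distance))
    return memos

info = {"1": "advice A", "2": "advice B"}
print(filter_memos_by_distance(["1", "2"], ["task A", "task B"], [0.4, 1.6], info))
# -> [('task A', 'advice A', 0.4)]   The distant second candidate is dropped.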
""" results = self.vec_db.query(query_texts=[query_text], n_results=4) memos = [] From b31141f516b6c1c3b41d06345bb3acdcc6f73aa2 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Tue, 10 Oct 2023 17:50:58 -0700 Subject: [PATCH 14/50] cleanup --- autogen/agentchat/contrib/teachable_agent.py | 111 ++++++++----------- test/agentchat/test_teachable_agent.py | 33 +++--- 2 files changed, 61 insertions(+), 83 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index fae1869ae46..646cc0ab76b 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -1,24 +1,18 @@ from autogen import oai from autogen.agentchat.agent import Agent from autogen.agentchat.assistant_agent import ConversableAgent -from autogen.agentchat.contrib.text_analyzer import TextAnalyzer from autogen.agentchat.contrib.analysis_agent import AnalysisAgent from typing import Callable, Dict, Optional, Union, List, Tuple, Any import chromadb from chromadb.config import Settings -def in_color(text, color): - # Available colors: - # 90 = grey - # 91 = red - # 92 = green - # 93 = yellow - # 94 = blue - # 95 = magenta - # 96 = cyan - # 97 = white - return "\033[{}m".format(color) + text + "\033[0m" +try: + from termcolor import colored +except ImportError: + + def colored(x, *args, **kwargs): + return x class TeachableAgent(ConversableAgent): @@ -48,20 +42,15 @@ def __init__( self.register_reply(Agent, TeachableAgent._generate_teachable_assistant_reply) self.verbosity = 2 # 1 to print DB operations, 2 to add caller details. - self.db_method = 1 # 0=none, 1=Both tasks & facts self.prepopulate = 1 # 1 to prepopulate the DB with a set of input-output pairs. self.use_cache = False # 1 to skip LLM calls made previously by relying on cached responses. - self.use_analyzer_agent = 1 # 1 to use the new analysis agent, 0 to use the old text analyzer. + self.recall_threshold = 1.4 # The distance threshold for retrieving memos from the DB. - if self.use_analyzer_agent: - self.analyzer = AnalysisAgent("analyzer", llm_config=llm_config) - else: - self.text_analyzer = TextAnalyzer(self.use_cache) + self.analyzer = AnalysisAgent("analyzer", llm_config=llm_config) - if self.db_method > 0: - self.memo_store = MemoStore(self.verbosity) - self.memo_store.prepopulate() - self.user_comments = [] # Stores user comments until the end of the chat. + self.memo_store = MemoStore(self.verbosity) + self.memo_store.prepopulate() + self.user_comments = [] # Stores user comments until the end of the chat. def delete_db(self): self.memo_store.db_client.reset() @@ -76,7 +65,7 @@ def _generate_teachable_assistant_reply( Generates a reply to the last user message, after querying the memo store for relevant information. Uses self.analyzer to make decisions about memo storage and retrieval. """ - if self.use_analyzer_agent and (sender == self.analyzer): + if sender == self.analyzer: # This is a response from the text analyzer. Don't reply to it. return True, None @@ -101,20 +90,19 @@ def _generate_teachable_assistant_reply( if user_text == 'new chat': self.clear_history() self.learn_from_recent_user_comments() - print(in_color("\n", 96)) + print(colored("\n", 'light_cyan')) return True, 'New chat started.' - if self.db_method > 0: - # This is a normal user turn. Keep track of it for potential storage later. - self.user_comments.append(user_text) + # This is a normal user turn. Keep track of it for potential storage later. 
From b31141f516b6c1c3b41d06345bb3acdcc6f73aa2 Mon Sep 17 00:00:00 2001
From: rickyloynd-microsoft
Date: Tue, 10 Oct 2023 17:50:58 -0700
Subject: [PATCH 14/50] cleanup

---
 autogen/agentchat/contrib/teachable_agent.py | 111 ++++++-----------
 test/agentchat/test_teachable_agent.py       |  33 +++---
 2 files changed, 61 insertions(+), 83 deletions(-)

diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py
index fae1869ae46..646cc0ab76b 100644
--- a/autogen/agentchat/contrib/teachable_agent.py
+++ b/autogen/agentchat/contrib/teachable_agent.py
@@ -1,24 +1,18 @@
 from autogen import oai
 from autogen.agentchat.agent import Agent
 from autogen.agentchat.assistant_agent import ConversableAgent
-from autogen.agentchat.contrib.text_analyzer import TextAnalyzer
 from autogen.agentchat.contrib.analysis_agent import AnalysisAgent
 from typing import Callable, Dict, Optional, Union, List, Tuple, Any
 import chromadb
 from chromadb.config import Settings
 
 
-def in_color(text, color):
-    # Available colors:
-    #  90 = grey
-    #  91 = red
-    #  92 = green
-    #  93 = yellow
-    #  94 = blue
-    #  95 = magenta
-    #  96 = cyan
-    #  97 = white
-    return "\033[{}m".format(color) + text + "\033[0m"
+try:
+    from termcolor import colored
+except ImportError:
+
+    def colored(x, *args, **kwargs):
+        return x
 
 
 class TeachableAgent(ConversableAgent):
@@ -48,20 +42,15 @@ def __init__(
         self.register_reply(Agent, TeachableAgent._generate_teachable_assistant_reply)
 
         self.verbosity = 2  # 1 to print DB operations, 2 to add caller details.
-        self.db_method = 1  # 0=none, 1=Both tasks & facts
         self.prepopulate = 1  # 1 to prepopulate the DB with a set of input-output pairs.
         self.use_cache = False  # 1 to skip LLM calls made previously by relying on cached responses.
-        self.use_analyzer_agent = 1  # 1 to use the new analysis agent, 0 to use the old text analyzer.
+        self.recall_threshold = 1.4  # The distance threshold for retrieving memos from the DB.
 
-        if self.use_analyzer_agent:
-            self.analyzer = AnalysisAgent("analyzer", llm_config=llm_config)
-        else:
-            self.text_analyzer = TextAnalyzer(self.use_cache)
+        self.analyzer = AnalysisAgent("analyzer", llm_config=llm_config)
 
-        if self.db_method > 0:
-            self.memo_store = MemoStore(self.verbosity)
-            self.memo_store.prepopulate()
-            self.user_comments = []  # Stores user comments until the end of the chat.
+        self.memo_store = MemoStore(self.verbosity)
+        self.memo_store.prepopulate()
+        self.user_comments = []  # Stores user comments until the end of the chat.
 
     def delete_db(self):
         self.memo_store.db_client.reset()
@@ -76,7 +65,7 @@ def _generate_teachable_assistant_reply(
         Generates a reply to the last user message, after querying the memo store for relevant information.
         Uses self.analyzer to make decisions about memo storage and retrieval.
         """
-        if self.use_analyzer_agent and (sender == self.analyzer):
+        if sender == self.analyzer:
             # This is a response from the text analyzer. Don't reply to it.
             return True, None
 
@@ -101,20 +90,19 @@ def _generate_teachable_assistant_reply(
         if user_text == 'new chat':
             self.clear_history()
             self.learn_from_recent_user_comments()
-            print(in_color("\n", 96))
+            print(colored("\n", 'light_cyan'))
             return True, 'New chat started.'
 
-        if self.db_method > 0:
-            # This is a normal user turn. Keep track of it for potential storage later.
-            self.user_comments.append(user_text)
+        # This is a normal user turn. Keep track of it for potential storage later.
+        self.user_comments.append(user_text)
 
-            if self.memo_store.num_memos > 0:
-                # Consider whether to retrieve something from the DB.
-                new_user_text = self.consider_memo_retrieval(user_text, llm_config)
-                if new_user_text != user_text:
-                    # Make a copy of the message list, and replace the last user message with the new one.
-                    messages = messages.copy()
-                    messages[-1]['content'] = new_user_text
+        if self.memo_store.num_memos > 0:
+            # Consider whether to retrieve something from the DB.
+            new_user_text = self.consider_memo_retrieval(user_text, llm_config)
+            if new_user_text != user_text:
+                # Make a copy of the message list, and replace the last user message with the new one.
+                messages = messages.copy()
+                messages[-1]['content'] = new_user_text
 
         ctxt = messages[-1].pop("context", None)  # This peels off any "context" message from the list.
         msgs = self._oai_system_message + messages
@@ -124,14 +112,13 @@ def _generate_teachable_assistant_reply(
         return True, response_text
 
     def learn_from_recent_user_comments(self):
-        print(in_color("\nREVIEW CHAT FOR ITEMS TO REMEMBER", 96))
-        if self.db_method > 0:
-            # Look at each user turn.
-            if len(self.user_comments) > 0:
-                for comment in self.user_comments:
-                    # Consider whether to store something from this user turn in the DB.
-                    self.consider_memo_storage(comment, self.llm_config)
-            self.user_comments = []
+        print(colored("\nREVIEW CHAT FOR ITEMS TO REMEMBER", 'light_cyan'))
+        # Look at each user turn.
+        if len(self.user_comments) > 0:
+            for comment in self.user_comments:
+                # Consider whether to store something from this user turn in the DB.
+                self.consider_memo_storage(comment, self.llm_config)
+        self.user_comments = []
 
@@ -151,7 +138,7 @@ def consider_memo_storage(self, comment, llm_config):
                     "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.")
                 # Add the task-advice (problem-solution) pair to the vector DB.
                 if self.verbosity >= 1:
-                    print(in_color("\nFOUND TASK-ADVICE PAIR", 92))
+                    print(colored("\nFOUND TASK-ADVICE PAIR", 'light_green'))
                 self.memo_store.add_input_output_pair(general_task, advice)
                 return
 
@@ -174,7 +161,7 @@ def consider_memo_storage(self, comment, llm_config):
                 "Briefly copy the information from the last user comment that may be useful later.")
             # Add the question-answer pair to the vector DB.
             if self.verbosity >= 1:
-                print(in_color("\nFOUND QUESTION-ANSWER PAIR", 92))
+                print(colored("\nFOUND QUESTION-ANSWER PAIR", 'light_green'))
             self.memo_store.add_input_output_pair(question, answer)
 
@@ -205,34 +192,30 @@ def retrieve_relevant_memos(self, input_text):
         if self.verbosity >= 1:
-            print(in_color('\nLOOK FOR RELEVANT MEMOS', 93))
+            print(colored('\nLOOK FOR RELEVANT MEMOS', 'light_yellow'))
         memo_texts = ''
-        memos = self.memo_store.get_related_memos(input_text)
+        memos = self.memo_store.get_related_memos(input_text, threshold=self.recall_threshold)
 
         if self.verbosity >= 1:
             # Was anything retrieved?
             if len(memos) == 0:
                 # No. Look at the closest memo.
-                print(in_color('\nTHE CLOSEST MEMO IS BEYOND THE THRESHOLD...', 93))
+                print(colored('\nTHE CLOSEST MEMO IS BEYOND THE THRESHOLD...', 'light_yellow'))
                 memo = self.memo_store.get_nearest_memo(input_text)
                 print(memo)
 
         for memo in memos:
             info = "(Here is some information that might help:\n" + memo[1] + ")"
             if self.verbosity >= 1:
-                print(in_color('\nMEMO APPENDED TO LAST USER MESSAGE...\n' + info + '\n', 93))
+                print(colored('\nMEMO APPENDED TO LAST USER MESSAGE...\n' + info + '\n', 'light_yellow'))
             memo_texts = memo_texts + '\n' + info
         return memo_texts
 
     def analyze(self, llm_config, text_to_analyze, analysis_instructions):
-        ### Calls either the AnalysisAgent or the older TextAnalyzer. """
-        if self.use_analyzer_agent:
-            message_text = '\nAnalysis: '.join([text_to_analyze, analysis_instructions])
-            self.initiate_chat(recipient=self.analyzer, message=message_text)
-            response_text = self.last_message(self.analyzer)["content"]
-        else:
-            response_text = self.text_analyzer.analyze(llm_config, text_to_analyze, analysis_instructions)
-        return response_text
+        """Combines the text to analyze with the analysis instructions, and sends them to the analyzer."""
+        message_text = '\nAnalysis: '.join([text_to_analyze, analysis_instructions])
+        self.initiate_chat(recipient=self.analyzer, message=message_text)
+        return self.last_message(self.analyzer)["content"]
 
 
 class MemoStore():
@@ -252,8 +235,8 @@ def add_input_output_pair(self, input_text, output_text):
         self.vec_db.add(documents=[input_text], ids=[str(self.next_uid)])
         self.info_dict[str(self.next_uid)] = output_text
         if self.verbosity >= 1:
-            print(in_color("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n  INPUT\n    {}\n  OUTPUT\n    {}".format(
-                input_text, output_text), 92))
+            print(colored("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n  INPUT\n    {}\n  OUTPUT\n    {}".format(
+                input_text, output_text), 'light_green'))
 
     def get_nearest_memo(self, query_text):
         """ Retrieves the nearest memo to the given query text. """
@@ -261,22 +244,22 @@ def get_nearest_memo(self, query_text):
         output_text = self.info_dict[uid]
         if self.verbosity >= 1:
-            print(in_color("\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n  INPUT\n    {}\n  OUTPUT\n    {}\n  DISTANCE\n    {}".format(
-                input_text, output_text, distance), 92))
+            print(colored("\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n  INPUT\n    {}\n  OUTPUT\n    {}\n  DISTANCE\n    {}".format(
+                input_text, output_text, distance), 'light_green'))
         return input_text, output_text, distance
 
-    def get_related_memos(self, query_text, threshold=1.4):
+    def get_related_memos(self, query_text, n_results=4, threshold=1.4):
         """ Retrieves memos that are related to the given query text with the threshold. """
-        results = self.vec_db.query(query_texts=[query_text], n_results=4)
+        results = self.vec_db.query(query_texts=[query_text], n_results=n_results)
         memos = []
         for i in range(len(results['ids'])):
             uid, input_text, distance = results['ids'][i][0], results['documents'][i][0], results['distances'][i][0]
             if distance < threshold:
                 output_text = self.info_dict[uid]
                 if self.verbosity >= 1:
-                    print(in_color(
+                    print(colored(
                         "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n  INPUT\n    {}\n  OUTPUT\n    {}\n  DISTANCE\n    {}".format(
-                            input_text, output_text, distance), 92))
+                            input_text, output_text, distance), 'light_green'))
                 memos.append((input_text, output_text, distance))
         return memos
 
diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py
index 07503898ac4..60b10459931 100644
--- a/test/agentchat/test_teachable_agent.py
+++ b/test/agentchat/test_teachable_agent.py
@@ -3,17 +3,12 @@
 from autogen.agentchat.contrib.teachable_agent import TeachableAgent
 
 
-def in_color(text, color):
-    # Available colors:
-    #  90 = grey
-    #  91 = red
-    #  92 = green
-    #  93 = yellow
-    #  94 = blue
-    #  95 = magenta
-    #  96 = cyan
-    #  97 = white
-    return "\033[{}m".format(color) + text + "\033[0m"
+try:
+    from termcolor import colored
+except ImportError:
+
+    def colored(x, *args, **kwargs):
+        return x
 
 
 def interact_freely_with_user():
@@ -27,12 +22,12 @@ def interact_freely_with_user():
     user_proxy = UserProxyAgent("user_proxy", human_input_mode="ALWAYS")
 
     # Start the chat.
-    print(in_color("\nTo clear the context and start a new chat, type 'new chat'.", 96))
+    print(colored("\nTo clear the context and start a new chat, type 'new chat'.", 'light_cyan'))
     user_proxy.initiate_chat(assistant, message="Hi")
 
 
 def test_question_answer_pair():
-    print(in_color("\n", 96))
+    print(colored("\n", 'light_cyan'))
 
     # Load LLM inference endpoints from an env variable or a file
     # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
@@ -55,18 +50,18 @@ def test_question_answer_pair():
     agent.learn_from_recent_user_comments()
 
     # Now start a new chat to clear the context, and require the agent to use its new knowledge.
-    print(in_color("\n", 96))
+    print(colored("\n", 'light_cyan'))
     user.initiate_chat(recipient=agent, message="What's the twist of 8 and 3 and 2?")
     agent_response = user.last_message(agent)
     assert '35' in agent_response["content"]  # GPT-4 usually gets the right answer here, which is 35.
 
     # End of test
     agent.delete_db()
-    print(in_color("", 96))
+    print(colored("", 'light_cyan'))
 
 
 def test_task_advice_pair():
-    print(in_color("\n", 96))
+    print(colored("\n", 'light_cyan'))
 
     # Load LLM inference endpoints from an env variable or a file
     # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
@@ -86,14 +81,14 @@ def test_task_advice_pair():
     agent.learn_from_recent_user_comments()
 
     # Now start a new chat to clear the context, and require the agent to use its new knowledge.
-    print(in_color("\n", 96))
+    print(colored("\n", 'light_cyan'))
     user.initiate_chat(recipient=agent, message="Please calculate the twist of 8 and 3 and 2.")
     agent_response = user.last_message(agent)
     assert '35' in agent_response["content"]  # GPT-4 usually gets the right answer here, which is 35.
 
     # End of test
     agent.delete_db()  # Delete the DB now, instead of waiting for garbage collection to do it.
-    print(in_color("", 96))
+    print(colored("", 'light_cyan'))
 
 
 if __name__ == "__main__":
@@ -104,4 +99,4 @@ def test_task_advice_pair():
     test_question_answer_pair()
     test_task_advice_pair()
-    print(in_color("\n", 96))
+    print(colored("\n", 'light_cyan'))
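Note: the cleanup above replaces the hand-rolled in_color() helper with termcolor plus an import fallback. The pattern in isolation: when termcolor is missing, colored() degrades to an identity function with the same signature, so call sites need no guards. The light_* color names assume a termcolor version that supports them.

try:
    from termcolor import colored
except ImportError:

    def colored(x, *args, **kwargs):
        return x  # Fallback: same signature, no coloring.

print(colored("LOOK FOR RELEVANT MEMOS", "light_yellow"))  # Colored only if termcolor is installed.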
From 18387a7537a20eb189d7f170ae78923093507555 Mon Sep 17 00:00:00 2001
From: rickyloynd-microsoft
Date: Tue, 10 Oct 2023 18:42:31 -0700
Subject: [PATCH 15/50] teach_config

---
 autogen/agentchat/contrib/teachable_agent.py | 28 +++++++++++++++-----
 test/agentchat/test_teachable_agent.py       | 20 +++++++++++---
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py
index 646cc0ab76b..973a733e587 100644
--- a/autogen/agentchat/contrib/teachable_agent.py
+++ b/autogen/agentchat/contrib/teachable_agent.py
@@ -20,15 +20,28 @@ class TeachableAgent(ConversableAgent):
     """
     def __init__(
         self,
-        name: str,
+        name="Assistant",  # default set to Assistant
         system_message: Optional[str] = "You are a helpful AI assistant.",
         llm_config: Optional[Union[Dict, bool]] = None,
         is_termination_msg: Optional[Callable[[Dict], bool]] = None,
         max_consecutive_auto_reply: Optional[int] = None,
         human_input_mode: Optional[str] = "NEVER",
         code_execution_config: Optional[Union[Dict, bool]] = False,
+        teach_config: Optional[Dict] = None,  # config for the TeachableAgent
        **kwargs,
     ):
+        """
+        Args:
+            name (str): name of the agent. Default "Assistant".
+            human_input_mode (str): NEVER ask for human input for this agent.
+            teach_config (dict or None): config for the TeachableAgent.
+                To use default config, set to None. Otherwise, set to a dictionary with any of the following keys:
+                - verbosity (Optional, int): 1 to print DB operations, 2 to add caller details. Default 0.
+                - prepopulate (Optional, int): 1 to prepopulate the DB with a set of input-output pairs. Default 1.
+                - use_cache (Optional, bool): True to skip LLM calls made previously by relying on cached responses. Default False.
+                - recall_threshold (Optional, float): The distance threshold for retrieving memos from the DB. Default 1.5.
+            **kwargs (dict): other kwargs in [ConversableAgent](../conversable_agent#__init__).
+        """
         super().__init__(
             name,
             system_message,
@@ -41,16 +54,17 @@ def __init__(
         )
         self.register_reply(Agent, TeachableAgent._generate_teachable_assistant_reply)
 
-        self.verbosity = 2  # 1 to print DB operations, 2 to add caller details.
-        self.prepopulate = 1  # 1 to prepopulate the DB with a set of input-output pairs.
-        self.use_cache = False  # 1 to skip LLM calls made previously by relying on cached responses.
-        self.recall_threshold = 1.4  # The distance threshold for retrieving memos from the DB.
+        self._teach_config = {} if teach_config is None else teach_config
+        self.verbosity = self._teach_config.get("verbosity", 0)
+        self.prepopulate = self._teach_config.get("prepopulate", 1)
+        self.use_cache = self._teach_config.get("use_cache", False)
+        self.recall_threshold = self._teach_config.get("recall_threshold", 1.5)
 
         self.analyzer = AnalysisAgent("analyzer", llm_config=llm_config)
 
         self.memo_store = MemoStore(self.verbosity)
         self.memo_store.prepopulate()
-        self.user_comments = []  # Stores user comments until the end of the chat.
+        self.user_comments = []  # Stores user comments until the end of each chat.
 
     def delete_db(self):
         self.memo_store.db_client.reset()
@@ -173,7 +187,7 @@ def consider_memo_retrieval(self, comment, llm_config):
             # Distinguish between a question and a task.
             memo_lookup_key = comment
             response = self.analyze(llm_config, comment,
-                "Would the last user comment be best described as a simple question, or some kind of task? Answer with just one word, question or task.")
+                "Does the last user comment contain a task to perform, or a simple question? Answer with just one word, task or question.")
             if 'task' in response.lower():
                 # Extract the task.
                 task = self.analyze(llm_config, comment,
diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py
index 60b10459931..5fff6f9f6bc 100644
--- a/test/agentchat/test_teachable_agent.py
+++ b/test/agentchat/test_teachable_agent.py
@@ -11,6 +11,9 @@ def colored(x, *args, **kwargs):
         return x
 
 
+verbosity = 2
+
+
 def interact_freely_with_user():
     # Load LLM inference endpoints from an env variable or a file
     # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
@@ -18,12 +21,15 @@ def interact_freely_with_user():
     config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]})
 
     # Create the agents.
-    assistant = TeachableAgent("assistant", llm_config={"config_list": config_list})
+    agent = TeachableAgent(
+        name="assistant",
+        llm_config={"config_list": config_list},
+        teach_config={"verbosity": verbosity})
     user_proxy = UserProxyAgent("user_proxy", human_input_mode="ALWAYS")
 
     # Start the chat.
     print(colored("\nTo clear the context and start a new chat, type 'new chat'.", 'light_cyan'))
-    user_proxy.initiate_chat(assistant, message="Hi")
+    user_proxy.initiate_chat(agent, message="Hi")
 
 
 def test_question_answer_pair():
@@ -35,7 +41,10 @@ def test_question_answer_pair():
     config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]})
 
     # Create the agents.
-    agent = TeachableAgent("agent", llm_config={"config_list": config_list})
+    agent = TeachableAgent(
+        name="assistant",
+        llm_config={"config_list": config_list},
+        teach_config={"verbosity": verbosity})
     user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER")
 
     # Ask the agent to do something using terminology it doesn't understand.
@@ -69,7 +78,10 @@ def test_task_advice_pair():
     config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]})
 
     # Create the agents.
-    agent = TeachableAgent("agent", llm_config={"config_list": config_list})
+    agent = TeachableAgent(
+        name="assistant",
+        llm_config={"config_list": config_list},
+        teach_config={"verbosity": verbosity})
     user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER")
 
     # Ask the agent to do something, and provide some helpful advice.
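Note: a usage sketch of the teach_config argument introduced above, assuming an OAI_CONFIG_LIST file as in the tests. The keys and defaults are the ones documented in the new docstring; any omitted key falls back to its default.

from autogen import config_list_from_json
from autogen.agentchat.contrib.teachable_agent import TeachableAgent

config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")
agent = TeachableAgent(
    name="assistant",
    llm_config={"config_list": config_list},
    teach_config={
        "verbosity": 1,           # 1 prints DB operations, 2 adds caller details.
        "prepopulate": 1,         # Seed the DB with example input-output pairs.
        "use_cache": False,       # True reuses cached LLM responses.
        "recall_threshold": 1.5,  # Max distance for a memo to be retrieved.
    },
)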
From 0f49f0df5a6ae480f5289a0fe18c16f2f9944dcb Mon Sep 17 00:00:00 2001
From: rickyloynd-microsoft
Date: Wed, 11 Oct 2023 10:55:36 -0700
Subject: [PATCH 16/50] refactoring

---
 autogen/agentchat/contrib/teachable_agent.py |  1 -
 test/agentchat/test_teachable_agent.py       | 79 +++++++++++---------
 2 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py
index 973a733e587..d73ba71bd27 100644
--- a/autogen/agentchat/contrib/teachable_agent.py
+++ b/autogen/agentchat/contrib/teachable_agent.py
@@ -10,7 +10,6 @@
 try:
     from termcolor import colored
 except ImportError:
-
     def colored(x, *args, **kwargs):
         return x
 
diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py
index 5fff6f9f6bc..5f5f9e73530 100644
--- a/test/agentchat/test_teachable_agent.py
+++ b/test/agentchat/test_teachable_agent.py
@@ -6,20 +6,21 @@
 try:
     from termcolor import colored
 except ImportError:
-
     def colored(x, *args, **kwargs):
         return x
 
 
 verbosity = 2
+assert_on_error = False
 
+# Load LLM inference endpoints from an env variable or a file
+# See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
+# and OAI_CONFIG_LIST_sample
+config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]})
+# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-3.5-turbo-0613"]})
 
-def interact_freely_with_user():
-    # Load LLM inference endpoints from an env variable or a file
-    # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
-    # and OAI_CONFIG_LIST_sample
-    config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]})
 
+def interact_freely_with_user():
     # Create the agents.
     agent = TeachableAgent(
         name="assistant",
         llm_config={"config_list": config_list},
         teach_config={"verbosity": verbosity})
     user_proxy = UserProxyAgent("user_proxy", human_input_mode="ALWAYS")
 
     # Start the chat.
     print(colored("\nTo clear the context and start a new chat, type 'new chat'.", 'light_cyan'))
     user_proxy.initiate_chat(agent, message="Hi")
 
 
-def test_question_answer_pair():
-    print(colored("\n", 'light_cyan'))
+def check_agent_response(agent, user, correct_answer):
+    agent_response = user.last_message(agent)["content"]
+    if correct_answer not in agent_response:
+        print(colored(f"\n", 'light_red'))
+        if assert_on_error:
+            assert correct_answer in agent_response
+        return 1
+    return 0
 
-    # Load LLM inference endpoints from an env variable or a file
-    # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
-    # and OAI_CONFIG_LIST_sample
-    config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]})
+
+def test_question_answer_pair():
+    print(colored("\n", 'light_cyan'))
+    num_errors = 0
 
     # Create the agents.
     agent = TeachableAgent(
         name="assistant",
         llm_config={"config_list": config_list},
         teach_config={"verbosity": verbosity})
     user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER")
 
     # Ask the agent to do something using terminology it doesn't understand.
@@ -32,13 +33,19 @@ def test_question_answer_pair():
 
     # Explain the terminology to the agent.
     user.send(recipient=agent, message="The twist of two or more numbers is their product minus their sum.")
-    agent_response = user.last_message(agent)
-    assert '23' in agent_response["content"]  # GPT-4 usually gets the right answer here, which is 23.
+    num_errors += check_agent_response(agent, user, "23")
 
     # Let the agent remember things that should be learned from this chat.
     agent.learn_from_recent_user_comments()
 
     # Now start a new chat to clear the context, and require the agent to use its new knowledge.
     print(colored("\n", 'light_cyan'))
     user.initiate_chat(recipient=agent, message="What's the twist of 8 and 3 and 2?")
-    agent_response = user.last_message(agent)
-    assert '35' in agent_response["content"]  # GPT-4 usually gets the right answer here, which is 35.
+    num_errors += check_agent_response(agent, user, "35")
 
-    # End of test
-    agent.delete_db()
-    print(colored("", 'light_cyan'))
+    # Wrap up.
+    if num_errors == 0:
+        print(colored("\n", 'light_cyan'))
+    else:
+        print(colored(f"\n", 'light_red'))
+    agent.delete_db()  # Delete the DB now, instead of waiting for garbage collection to do it.
+    return num_errors
 
 
 def test_task_advice_pair():
-    print(colored("\n", 'light_cyan'))
-
-    # Load LLM inference endpoints from an env variable or a file
-    # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
-    # and OAI_CONFIG_LIST_sample
-    config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]})
+    print(colored("\n", 'light_cyan'))
+    num_errors = 0
 
     # Create the agents.
     agent = TeachableAgent(
         name="assistant",
         llm_config={"config_list": config_list},
         teach_config={"verbosity": verbosity})
     user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER")
 
     # Ask the agent to do something, and provide some helpful advice.
     user.initiate_chat(recipient=agent, message="Compute the twist of 5 and 7. Here's a hint: The twist of two or more numbers is their product minus their sum.")
-    agent_response = user.last_message(agent)
-    assert '23' in agent_response["content"]  # GPT-4 usually gets the right answer here, which is 23.
+    num_errors += check_agent_response(agent, user, "23")
 
     # Let the agent remember things that should be learned from this chat.
     agent.learn_from_recent_user_comments()
 
     # Now start a new chat to clear the context, and require the agent to use its new knowledge.
     print(colored("\n", 'light_cyan'))
     user.initiate_chat(recipient=agent, message="Please calculate the twist of 8 and 3 and 2.")
-    agent_response = user.last_message(agent)
-    assert '35' in agent_response["content"]  # GPT-4 usually gets the right answer here, which is 35.
+    num_errors += check_agent_response(agent, user, "35")
 
-    # End of test
+    # Wrap up.
+    if num_errors == 0:
+        print(colored("\n", 'light_cyan'))
+    else:
+        print(colored(f"\n", 'light_red'))
     agent.delete_db()  # Delete the DB now, instead of waiting for garbage collection to do it.
+    return num_errors
 
 
 if __name__ == "__main__":
@@ -104,4 +111,10 @@ def test_task_advice_pair():
     interact_freely_with_user()
     exit()
 
-    test_question_answer_pair()
-    test_task_advice_pair()
-    print(colored("\n", 'light_cyan'))
+    total_num_errors = 0
+    total_num_errors += test_question_answer_pair()
+    total_num_errors += test_task_advice_pair()
+    if total_num_errors == 0:
+        print(colored("\n", 'light_cyan'))
+    else:
+        print(colored(f"\n", 'light_red'))
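Note: the refactoring above makes each test accumulate an error count instead of asserting on the first failure, so one run can report every mismatch. The counting pattern on its own, with made-up response strings:

def check_answer(response_text, correct_answer, assert_on_error=False):
    # Returns 1 on a miss (optionally asserting), 0 on a hit.
    if correct_answer not in response_text:
        if assert_on_error:
            assert correct_answer in response_text
        return 1
    return 0

total_num_errors = check_answer("The twist is 23.", "23") + check_answer("No idea.", "35")
print(total_num_errors)  # -> 1, and both checks still ran.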
From adac582be9efa15099aad0593a1e63d2564f49f7 Mon Sep 17 00:00:00 2001
From: rickyloynd-microsoft
Date: Wed, 11 Oct 2023 13:12:14 -0700
Subject: [PATCH 17/50] For robustness, allow more flexibility on memo storage
 and retrieval.

---
 autogen/agentchat/contrib/teachable_agent.py | 61 ++++++++++----------
 test/agentchat/test_teachable_agent.py       | 54 +++++++----------
 2 files changed, 51 insertions(+), 64 deletions(-)

diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py
index d73ba71bd27..0d41b9625c9 100644
--- a/autogen/agentchat/contrib/teachable_agent.py
+++ b/autogen/agentchat/contrib/teachable_agent.py
@@ -103,7 +103,7 @@ def _generate_teachable_assistant_reply(
         if user_text == 'new chat':
             self.clear_history()
             self.learn_from_recent_user_comments()
-            print(colored("\n", 'light_cyan'))
+            print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", 'light_cyan'))
             return True, 'New chat started.'
 
         # This is a normal user turn. Keep track of it for potential storage later.
@@ -151,16 +151,16 @@ def consider_memo_storage(self, comment, llm_config):
                     "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.")
                 # Add the task-advice (problem-solution) pair to the vector DB.
                 if self.verbosity >= 1:
-                    print(colored("\nFOUND TASK-ADVICE PAIR", 'light_green'))
+                    print(colored("\nREMEMBER THIS TASK-ADVICE PAIR", 'light_yellow'))
                 self.memo_store.add_input_output_pair(general_task, advice)
-                return
+                # return
 
-        # Check for a simple question.
-        response = self.analyze(llm_config, comment,
-            "Does the last user comment contain a simple question? Answer with just one word, yes or no.")
-        if 'yes' in response.lower():
-            # Ignore it.
-            return
+        # # Check for a simple question.
+        # response = self.analyze(llm_config, comment,
+        #     "Does the last user comment contain a simple question? Answer with just one word, yes or no.")
+        # if 'yes' in response.lower():
+        #     # Ignore it.
+        #     return
 
         # Check for information to be learned.
         response = self.analyze(llm_config, comment,
@@ -174,34 +174,32 @@ def consider_memo_storage(self, comment, llm_config):
                 "Briefly copy the information from the last user comment that may be useful later.")
             # Add the question-answer pair to the vector DB.
             if self.verbosity >= 1:
-                print(colored("\nFOUND QUESTION-ANSWER PAIR", 'light_green'))
+                print(colored("\nREMEMBER THIS QUESTION-ANSWER PAIR", 'light_yellow'))
             self.memo_store.add_input_output_pair(question, answer)
 
     def consider_memo_retrieval(self, comment, llm_config):
         """Decides whether to retrieve something from the DB."""
-        # Check for a question or task.
+
+        # First, just use the user comment as the lookup key.
+        expanded_comment = comment
+        expanded_comment = expanded_comment + self.retrieve_relevant_memos(comment)
+
+        # Next, if the comment involves a task, extract and generalize the task before using it as the lookup key.
         response = self.analyze(llm_config, comment,
-            "Does the last user comment contain a question, task, or problem to solve? Answer with just one word, yes or no.")
+            "Does the last user comment contain a task or problem to solve? Answer with just one word, yes or no.")
         if 'yes' in response.lower():
-            # Distinguish between a question and a task.
-            memo_lookup_key = comment
-            response = self.analyze(llm_config, comment,
-                "Does the last user comment contain a task to perform, or a simple question? Answer with just one word, task or question.")
-            if 'task' in response.lower():
-                # Extract the task.
-                task = self.analyze(llm_config, comment,
-                    "Copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.")
-                # Generalize the task.
-                general_task = self.analyze(llm_config, task,
-                    "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.")
-                # Use the generalized task as the lookup key.
-                memo_lookup_key = general_task
-
+            # Extract the task.
+            task = self.analyze(llm_config, comment,
+                "Copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.")
+            # Generalize the task.
+            general_task = self.analyze(llm_config, task,
+                "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.")
             # Append any relevant memos.
-            return comment + self.retrieve_relevant_memos(memo_lookup_key)
+            expanded_comment = expanded_comment + self.retrieve_relevant_memos(general_task)
+
+        # Need to de-duplicate the memos.
 
-        # For anything else, just return the user comment.
-        return comment
+        return expanded_comment
 
     def retrieve_relevant_memos(self, input_text):
         if self.verbosity >= 1:
@@ -265,8 +263,9 @@ def get_related_memos(self, query_text, n_results=4, threshold=1.4):
         """ Retrieves memos that are related to the given query text with the threshold. """
         results = self.vec_db.query(query_texts=[query_text], n_results=n_results)
         memos = []
-        for i in range(len(results['ids'])):
-            uid, input_text, distance = results['ids'][i][0], results['documents'][i][0], results['distances'][i][0]
+        num_results = len(results['ids'][0])
+        for i in range(num_results):
+            uid, input_text, distance = results['ids'][0][i], results['documents'][0][i], results['distances'][0][i]
             if distance < threshold:
                 output_text = self.info_dict[uid]
                 if self.verbosity >= 1:
diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py
index 5f5f9e73530..2c7187d8436 100644
--- a/test/agentchat/test_teachable_agent.py
+++ b/test/agentchat/test_teachable_agent.py
@@ -20,38 +20,39 @@ def colored(x, *args, **kwargs):
 # config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-3.5-turbo-0613"]})
 
 
-def interact_freely_with_user():
-    # Create the agents.
-    agent = TeachableAgent(
-        name="assistant",
-        llm_config={"config_list": config_list},
-        teach_config={"verbosity": verbosity})
-    user_proxy = UserProxyAgent("user_proxy", human_input_mode="ALWAYS")
+def create_teachable_agent():
+    agent = TeachableAgent(
+        name="assistant",
+        llm_config={"config_list": config_list},
+        teach_config={"verbosity": verbosity, "recall_threshold": 1.5})
+    return agent
+
+
+def interact_freely_with_user():
+    agent = create_teachable_agent()
+    user = UserProxyAgent("user", human_input_mode="ALWAYS")
 
     # Start the chat.
     print(colored("\nTo clear the context and start a new chat, type 'new chat'.", 'light_cyan'))
-    user_proxy.initiate_chat(agent, message="Hi")
+    user.initiate_chat(agent, message="Hi")
 
 
 def check_agent_response(agent, user, correct_answer):
     agent_response = user.last_message(agent)["content"]
     if correct_answer not in agent_response:
-        print(colored(f"\n", 'light_red'))
+        print(colored(f"\nTEST FAILED: EXPECTED ANSWER {correct_answer} NOT FOUND IN AGENT RESPONSE", 'light_red'))
         if assert_on_error:
             assert correct_answer in agent_response
         return 1
-    return 0
+    else:
+        print(colored(f"\nTEST PASSED: EXPECTED ANSWER {correct_answer} FOUND IN AGENT RESPONSE", 'light_cyan'))
+        return 0
 
 
 def test_question_answer_pair():
-    print(colored("\n", 'light_cyan'))
+    print(colored("\nTEST QUESTION-ANSWER PAIRS", 'light_cyan'))
     num_errors = 0
-
-    # Create the agents.
-    agent = TeachableAgent(
-        name="assistant",
-        llm_config={"config_list": config_list},
-        teach_config={"verbosity": verbosity})
+    agent = create_teachable_agent()
     user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER")
 
     # Ask the agent to do something using terminology it doesn't understand.
@@ -65,28 +66,19 @@ def test_question_answer_pair():
     agent.learn_from_recent_user_comments()
 
     # Now start a new chat to clear the context, and require the agent to use its new knowledge.
-    print(colored("\n", 'light_cyan'))
+    print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", 'light_cyan'))
     user.initiate_chat(recipient=agent, message="What's the twist of 8 and 3 and 2?")
     num_errors += check_agent_response(agent, user, "35")
 
     # Wrap up.
-    if num_errors == 0:
-        print(colored("\n", 'light_cyan'))
-    else:
-        print(colored(f"\n", 'light_red'))
     agent.delete_db()  # Delete the DB now, instead of waiting for garbage collection to do it.
     return num_errors
 
 
 def test_task_advice_pair():
-    print(colored("\n", 'light_cyan'))
+    print(colored("\nTEST TASK-ADVICE PAIRS", 'light_cyan'))
     num_errors = 0
-
-    # Create the agents.
-    agent = TeachableAgent(
-        name="assistant",
-        llm_config={"config_list": config_list},
-        teach_config={"verbosity": verbosity})
+    agent = create_teachable_agent()
     user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER")
 
     # Ask the agent to do something, and provide some helpful advice.
@@ -97,15 +89,11 @@ def test_task_advice_pair():
     agent.learn_from_recent_user_comments()
 
     # Now start a new chat to clear the context, and require the agent to use its new knowledge.
-    print(colored("\n", 'light_cyan'))
+    print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", 'light_cyan'))
     user.initiate_chat(recipient=agent, message="Please calculate the twist of 8 and 3 and 2.")
     num_errors += check_agent_response(agent, user, "35")
 
     # Wrap up.
-    if num_errors == 0:
-        print(colored("\n", 'light_cyan'))
-    else:
-        print(colored(f"\n", 'light_red'))
     agent.delete_db()  # Delete the DB now, instead of waiting for garbage collection to do it.
     return num_errors
 
 
@@ -120,6 +108,6 @@ def test_task_advice_pair():
     total_num_errors += test_question_answer_pair()
     total_num_errors += test_task_advice_pair()
     if total_num_errors == 0:
-        print(colored("\n", 'light_cyan'))
+        print(colored("\nTEACHABLE AGENT TESTS COMPLETED SUCCESSFULLY", 'light_cyan'))
     else:
-        print(colored(f"\n", 'light_red'))
+        print(colored(f"\nTEACHABLE AGENT TESTS COMPLETED WITH {total_num_errors} TOTAL ERRORS", 'light_red'))
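Note: besides loosening the storage and retrieval rules, the patch above fixes how chromadb query results are indexed. chromadb nests each result field one list per query text, so for a single query the first index selects the query and the second walks its matches; a mock result illustrates the corrected [0][i] layout.

results = {  # Shape of a chromadb query() result for one query text.
    "ids": [["1", "2"]],
    "documents": [["task A", "task B"]],
    "distances": [[0.3, 1.7]],
}

num_results = len(results["ids"][0])          # 2 matches for query 0.
first_uid = results["ids"][0][0]              # Match 0 of query 0 -> "1".
second_distance = results["distances"][0][1]  # Distance of match 1 -> 1.7.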
From 94d8ba0704f437550dcc6cd1c757b8ed396956ea Mon Sep 17 00:00:00 2001
From: rickyloynd-microsoft
Date: Wed, 11 Oct 2023 13:57:23 -0700
Subject: [PATCH 18/50] de-dupe the retrieved memos.

---
 autogen/agentchat/contrib/teachable_agent.py | 74 ++++++++++----------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py
index 0d41b9625c9..889350d8bb0 100644
--- a/autogen/agentchat/contrib/teachable_agent.py
+++ b/autogen/agentchat/contrib/teachable_agent.py
@@ -111,7 +111,7 @@ def _generate_teachable_assistant_reply(
 
         if self.memo_store.num_memos > 0:
             # Consider whether to retrieve something from the DB.
-            new_user_text = self.consider_memo_retrieval(user_text, llm_config)
+            new_user_text = self.consider_memo_retrieval(user_text)
             if new_user_text != user_text:
                 # Make a copy of the message list, and replace the last user message with the new one.
                 messages = messages.copy()
@@ -130,99 +130,99 @@ def learn_from_recent_user_comments(self):
         if len(self.user_comments) > 0:
             for comment in self.user_comments:
                 # Consider whether to store something from this user turn in the DB.
-                self.consider_memo_storage(comment, self.llm_config)
+                self.consider_memo_storage(comment)
         self.user_comments = []
 
-    def consider_memo_storage(self, comment, llm_config):
+    def consider_memo_storage(self, comment):
         """Decides whether to store something from this user turn in the DB."""
         # Check for a problem-solution pair.
-        response = self.analyze(llm_config, comment,
+        response = self.analyze(comment,
             "Does the last user comment contain a task or problem to solve? Answer with just one word, yes or no.")
         if 'yes' in response.lower():
             # Can we extract advice?
-            advice = self.analyze(llm_config, comment,
+            advice = self.analyze(comment,
                 "Briefly copy any advice from the last user comment that may be useful for a similar but different task in the future. But if no advice is present, just respond with \'none\'.")
             if 'none' not in advice.lower():
                 # Yes. Extract the task.
-                task = self.analyze(llm_config, comment,
+                task = self.analyze(comment,
                     "Briefly copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.")
                 # Generalize the task.
-                general_task = self.analyze(llm_config, task,
+                general_task = self.analyze(task,
                     "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.")
                 # Add the task-advice (problem-solution) pair to the vector DB.
                 if self.verbosity >= 1:
                     print(colored("\nREMEMBER THIS TASK-ADVICE PAIR", 'light_yellow'))
                 self.memo_store.add_input_output_pair(general_task, advice)
 
         # Check for information to be learned.
-        response = self.analyze(llm_config, comment,
+        response = self.analyze(comment,
            "Does the last user comment contain information that might be useful later? Answer with just one word, yes or no.")
         if 'yes' in response.lower():
             # Yes. What question would this information answer?
-            question = self.analyze(llm_config, comment,
+            question = self.analyze(comment,
                 "Imagine that the user forgot this information in their last comment. How would they ask you for this information? Include no other text in your response.")
             # Extract the information.
-            answer = self.analyze(llm_config, comment,
+            answer = self.analyze(comment,
                 "Briefly copy the information from the last user comment that may be useful later.")
             # Add the question-answer pair to the vector DB.
             if self.verbosity >= 1:
                 print(colored("\nREMEMBER THIS QUESTION-ANSWER PAIR", 'light_yellow'))
             self.memo_store.add_input_output_pair(question, answer)
 
-    def consider_memo_retrieval(self, comment, llm_config):
+    def consider_memo_retrieval(self, comment):
         """Decides whether to retrieve something from the DB."""
 
-        # First, just use the user comment as the lookup key.
-        expanded_comment = comment
-        expanded_comment = expanded_comment + self.retrieve_relevant_memos(comment)
+        # First, use the user comment directly as the lookup key.
+        if self.verbosity >= 1:
+            print(colored('\nLOOK FOR RELEVANT MEMOS, AS QUESTION-ANSWER PAIRS', 'light_yellow'))
+        memo_list = self.retrieve_relevant_memos(comment)
 
-        # Next, if the comment involves a task, extract and generalize the task before using it as the lookup key.
-        response = self.analyze(llm_config, comment,
+        # Next, if the comment involves a task, then extract and generalize the task before using it as the lookup key.
+        response = self.analyze(comment,
             "Does the last user comment contain a task or problem to solve? Answer with just one word, yes or no.")
         if 'yes' in response.lower():
             # Extract the task.
-            task = self.analyze(llm_config, comment,
+            task = self.analyze(comment,
                 "Copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.")
             # Generalize the task.
-            general_task = self.analyze(llm_config, task,
+            general_task = self.analyze(task,
                 "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.")
             # Append any relevant memos.
-            expanded_comment = expanded_comment + self.retrieve_relevant_memos(general_task)
+            if self.verbosity >= 1:
+                print(colored('\nLOOK FOR RELEVANT MEMOS, AS TASK-ADVICE PAIRS', 'light_yellow'))
+            memo_list.extend(self.retrieve_relevant_memos(general_task))
 
-        # Need to de-duplicate the memos.
+        # De-duplicate the memo list.
+        memo_list = list(set(memo_list))
 
-        return expanded_comment
+        # Append the memos to the last user message.
+        return comment + self.concatenate_memo_texts(memo_list)
 
     def retrieve_relevant_memos(self, input_text):
-        if self.verbosity >= 1:
-            print(colored('\nLOOK FOR RELEVANT MEMOS', 'light_yellow'))
-        memo_texts = ''
-        memos = self.memo_store.get_related_memos(input_text, threshold=self.recall_threshold)
+        memo_list = self.memo_store.get_related_memos(input_text, threshold=self.recall_threshold)
 
         if self.verbosity >= 1:
             # Was anything retrieved?
-            if len(memos) == 0:
+            if len(memo_list) == 0:
                 # No. Look at the closest memo.
                 print(colored('\nTHE CLOSEST MEMO IS BEYOND THE THRESHOLD...', 'light_yellow'))
                 memo = self.memo_store.get_nearest_memo(input_text)
                 print(memo)
 
-        for memo in memos:
-            info = "(Here is some information that might help:\n" + memo[1] + ")"
-            if self.verbosity >= 1:
-                print(colored('\nMEMO APPENDED TO LAST USER MESSAGE...\n' + info + '\n', 'light_yellow'))
-            memo_texts = memo_texts + '\n' + info
-        return memo_texts
+        # Create a list of just the memo output_text strings.
+        memo_list = [memo[1] for memo in memo_list]
+        return memo_list
+
+    def concatenate_memo_texts(self, memo_list):
+        memo_texts = ''
+        for memo in memo_list:
+            info = "(Here is some information that might help:\n" + memo + ")"
+            if self.verbosity >= 1:
+                print(colored('\nMEMO APPENDED TO LAST USER MESSAGE...\n' + info + '\n', 'light_yellow'))
+            memo_texts = memo_texts + '\n' + info
+        return memo_texts
 
-    def analyze(self, llm_config, text_to_analyze, analysis_instructions):
+    def analyze(self, text_to_analyze, analysis_instructions):
         """Combines the text to analyze with the analysis instructions, and sends them to the analyzer."""
         message_text = '\nAnalysis: '.join([text_to_analyze, analysis_instructions])
         self.initiate_chat(recipient=self.analyzer, message=message_text)
         return self.last_message(self.analyzer)["content"]
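Note: the de-duplication above uses list(set(memo_list)), which is correct for hashable strings but does not preserve retrieval order. If ordering ever matters, dict.fromkeys is a drop-in, order-preserving alternative; a hypothetical variant, not what the patch does.

memo_list = ["advice A", "advice B", "advice A"]

deduped_unordered = list(set(memo_list))          # Order not guaranteed.
deduped_ordered = list(dict.fromkeys(memo_list))  # ['advice A', 'advice B']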
From 94d8ba0704f437550dcc6cd1c757b8ed396956ea Mon Sep 17 00:00:00 2001
From: rickyloynd-microsoft
Date: Wed, 11 Oct 2023 18:54:28 -0700
Subject: [PATCH 19/50] Simplify AnalysisAgent. The unit tests now pass with
 gpt-3.5

---
 autogen/agentchat/contrib/analysis_agent.py  | 33 +++++--------
 autogen/agentchat/contrib/teachable_agent.py | 52 ++++++++------------
 test/agentchat/test_teachable_agent.py       |  8 ++-
 3 files changed, 37 insertions(+), 56 deletions(-)

diff --git a/autogen/agentchat/contrib/analysis_agent.py b/autogen/agentchat/contrib/analysis_agent.py
index a84e80e9f71..b978af5c3ce 100644
--- a/autogen/agentchat/contrib/analysis_agent.py
+++ b/autogen/agentchat/contrib/analysis_agent.py
@@ -39,31 +39,20 @@ def _generate_analysis(
         config: Optional[Any] = None,
     ) -> Tuple[bool, Union[str, Dict, None]]:
         """Analyzes the given text as instructed, and returns the analysis."""
-        # Are the following tests necessary?
-        assert config is None  # TODO: Remove this line.
-        llm_config = self.llm_config if config is None else config
-
-        assert llm_config is not False  # TODO: Remove this line.
-        if llm_config is False:
-            return False, None
-
-        assert messages is not None  # TODO: Remove this line.
+        if self.llm_config is False:
+            return False, None  # Return if no LLM was provided.
         if messages is None:
-            messages = self._oai_messages[sender]
-
-        # Extract the text and instructions from the last user message.
-        user_text = messages[-1]['content']
-        text_to_analyze, analysis_instructions = user_text.split('\nAnalysis: ')
-
-        # Assemble the messages.
-        messages = [
-            {"role": "user", "content": text_to_analyze},
-            {"role": "user", "content": analysis_instructions}
-        ]
+            messages = self._oai_messages[sender]  # In case of a direct call.
 
-        msgs = self._oai_system_message + messages
+        # Assemble the message.
+        assert len(messages) == 2
+        text_to_analyze = messages[0]['content']
+        analysis_instructions = messages[1]['content']
+        msg_text = 'INSTRUCTIONS: ' + analysis_instructions + '\n' + 'TEXT: ' + text_to_analyze + '\n' + 'INSTRUCTIONS: ' + analysis_instructions
+        messages = self._oai_system_message + [{"role": "user", "content": msg_text}]
 
-        response = oai.ChatCompletion.create(context=None, messages=msgs, use_cache=self.use_cache, **llm_config)
+        # Generate the analysis.
+        response = oai.ChatCompletion.create(context=None, messages=messages, use_cache=self.use_cache, **self.llm_config)
         response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0]
         return True, response_text
 
diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py
index 889350d8bb0..ad6bccf6d21 100644
--- a/autogen/agentchat/contrib/teachable_agent.py
+++ b/autogen/agentchat/contrib/teachable_agent.py
@@ -72,29 +72,16 @@ def _generate_teachable_assistant_reply(
         self,
         messages: Optional[List[Dict]] = None,
         sender: Optional[Agent] = None,
-        config: Optional[Any] = None,
+        config: Optional[Any] = None,  # Persistent state.
     ) -> Tuple[bool, Union[str, Dict, None]]:
         """
         Generates a reply to the last user message, after querying the memo store for relevant information.
         Uses self.analyzer to make decisions about memo storage and retrieval.
         """
-        if sender == self.analyzer:
-            # This is a response from the text analyzer. Don't reply to it.
-            return True, None
-
-        # Are the following checks needed?
-        assert config is None  # TODO: Remove this line.
-        llm_config = self.llm_config if config is None else config
-
-        assert llm_config is not False  # TODO: Remove this line.
-        if llm_config is False:
-            return False, None
-
-        assert messages is not None  # TODO: Remove this line.
+        if self.llm_config is False:
+            return False, None  # Return if no LLM was provided.
         if messages is None:
-            messages = self._oai_messages[sender]
-
-        # messages contains the previous chat history, excluding the system message.
+            messages = self._oai_messages[sender]  # In case of a direct call.
 
         # Get the last user message.
         user_text = messages[-1]['content']
@@ -119,7 +106,7 @@ def _generate_teachable_assistant_reply(
         ctxt = messages[-1].pop("context", None)  # This peels off any "context" message from the list.
         msgs = self._oai_system_message + messages
-        response = oai.ChatCompletion.create(context=ctxt, messages=msgs, use_cache=self.use_cache, **llm_config)
+        response = oai.ChatCompletion.create(context=ctxt, messages=msgs, use_cache=self.use_cache, **self.llm_config)
         response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0]
         return True, response_text
 
@@ -137,18 +124,18 @@ def consider_memo_storage(self, comment):
         """Decides whether to store something from this user turn in the DB."""
         # Check for a problem-solution pair.
         response = self.analyze(comment,
-            "Does the last user comment contain a task or problem to solve? Answer with just one word, yes or no.")
+            "Does the TEXT contain a task or problem to solve? Answer with just one word, yes or no.")
         if 'yes' in response.lower():
             # Can we extract advice?
             advice = self.analyze(comment,
-                "Briefly copy any advice from the last user comment that may be useful for a similar but different task in the future. But if no advice is present, just respond with \'none\'.")
+                "Briefly copy any advice from the TEXT that may be useful for a similar but different task in the future. But if no advice is present, just respond with \'none\'.")
             if 'none' not in advice.lower():
                 # Yes. Extract the task.
                 task = self.analyze(comment,
-                    "Briefly copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.")
+                    "Briefly copy just the task from the TEXT, then stop. Don't solve it, and don't include any advice.")
                 # Generalize the task.
                 general_task = self.analyze(task,
-                    "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.")
+                    "Summarize very briefly, in general terms, the type of task described in the TEXT. Leave out details that might not appear in a similar problem.")
                 # Add the task-advice (problem-solution) pair to the vector DB.
                 if self.verbosity >= 1:
                     print(colored("\nREMEMBER THIS TASK-ADVICE PAIR", 'light_yellow'))
@@ -156,14 +143,14 @@ def consider_memo_storage(self, comment):
 
         # Check for information to be learned.
         response = self.analyze(comment,
-            "Does the last user comment contain information that might be useful later? Answer with just one word, yes or no.")
+            "Does the TEXT contain information that might be useful later? Answer with just one word, yes or no.")
         if 'yes' in response.lower():
             # Yes. What question would this information answer?
             question = self.analyze(comment,
-                "Imagine that the user forgot this information in their last comment. How would they ask you for this information? Include no other text in your response.")
+                "Imagine that the user forgot this information in the TEXT. How would they ask you for this information? Include no other text in your response.")
             # Extract the information.
             answer = self.analyze(comment,
-                "Briefly copy the information from the last user comment that may be useful later.")
+                "Briefly copy the information from the TEXT that may be useful later.")
             # Add the question-answer pair to the vector DB.
             if self.verbosity >= 1:
                 print(colored("\nREMEMBER THIS QUESTION-ANSWER PAIR", 'light_yellow'))
             self.memo_store.add_input_output_pair(question, answer)
 
@@ -179,14 +166,14 @@ def consider_memo_retrieval(self, comment):
 
         # Next, if the comment involves a task, then extract and generalize the task before using it as the lookup key.
         response = self.analyze(comment,
-            "Does the last user comment contain a task or problem to solve? Answer with just one word, yes or no.")
+            "Does the TEXT contain a task or problem to solve? Answer with just one word, yes or no.")
         if 'yes' in response.lower():
             # Extract the task.
             task = self.analyze(comment,
-                "Copy just the task from the last user comment, then stop. Don't solve it, and don't include any advice.")
+                "Copy just the task from the TEXT, then stop. Don't solve it, and don't include any advice.")
             # Generalize the task.
             general_task = self.analyze(task,
-                "Summarize very briefly, in general terms, the type of task described in the last user comment. Leave out details that might not appear in a similar problem.")
+                "Summarize very briefly, in general terms, the type of task described in the TEXT. Leave out details that might not appear in a similar problem.")
             # Append any relevant memos.
             if self.verbosity >= 1:
                 print(colored('\nLOOK FOR RELEVANT MEMOS, AS TASK-ADVICE PAIRS', 'light_yellow'))
@@ -207,7 +194,7 @@ def retrieve_relevant_memos(self, input_text):
                 # No. Look at the closest memo.
                 print(colored('\nTHE CLOSEST MEMO IS BEYOND THE THRESHOLD...', 'light_yellow'))
                 memo = self.memo_store.get_nearest_memo(input_text)
-                print(memo)
+                print()  # Print a blank line. The memo details were printed by get_nearest_memo().
 
         # Create a list of just the memo output_text strings.
         memo_list = [memo[1] for memo in memo_list]
@@ -223,9 +210,10 @@ def concatenate_memo_texts(self, memo_list):
         return memo_texts
 
     def analyze(self, text_to_analyze, analysis_instructions):
-        """Combines the text to analyze with the analysis instructions, and sends them to the analyzer."""
-        message_text = '\nAnalysis: '.join([text_to_analyze, analysis_instructions])
-        self.initiate_chat(recipient=self.analyzer, message=message_text)
+        """Sends the text to analyze and the analysis instructions to the analyzer."""
+        self.analyzer.reset()  # Clear the analyzer's list of messages.
+        self.send(recipient=self.analyzer, message=text_to_analyze, request_reply=False)  # Put the message in the analyzer's list.
+        self.send(recipient=self.analyzer, message=analysis_instructions, request_reply=True)  # Request the reply.
         return self.last_message(self.analyzer)["content"]
 
diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py
index 2c7187d8436..7aaee30245d 100644
--- a/test/agentchat/test_teachable_agent.py
+++ b/test/agentchat/test_teachable_agent.py
@@ -16,8 +16,12 @@ def colored(x, *args, **kwargs):
 # Load LLM inference endpoints from an env variable or a file
 # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
 # and OAI_CONFIG_LIST_sample
-config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]})
-# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-3.5-turbo-0613"]})
+
+# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]})  # OpenAI
+# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-3.5-turbo-0613"]})  # OpenAI
+
+# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4"]})  # Azure
+config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-35-turbo-16k"]})  # Azure
return self.last_message(self.analyzer)["content"] diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 2c7187d8436..7aaee30245d 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -16,8 +16,12 @@ def colored(x, *args, **kwargs): # Load LLM inference endpoints from an env variable or a file # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints # and OAI_CONFIG_LIST_sample -config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]}) -# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-3.5-turbo-0613"]}) + +# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]}) # OpenAI +# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-3.5-turbo-0613"]}) # OpenAI + +# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4"]}) # Azure +config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-35-turbo-16k"]}) # Azure def create_teachable_agent(): From 30503bf10deb1cc65242efe75b78739a3eb5d812 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 12 Oct 2023 13:10:19 -0700 Subject: [PATCH 20/50] comments --- autogen/agentchat/contrib/analysis_agent.py | 21 ++++++------- autogen/agentchat/contrib/teachable_agent.py | 4 +-- test/agentchat/test_teachable_agent.py | 33 ++++++++++++-------- 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/autogen/agentchat/contrib/analysis_agent.py b/autogen/agentchat/contrib/analysis_agent.py index b978af5c3ce..19f51293e39 100644 --- a/autogen/agentchat/contrib/analysis_agent.py +++ b/autogen/agentchat/contrib/analysis_agent.py @@ -5,12 +5,11 @@ class AnalysisAgent(ConversableAgent): - """(Ongoing research) Text Analysis agent. - """ + """Text Analysis agent, a subclass of ConversableAgent designed to answer specific questions about text.""" def __init__( self, name: str, - system_message: Optional[str] = "You are a helpful assistant specializing in content analysis.", + system_message: Optional[str] = "You are an expert in text analysis.", llm_config: Optional[Union[Dict, bool]] = None, is_termination_msg: Optional[Callable[[Dict], bool]] = None, max_consecutive_auto_reply: Optional[int] = None, @@ -28,17 +27,18 @@ def __init__( llm_config=llm_config, **kwargs, ) - self.register_reply(Agent, AnalysisAgent._generate_analysis) - + self.register_reply(Agent, AnalysisAgent._analyze_text) self.use_cache = False # 1 to skip LLM calls made previously by relying on cached responses. - def _generate_analysis( + def _analyze_text( self, messages: Optional[List[Dict]] = None, sender: Optional[Agent] = None, config: Optional[Any] = None, ) -> Tuple[bool, Union[str, Dict, None]]: - """Analyzes the given text as instructed, and returns the analysis.""" + """Analyzes the given text as instructed, and returns the analysis. + Assumes exactly two messages containing the text to analyze and the analysis instructions respectively. + See TeachableAgent.analyze for an example of how to use this method.""" if self.llm_config is False: return False, None # Return if no LLM was provided. if messages is None: @@ -46,13 +46,12 @@ def _generate_analysis( # Assemble the message. 
assert len(messages) == 2 - text_to_analyze = messages[0]['content'] - analysis_instructions = messages[1]['content'] - msg_text = 'INSTRUCTIONS: ' + analysis_instructions + '\n' + 'TEXT: ' + text_to_analyze + '\n' + 'INSTRUCTIONS: ' + analysis_instructions + text_to_analyze = 'TEXT: ' + messages[0]['content'] + analysis_instructions = 'INSTRUCTIONS: ' + messages[1]['content'] + msg_text = '\n'.join([analysis_instructions, text_to_analyze, analysis_instructions]) # Repeat instructions. messages = self._oai_system_message + [{"role": "user", "content": msg_text}] # Generate the analysis. response = oai.ChatCompletion.create(context=None, messages=messages, use_cache=self.use_cache, **self.llm_config) response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] - return True, response_text diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index ad6bccf6d21..700b8dd2ab4 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -168,6 +168,8 @@ def consider_memo_retrieval(self, comment): response = self.analyze(comment, "Does the TEXT contain a task or problem to solve? Answer with just one word, yes or no.") if 'yes' in response.lower(): + if self.verbosity >= 1: + print(colored('\nLOOK FOR RELEVANT MEMOS, AS TASK-ADVICE PAIRS', 'light_yellow')) # Extract the task. task = self.analyze(comment, "Copy just the task from the TEXT, then stop. Don't solve it, and don't include any advice.") @@ -175,8 +177,6 @@ def consider_memo_retrieval(self, comment): general_task = self.analyze(task, "Summarize very briefly, in general terms, the type of task described in the TEXT. Leave out details that might not appear in a similar problem.") # Append any relevant memos. - if self.verbosity >= 1: - print(colored('\nLOOK FOR RELEVANT MEMOS, AS TASK-ADVICE PAIRS', 'light_yellow')) memo_list.extend(self.retrieve_relevant_memos(general_task)) # De-duplicate the memo list. diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 7aaee30245d..2fec03eed3f 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -10,38 +10,32 @@ def colored(x, *args, **kwargs): return x -verbosity = 2 -assert_on_error = False +verbosity = 2 # 0 to print chat messages, 1 to add DB operations, 2 to add caller details. +assert_on_error = False # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 can sometimes fail. +recall_threshold = 1.5 # Higher numbers allow more memos to be recalled, but can also lead to more false positives. # Load LLM inference endpoints from an env variable or a file # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints # and OAI_CONFIG_LIST_sample +# Define a config_list for the specific model you want to use. 
# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]}) # OpenAI # config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-3.5-turbo-0613"]}) # OpenAI - # config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4"]}) # Azure config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-35-turbo-16k"]}) # Azure def create_teachable_agent(): + """Instantiates a TeachableAgent using the settings from the top of this file.""" agent = TeachableAgent( name="assistant", llm_config={"config_list": config_list}, - teach_config={"verbosity": verbosity, "recall_threshold": 1.5}) + teach_config={"verbosity": verbosity, "recall_threshold": recall_threshold}) return agent -def interact_freely_with_user(): - agent = create_teachable_agent() - user = UserProxyAgent("user", human_input_mode="ALWAYS") - - # Start the chat. - print(colored("\nTo clear the context and start a new chat, type 'new chat'.", 'light_cyan')) - user.initiate_chat(agent, message="Hi") - - def check_agent_response(agent, user, correct_answer): + """Checks whether the agent's response contains the correct answer, and returns the number of errors (1 or 0).""" agent_response = user.last_message(agent)["content"] if correct_answer not in agent_response: print(colored(f"\nTEST FAILED: EXPECTED ANSWER {correct_answer} NOT FOUND IN AGENT RESPONSE", 'light_red')) @@ -53,7 +47,18 @@ def check_agent_response(agent, user, correct_answer): return 0 +def interact_freely_with_user(): + """Starts a free-form chat between the user and a TeachableAgent.""" + agent = create_teachable_agent() + user = UserProxyAgent("user", human_input_mode="ALWAYS") + + # Start the chat. + print(colored("\nTo clear the context and start a new chat, type 'new chat'.", 'light_cyan')) + user.initiate_chat(agent, message="Hi") + + def test_question_answer_pair(): + """Tests whether the agent can answer a question after being taught the answer in a previous chat.""" print(colored("\nTEST QUESTION-ANSWER PAIRS", 'light_cyan')) num_errors = 0 agent = create_teachable_agent() @@ -80,6 +85,7 @@ def test_question_answer_pair(): def test_task_advice_pair(): + """Tests whether the agent can recall and use advice after being taught a task-advice pair in a previous chat.""" print(colored("\nTEST TASK-ADVICE PAIRS", 'light_cyan')) num_errors = 0 agent = create_teachable_agent() @@ -103,6 +109,7 @@ def test_task_advice_pair(): if __name__ == "__main__": + """Runs the unit tests from above, unless the user adds 'interactive' or 'i' as a commandline argument.""" if len(sys.argv) > 1: if sys.argv[1].startswith('i'): interact_freely_with_user() From 4d2cedd02668a767aa20b1910e3759b703f2a60d Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 12 Oct 2023 15:55:10 -0700 Subject: [PATCH 21/50] Add a verbosity level to control analyzer messages. 
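The analyze_text() method added in this patch assembles the analyzer's prompt as an instruction sandwich: the instructions, then the text, then the instructions repeated, which helps keep weaker models (e.g. gpt-3.5) on task. A minimal standalone sketch of that assembly, using hypothetical inputs (only the assembly logic comes from the patch):

    # Sketch of the instruction-sandwich prompt built by analyze_text() below.
    # The sample text and instructions here are hypothetical.
    def build_analysis_prompt(text_to_analyze: str, analysis_instructions: str) -> str:
        text = 'TEXT: ' + text_to_analyze
        instructions = 'INSTRUCTIONS: ' + analysis_instructions
        return '\n'.join([instructions, text, instructions])  # Repeat the instructions.

    print(build_analysis_prompt(
        "Paris is the capital of France.",
        "Does the TEXT contain a question? Answer with just one word, yes or no."))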
--- autogen/agentchat/contrib/analysis_agent.py | 22 ++++++++-------- autogen/agentchat/contrib/teachable_agent.py | 24 ++++++++++++-------- test/agentchat/test_teachable_agent.py | 4 ++-- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/autogen/agentchat/contrib/analysis_agent.py b/autogen/agentchat/contrib/analysis_agent.py index 19f51293e39..5ecef680697 100644 --- a/autogen/agentchat/contrib/analysis_agent.py +++ b/autogen/agentchat/contrib/analysis_agent.py @@ -27,10 +27,10 @@ def __init__( llm_config=llm_config, **kwargs, ) - self.register_reply(Agent, AnalysisAgent._analyze_text) + self.register_reply(Agent, AnalysisAgent._analyze_in_reply) self.use_cache = False # 1 to skip LLM calls made previously by relying on cached responses. - def _analyze_text( + def _analyze_in_reply( self, messages: Optional[List[Dict]] = None, sender: Optional[Agent] = None, config: Optional[Any] = None, ) -> Tuple[bool, Union[str, Dict, None]]: @@ -43,15 +43,19 @@ def _analyze_text( return False, None # Return if no LLM was provided. if messages is None: messages = self._oai_messages[sender] # In case of a direct call. + assert len(messages) == 2 + + # Delegate to the analysis method. + return True, self.analyze_text(messages[0]['content'], messages[1]['content']) + def analyze_text(self, text_to_analyze, analysis_instructions): + """Analyzes the given text as instructed, and returns the analysis.""" # Assemble the message. - assert len(messages) == 2 - text_to_analyze = 'TEXT: ' + messages[0]['content'] - analysis_instructions = 'INSTRUCTIONS: ' + messages[1]['content'] - msg_text = '\n'.join([analysis_instructions, text_to_analyze, analysis_instructions]) # Repeat the instructions. + text_to_analyze = 'TEXT: ' + text_to_analyze + analysis_instructions = 'INSTRUCTIONS: ' + analysis_instructions + msg_text = '\n'.join([analysis_instructions, text_to_analyze, analysis_instructions]) # Repeat the instructions. messages = self._oai_system_message + [{"role": "user", "content": msg_text}] - # Generate the analysis. + # Generate and return the analysis string. response = oai.ChatCompletion.create(context=None, messages=messages, use_cache=self.use_cache, **self.llm_config) - response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] - return True, response_text + return oai.ChatCompletion.extract_text_or_function_call(response)[0] diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 700b8dd2ab4..3f33ce32f4c 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -19,8 +19,8 @@ class TeachableAgent(ConversableAgent): """ def __init__( self, - name="Assistant", # default set to Assistant - system_message: Optional[str] = "You are a helpful AI assistant.", + name="Agent", # default set to "Agent" + system_message: Optional[str] = "You are a helpful AI assistant with memory of prior chats.", llm_config: Optional[Union[Dict, bool]] = None, is_termination_msg: Optional[Callable[[Dict], bool]] = None, max_consecutive_auto_reply: Optional[int] = None, @@ -35,7 +35,7 @@ def __init__( human_input_mode (str): NEVER ask for human input for this agent. teach_config (dict or None): config for the TeachableAgent. To use default config, set to None. Otherwise, set to a dictionary with any of the following keys: - - verbosity (Optional, int): 1 to print DB operations, 2 to add caller details. Default 0. + - verbosity (Optional, int): 1 to include memory operations, 2 to add analyzer messages. Default 0.
- prepopulate (Optional, int): 1 to prepopulate the DB with a set of input-output pairs. Default 1. - use_cache (Optional, bool): True to skip LLM calls made previously by relying on cached responses. Default False. - recall_threshold (Optional, float): The distance threshold for retrieving memos from the DB. Default 1.5. @@ -112,7 +112,8 @@ def _generate_teachable_assistant_reply( return True, response_text def learn_from_recent_user_comments(self): - print(colored("\nREVIEW CHAT FOR ITEMS TO REMEMBER", 'light_cyan')) + if self.verbosity >= 1: + print(colored("\nREVIEW CHAT FOR ITEMS TO REMEMBER", 'light_green')) # Look at each user turn. if len(self.user_comments) > 0: for comment in self.user_comments: @@ -203,7 +204,7 @@ def retrieve_relevant_memos(self, input_text): def concatenate_memo_texts(self, memo_list): memo_texts = '' for memo in memo_list: - info = "(Here is some information that might help:\n" + memo + ")" + info = "(A memory that might help:\n" + memo + ")" if self.verbosity >= 1: print(colored('\nMEMO APPENDED TO LAST USER MESSAGE...\n' + info + '\n', 'light_yellow')) memo_texts = memo_texts + '\n' + info @@ -211,10 +212,15 @@ def concatenate_memo_texts(self, memo_list): def analyze(self, text_to_analyze, analysis_instructions): """Sends the text to analyze and the analysis instructions to the analyzer.""" - self.analyzer.reset() # Clear the analyzer's list of messages. - self.send(recipient=self.analyzer, message=text_to_analyze, request_reply=False) # Put the message in the analyzer's list. - self.send(recipient=self.analyzer, message=analysis_instructions, request_reply=True) # Request the reply. - return self.last_message(self.analyzer)["content"] + if self.verbosity >= 2: + # Use the messaging mechanism so that the analyzer's messages are included in the printed chat. + self.analyzer.reset() # Clear the analyzer's list of messages. + self.send(recipient=self.analyzer, message=text_to_analyze, request_reply=False) # Put the message in the analyzer's list. + self.send(recipient=self.analyzer, message=analysis_instructions, request_reply=True) # Request the reply. + return self.last_message(self.analyzer)["content"] + else: + # Use the analyzer's method directly, to leave analyzer message out of the printed chat. + return self.analyzer.analyze_text(text_to_analyze, analysis_instructions) class MemoStore(): diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 2fec03eed3f..d9a0a74b7a6 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -10,7 +10,7 @@ def colored(x, *args, **kwargs): return x -verbosity = 2 # 0 to print chat messages, 1 to add DB operations, 2 to add caller details. +verbosity = 2 # 1 to include memory operations, 2 to add analyzer messages. assert_on_error = False # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 can sometimes fail. recall_threshold = 1.5 # Higher numbers allow more memos to be recalled, but can also lead to more false positives. 
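The analyze() change above routes analyzer traffic through the normal messaging mechanism only at verbosity >= 2, so the analyzer's side conversation stays out of the printed chat by default. A sketch of that dispatch in isolation, where agent stands in for the TeachableAgent and the attribute names follow the code above:

    # Sketch of the verbosity-gated dispatch in TeachableAgent.analyze().
    def analyze(agent, text_to_analyze, analysis_instructions):
        if agent.verbosity >= 2:
            # Use messaging, so the analyzer's messages appear in the printed chat.
            agent.analyzer.reset()
            agent.send(recipient=agent.analyzer, message=text_to_analyze, request_reply=False)
            agent.send(recipient=agent.analyzer, message=analysis_instructions, request_reply=True)
            return agent.last_message(agent.analyzer)["content"]
        # Otherwise call the analyzer directly, leaving the printed chat clean.
        return agent.analyzer.analyze_text(text_to_analyze, analysis_instructions)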
@@ -28,7 +28,7 @@ def colored(x, *args, **kwargs): def create_teachable_agent(): """Instantiates a TeachableAgent using the settings from the top of this file.""" agent = TeachableAgent( - name="assistant", + name="agent", llm_config={"config_list": config_list}, teach_config={"verbosity": verbosity, "recall_threshold": recall_threshold}) return agent From 8a15d0afe9974e15382a8c343541d4b11fd381a3 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 12 Oct 2023 16:03:46 -0700 Subject: [PATCH 22/50] refactoring --- autogen/agentchat/contrib/teachable_agent.py | 6 ++--- autogen/agentchat/contrib/text_analyzer.py | 23 ------------------- ...alysis_agent.py => text_analyzer_agent.py} | 4 ++-- test/agentchat/test_teachable_agent.py | 2 +- 4 files changed, 6 insertions(+), 29 deletions(-) delete mode 100644 autogen/agentchat/contrib/text_analyzer.py rename autogen/agentchat/contrib/{analysis_agent.py => text_analyzer_agent.py} (96%) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 3f33ce32f4c..769301291a6 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -1,7 +1,7 @@ from autogen import oai from autogen.agentchat.agent import Agent from autogen.agentchat.assistant_agent import ConversableAgent -from autogen.agentchat.contrib.analysis_agent import AnalysisAgent +from autogen.agentchat.contrib.text_analyzer_agent import TextAnalyzerAgent from typing import Callable, Dict, Optional, Union, List, Tuple, Any import chromadb from chromadb.config import Settings @@ -59,7 +59,7 @@ def __init__( self.use_cache = self._teach_config.get("use_cache", False) self.recall_threshold = self._teach_config.get("recall_threshold", 1.5) - self.analyzer = AnalysisAgent("analyzer", llm_config=llm_config) + self.analyzer = TextAnalyzerAgent("analyzer", llm_config=llm_config) self.memo_store = MemoStore(self.verbosity) self.memo_store.prepopulate() @@ -113,7 +113,7 @@ def _generate_teachable_assistant_reply( def learn_from_recent_user_comments(self): if self.verbosity >= 1: - print(colored("\nREVIEW CHAT FOR ITEMS TO REMEMBER", 'light_green')) + print(colored("\nREVIEW CHAT FOR ITEMS TO REMEMBER", 'light_yellow')) # Look at each user turn. if len(self.user_comments) > 0: for comment in self.user_comments: diff --git a/autogen/agentchat/contrib/text_analyzer.py b/autogen/agentchat/contrib/text_analyzer.py deleted file mode 100644 index dad66a3b19f..00000000000 --- a/autogen/agentchat/contrib/text_analyzer.py +++ /dev/null @@ -1,23 +0,0 @@ -from autogen import oai - - -class TextAnalyzer(): - """ Analyzes the content of text as instructed in each call. """ - def __init__(self, use_cache): - self.use_cache = use_cache - - # Prepare the system prompt. - system_message_text = """You are a helpful assistant specializing in content analysis.""" - system_message = {"role": "system", "content": system_message_text} - self.base_messages = [system_message] - - def analyze(self, llm_config, text_to_analyze, analysis_instructions): - # Assembled the messages. - messages = self.base_messages.copy() - messages.append({"role": "user", "content": text_to_analyze}) - messages.append({"role": "user", "content": analysis_instructions}) - - # Get the response. 
- response = oai.ChatCompletion.create(context=None, messages=messages, use_cache=self.use_cache, **llm_config) - response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] - return response_text diff --git a/autogen/agentchat/contrib/analysis_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py similarity index 96% rename from autogen/agentchat/contrib/analysis_agent.py rename to autogen/agentchat/contrib/text_analyzer_agent.py index 5ecef680697..f242bb64d15 100644 --- a/autogen/agentchat/contrib/analysis_agent.py +++ b/autogen/agentchat/contrib/text_analyzer_agent.py @@ -4,7 +4,7 @@ from typing import Callable, Dict, Optional, Union, List, Tuple, Any -class AnalysisAgent(ConversableAgent): +class TextAnalyzerAgent(ConversableAgent): """Text Analysis agent, a subclass of ConversableAgent designed to answer specific questions about text.""" def __init__( self, @@ -27,7 +27,7 @@ def __init__( llm_config=llm_config, **kwargs, ) - self.register_reply(Agent, AnalysisAgent._analyze_in_reply) + self.register_reply(Agent, TextAnalyzerAgent._analyze_in_reply) self.use_cache = False # 1 to skip LLM calls made previously by relying on cached responses. def _analyze_in_reply( diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index d9a0a74b7a6..06bc1c66975 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -10,7 +10,7 @@ def colored(x, *args, **kwargs): return x -verbosity = 2 # 1 to include memory operations, 2 to add analyzer messages. +verbosity = 1 # 1 to include memory operations, 2 to add analyzer messages. assert_on_error = False # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 can sometimes fail. recall_threshold = 1.5 # Higher numbers allow more memos to be recalled, but can also lead to more false positives. From 4806eefd365dc4af933056d29e67c5f559256ef3 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 12 Oct 2023 16:23:11 -0700 Subject: [PATCH 23/50] comments --- autogen/agentchat/contrib/teachable_agent.py | 36 ++++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 769301291a6..c6b7c122582 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -15,12 +15,11 @@ def colored(x, *args, **kwargs): class TeachableAgent(ConversableAgent): - """(Ongoing research) Teachable Assistant agent, using a vector database as a memory store. - """ + """Teachable Agent, a subclass of ConversableAgent using a vector database to remember user teachings.""" def __init__( self, name="Agent", # default set to Assistant - system_message: Optional[str] = "You are a helpful AI assistant with memory of prior chats.", + system_message: Optional[str] = "You are a helpful AI assistant that remembers user teachings from prior chats.", llm_config: Optional[Union[Dict, bool]] = None, is_termination_msg: Optional[Callable[[Dict], bool]] = None, max_consecutive_auto_reply: Optional[int] = None, @@ -66,6 +65,7 @@ def __init__( self.user_comments = [] # Stores user comments until the end of each chat. 
def delete_db(self): + """Forces immediate deletion of the DB.""" self.memo_store.db_client.reset() def _generate_teachable_assistant_reply( @@ -112,8 +112,9 @@ def _generate_teachable_assistant_reply( return True, response_text def learn_from_recent_user_comments(self): + """Reviews the user comments from the last chat, and decides what teachings to store as memos.""" if self.verbosity >= 1: - print(colored("\nREVIEW CHAT FOR ITEMS TO REMEMBER", 'light_yellow')) + print(colored("\nREVIEW CHAT FOR USER TEACHINGS TO REMEMBER", 'light_yellow')) # Look at each user turn. if len(self.user_comments) > 0: for comment in self.user_comments: @@ -122,7 +123,7 @@ def learn_from_recent_user_comments(self): self.user_comments = [] def consider_memo_storage(self, comment): - """Decides whether to store something from this user turn in the DB.""" + """Decides whether to store something from one user comment in the DB.""" # Check for a problem-solution pair. response = self.analyze(comment, "Does the TEXT contain a task or problem to solve? Answer with just one word, yes or no.") @@ -158,7 +159,7 @@ def consider_memo_storage(self, comment): self.memo_store.add_input_output_pair(question, answer) def consider_memo_retrieval(self, comment): - """Decides whether to retrieve something from the DB.""" + """Decides whether to retrieve memos from the DB, and add them to the chat context.""" # First, use the user comment directly as the lookup key. if self.verbosity >= 1: @@ -187,6 +188,7 @@ def consider_memo_retrieval(self, comment): return comment + self.concatenate_memo_texts(memo_list) def retrieve_relevant_memos(self, input_text): + """Returns semantically related memos from the DB.""" memo_list = self.memo_store.get_related_memos(input_text, threshold=self.recall_threshold) if self.verbosity >= 1: @@ -202,6 +204,7 @@ def retrieve_relevant_memos(self, input_text): return memo_list def concatenate_memo_texts(self, memo_list): + """Concatenates the memo texts into a single string, and formats them for inclusion in the chat context.""" memo_texts = '' for memo in memo_list: info = "(A memory that might help:\n" + memo + ")" @@ -211,7 +214,7 @@ def concatenate_memo_texts(self, memo_list): return memo_texts def analyze(self, text_to_analyze, analysis_instructions): - """Sends the text to analyze and the analysis instructions to the analyzer.""" + """Asks the analyzer to analyze the given text according to specific instructions.""" if self.verbosity >= 2: # Use the messaging mechanism so that the analyzer's messages are included in the printed chat. self.analyzer.reset() # Clear the analyzer's list of messages. @@ -224,7 +227,18 @@ def analyze(self, text_to_analyze, analysis_instructions): class MemoStore(): + """ + Provides memory storage and retrieval for a TeachableAgent, using a vector database. + Each DB entry (called a memo) is a pair of strings: an input text and an output text. + The input text may be a question, or a task to perform. + The output text may be an answer to the question, or advice for how to perform the task. + Vector embeddings are currently provided by chromadb's default sentence encoder. + """ def __init__(self, verbosity): + """ + Args: + verbosity: 1 to print memory operations, 0 to omit them. + """ self.verbosity = verbosity # TODO: Expose an option to persist the DB to a file on disk. self.db_client = chromadb.Client(Settings(anonymized_telemetry=False, allow_reset=True)) # In-memory by default. 
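The MemoStore documented above keeps two parallel structures: a chromadb collection that embeds only each memo's input text, and a plain dict mapping the memo's uid to its output text. A stripped-down sketch of the add/retrieve cycle, relying on chromadb's default sentence encoder; the example memo is hypothetical:

    import chromadb
    from chromadb.config import Settings

    client = chromadb.Client(Settings(anonymized_telemetry=False, allow_reset=True))
    vec_db = client.create_collection("memos")  # Embeds input texts with the default encoder.
    info_dict = {}  # Maps a memo uid to its output text (an answer, or advice).

    uid = "1"
    vec_db.add(documents=["What is the twist of two numbers?"], ids=[uid])
    info_dict[uid] = "The twist is their product minus their sum."

    results = vec_db.query(query_texts=["Compute the twist of 5 and 7."], n_results=1)
    nearest_uid, distance = results['ids'][0][0], results['distances'][0][0]
    print(info_dict[nearest_uid], distance)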
@@ -234,7 +248,7 @@ def __init__(self, verbosity): self.info_dict = {} # Maps a memo uid to information like answers or advice. def add_input_output_pair(self, input_text, output_text): - """ Adds an input-output pair to the vector DB. """ + """Adds an input-output pair to the vector DB.""" self.next_uid += 1 self.num_memos += 1 self.vec_db.add(documents=[input_text], ids=[str(self.next_uid)]) @@ -244,7 +258,7 @@ def add_input_output_pair(self, input_text, output_text): input_text, output_text), 'light_green')) def get_nearest_memo(self, query_text): - """ Retrieves the nearest memo to the given query text. """ + """Retrieves the nearest memo to the given query text.""" results = self.vec_db.query(query_texts=[query_text], n_results=1) uid, input_text, distance = results['ids'][0][0], results['documents'][0][0], results['distances'][0][0] output_text = self.info_dict[uid] @@ -254,7 +268,7 @@ def get_nearest_memo(self, query_text): return input_text, output_text, distance def get_related_memos(self, query_text, n_results=4, threshold=1.4): - """ Retrieves memos that are related to the given query text with the threshold. """ + """Retrieves memos that are related to the given query text with the specified threshold.""" results = self.vec_db.query(query_texts=[query_text], n_results=n_results) memos = [] num_results = len(results['ids'][0]) @@ -270,7 +284,7 @@ def get_related_memos(self, query_text, n_results=4, threshold=1.4): return memos def prepopulate(self): - """ Adds arbitrary examples to the vector DB, just to make retrieval less trivial. """ + """Adds a few arbitrary examples to the vector DB, just to make retrieval less trivial.""" examples = [] examples.append({'text': 'When I say papers I mean research papers, which are typically pdfs.', 'label': 'yes'}) examples.append({'text': 'Please verify that each paper you listed actually uses langchain.', 'label': 'no'}) From 86e9e049f2d1bdc0011dfc2362cf474ef8e9b172 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 12 Oct 2023 18:38:23 -0700 Subject: [PATCH 24/50] Persist memory on disk. --- .gitignore | 3 + autogen/agentchat/contrib/teachable_agent.py | 74 +++++++++++++++----- test/agentchat/test_teachable_agent.py | 19 ++++- 3 files changed, 76 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index 98517f9d690..47917823422 100644 --- a/.gitignore +++ b/.gitignore @@ -164,3 +164,6 @@ key_openai.txt key_aoai.txt base_aoai.txt wolfram.txt + +# DB on disk for TeachableAgent +tmp/ diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index c6b7c122582..236f095ea6c 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -1,3 +1,4 @@ +import os from autogen import oai from autogen.agentchat.agent import Agent from autogen.agentchat.assistant_agent import ConversableAgent @@ -5,6 +6,7 @@ from typing import Callable, Dict, Optional, Union, List, Tuple, Any import chromadb from chromadb.config import Settings +import pickle try: @@ -35,7 +37,8 @@ def __init__( teach_config (dict or None): config for the TeachableAgent. To use default config, set to None. Otherwise, set to a dictionary with any of the following keys: - verbosity (Optional, int): 1 to include memory operations, 2 to add analyzer messages. Default 0. - - prepopulate (Optional, int): 1 to prepopulate the DB with a set of input-output pairs. Default 1. + - path_to_db_dir (Optional, str): path to the directory where the DB is stored. 
Default "./tmp/teachable_agent_db" + - prepopulate (Optional, int): True (default) to prepopulate the DB with a set of input-output pairs. - use_cache (Optional, bool): True to skip LLM calls made previously by relying on cached responses. Default False. - recall_threshold (Optional, float): The distance threshold for retrieving memos from the DB. Default 1.5. **kwargs (dict): other kwargs in [ConversableAgent](../conversable_agent#__init__). @@ -54,19 +57,23 @@ def __init__( self._teach_config = {} if teach_config is None else teach_config self.verbosity = self._teach_config.get("verbosity", 0) - self.prepopulate = self._teach_config.get("prepopulate", 1) + self.path_to_db_dir = self._teach_config.get("path_to_db_dir", "./tmp/teachable_agent_db") + self.prepopulate = self._teach_config.get("prepopulate", True) self.use_cache = self._teach_config.get("use_cache", False) self.recall_threshold = self._teach_config.get("recall_threshold", 1.5) self.analyzer = TextAnalyzerAgent("analyzer", llm_config=llm_config) - self.memo_store = MemoStore(self.verbosity) - self.memo_store.prepopulate() + self.memo_store = MemoStore(self.verbosity, self.path_to_db_dir) self.user_comments = [] # Stores user comments until the end of each chat. - def delete_db(self): - """Forces immediate deletion of the DB.""" - self.memo_store.db_client.reset() + def close_db(self): + """Cleanly closes the memo store.""" + self.memo_store.close() + + def reset_db(self): + """Empties the DB.""" + self.memo_store.reset_db(self.prepopulate) def _generate_teachable_assistant_reply( self, @@ -195,7 +202,7 @@ def retrieve_relevant_memos(self, input_text): # Was anything retrieved? if len(memo_list) == 0: # No. Look at the closest memo. - print(colored('\nTHE CLOSEST MEMO IS BEYOND THE THRESHOLD...', 'light_yellow')) + print(colored('\nTHE CLOSEST MEMO IS BEYOND THE THRESHOLD:', 'light_yellow')) memo = self.memo_store.get_nearest_memo(input_text) print() # Print a blank line. The memo details were printed by get_nearest_memo(). @@ -234,25 +241,56 @@ class MemoStore(): The output text may be an answer to the question, or advice for how to perform the task. Vector embeddings are currently provided by chromadb's default sentence encoder. """ - def __init__(self, verbosity): + def __init__(self, verbosity, path_to_db_dir): """ Args: - verbosity: 1 to print memory operations, 0 to omit them. + - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. + - path_to_db_dir (Optional, str): path to the directory where the DB is stored. """ self.verbosity = verbosity - # TODO: Expose an option to persist the DB to a file on disk. - self.db_client = chromadb.Client(Settings(anonymized_telemetry=False, allow_reset=True)) # In-memory by default. - self.vec_db = self.db_client.create_collection("memos") # The collection is the DB. + self.path_to_db_dir = path_to_db_dir + # The DB is always persisted on disk. + settings = Settings(anonymized_telemetry=False, allow_reset=True, persist_directory=path_to_db_dir, is_persistent=True) + self.path_to_dict = os.path.join(path_to_db_dir, 'uid_text_dict.pkl') + if os.path.exists(self.path_to_dict): + # Load the dict from disk. + if self.verbosity >= 1: + print(colored("\nLOADING MEMORY FROM DISK", 'light_green')) + print(colored(" Location = {}".format(self.path_to_dict), 'light_green')) + with open(self.path_to_dict, 'rb') as f: + self.uid_text_dict = pickle.load(f) + else: + # Create an empty dict. 
+ self.uid_text_dict = {} + self.db_client = chromadb.Client(settings) + self.vec_db = self.db_client.create_collection("memos", get_or_create=True) # The collection is the DB. self.next_uid = 0 # Unique ID for each memo. Also serves as a count of total memos added. self.num_memos = 0 - self.info_dict = {} # Maps a memo uid to information like answers or advice. + + def close(self): + """Saves the dict to disk.""" + if self.verbosity >= 1: + print(colored("\nSAVING MEMORY TO DISK", 'light_green')) + print(colored(" Location = {}".format(self.path_to_dict), 'light_green')) + with open(self.path_to_dict, 'wb') as file: + pickle.dump(self.uid_text_dict, file) + + def reset_db(self, prepopulate): + """Forces immediate deletion of the DB's contents, in memory and on disk.""" + if self.verbosity >= 1: + print(colored("\nCLEARING MEMORY", 'light_green')) + self.db_client.delete_collection("memos") + self.vec_db = self.db_client.create_collection("memos") + self.uid_text_dict = {} + if prepopulate: + self.prepopulate() def add_input_output_pair(self, input_text, output_text): """Adds an input-output pair to the vector DB.""" self.next_uid += 1 self.num_memos += 1 self.vec_db.add(documents=[input_text], ids=[str(self.next_uid)]) - self.info_dict[str(self.next_uid)] = output_text + self.uid_text_dict[str(self.next_uid)] = output_text if self.verbosity >= 1: print(colored("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n INPUT\n {}\n OUTPUT\n {}".format( input_text, output_text), 'light_green')) @@ -261,7 +299,7 @@ def get_nearest_memo(self, query_text): """Retrieves the nearest memo to the given query text.""" results = self.vec_db.query(query_texts=[query_text], n_results=1) uid, input_text, distance = results['ids'][0][0], results['documents'][0][0], results['distances'][0][0] - output_text = self.info_dict[uid] + output_text = self.uid_text_dict[uid] if self.verbosity >= 1: print(colored("\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( input_text, output_text, distance), 'light_green')) @@ -275,7 +313,7 @@ def get_related_memos(self, query_text, n_results=4, threshold=1.4): for i in range(num_results): uid, input_text, distance = results['ids'][0][i], results['documents'][0][i], results['distances'][0][i] if distance < threshold: - output_text = self.info_dict[uid] + output_text = self.uid_text_dict[uid] if self.verbosity >= 1: print(colored( "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( @@ -285,6 +323,8 @@ def get_related_memos(self, query_text, n_results=4, threshold=1.4): def prepopulate(self): """Adds a few arbitrary examples to the vector DB, just to make retrieval less trivial.""" + if self.verbosity >= 1: + print(colored("\nPREPOPULATING MEMORY", 'light_green')) examples = [] examples.append({'text': 'When I say papers I mean research papers, which are typically pdfs.', 'label': 'yes'}) examples.append({'text': 'Please verify that each paper you listed actually uses langchain.', 'label': 'no'}) diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 06bc1c66975..99e313ba06d 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -30,7 +30,10 @@ def create_teachable_agent(): agent = TeachableAgent( name="agent", llm_config={"config_list": config_list}, - teach_config={"verbosity": verbosity, "recall_threshold": recall_threshold}) + teach_config={ + "verbosity": verbosity, + "path_to_db_dir": 
"./tmp/teachable_agent_db", + "recall_threshold": recall_threshold}) return agent @@ -53,9 +56,13 @@ def interact_freely_with_user(): user = UserProxyAgent("user", human_input_mode="ALWAYS") # Start the chat. + print(colored("\nLoading previous memory (if any) from disk.", 'light_cyan')) print(colored("\nTo clear the context and start a new chat, type 'new chat'.", 'light_cyan')) user.initiate_chat(agent, message="Hi") + # Wrap up. + agent.close_db() + def test_question_answer_pair(): """Tests whether the agent can answer a question after being taught the answer in a previous chat.""" @@ -64,6 +71,9 @@ def test_question_answer_pair(): agent = create_teachable_agent() user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") + # For a clean test, clear the agent's memory. + agent.reset_db() + # Ask the agent to do something using terminology it doesn't understand. user.initiate_chat(recipient=agent, message="What is the twist of 5 and 7?") @@ -80,7 +90,7 @@ def test_question_answer_pair(): num_errors += check_agent_response(agent, user, "35") # Wrap up. - agent.delete_db() # Delete the DB now, instead of waiting for garbage collection to do it. + agent.close_db() return num_errors @@ -91,6 +101,9 @@ def test_task_advice_pair(): agent = create_teachable_agent() user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") + # For a clean test, clear the agent's memory. + agent.reset_db() + # Ask the agent to do something, and provide some helpful advice. user.initiate_chat(recipient=agent, message="Compute the twist of 5 and 7. Here's a hint: The twist of two or more numbers is their product minus their sum.") num_errors += check_agent_response(agent, user, "23") @@ -104,7 +117,7 @@ def test_task_advice_pair(): num_errors += check_agent_response(agent, user, "35") # Wrap up. - agent.delete_db() # Delete the DB now, instead of waiting for garbage collection to do it. + agent.close_db() return num_errors From 60bd4c24736a5eaa2c66ad5b05085d55edb267e0 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Fri, 13 Oct 2023 17:43:38 -0700 Subject: [PATCH 25/50] cleanup --- autogen/agentchat/contrib/teachable_agent.py | 60 ++++++++------ test/agentchat/test_teachable_agent.py | 83 ++++++++++++-------- 2 files changed, 87 insertions(+), 56 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 236f095ea6c..49cc09fac9b 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -37,6 +37,7 @@ def __init__( teach_config (dict or None): config for the TeachableAgent. To use default config, set to None. Otherwise, set to a dictionary with any of the following keys: - verbosity (Optional, int): 1 to include memory operations, 2 to add analyzer messages. Default 0. + - reset_db (Optional, bool): True to clear the DB before starting. Default False. - path_to_db_dir (Optional, str): path to the directory where the DB is stored. Default "./tmp/teachable_agent_db" - prepopulate (Optional, int): True (default) to prepopulate the DB with a set of input-output pairs. - use_cache (Optional, bool): True to skip LLM calls made previously by relying on cached responses. Default False. 
@@ -57,6 +58,7 @@ def __init__( self._teach_config = {} if teach_config is None else teach_config self.verbosity = self._teach_config.get("verbosity", 0) + self.reset_db = self._teach_config.get("reset_db", False) self.path_to_db_dir = self._teach_config.get("path_to_db_dir", "./tmp/teachable_agent_db") self.prepopulate = self._teach_config.get("prepopulate", True) self.use_cache = self._teach_config.get("use_cache", False) @@ -64,7 +66,7 @@ def __init__( self.analyzer = TextAnalyzerAgent("analyzer", llm_config=llm_config) - self.memo_store = MemoStore(self.verbosity, self.path_to_db_dir) + self.memo_store = MemoStore(self.verbosity, self.reset_db, self.path_to_db_dir) self.user_comments = [] # Stores user comments until the end of each chat. def close_db(self): @@ -73,7 +75,11 @@ def close_db(self): def reset_db(self): """Empties the DB.""" - self.memo_store.reset_db(self.prepopulate) + self.memo_store.reset_db() + + def prepopulate_db(self): + """Initializes the DB with a few arbitrary memos.""" + self.memo_store.prepopulate() def _generate_teachable_assistant_reply( self, @@ -103,7 +109,7 @@ def _generate_teachable_assistant_reply( # This is a normal user turn. Keep track of it for potential storage later. self.user_comments.append(user_text) - if self.memo_store.num_memos > 0: + if self.memo_store.last_memo_id > 0: # Consider whether to retrieve something from the DB. new_user_text = self.consider_memo_retrieval(user_text) if new_user_text != user_text: @@ -152,14 +158,14 @@ def consider_memo_storage(self, comment): # Check for information to be learned. response = self.analyze(comment, - "Does the TEXT contain information that might be useful later? Answer with just one word, yes or no.") + "Does the TEXT contain information that could be committed to memory? Answer with just one word, yes or no.") if 'yes' in response.lower(): # Yes. What question would this information answer? question = self.analyze(comment, "Imagine that the user forgot this information in the TEXT. How would they ask you for this information? Include no other text in your response.") # Extract the information. answer = self.analyze(comment, - "Briefly copy the information from the TEXT that may be useful later.") + "Copy the information from the TEXT that should be committed to memory. Add no explanation.") # Add the question-answer pair to the vector DB. if self.verbosity >= 1: print(colored("\nREMEMBER THIS QUESTION-ANSWER PAIR", 'light_yellow')) @@ -241,31 +247,38 @@ class MemoStore(): The output text may be an answer to the question, or advice for how to perform the task. Vector embeddings are currently provided by chromadb's default sentence encoder. """ - def __init__(self, verbosity, path_to_db_dir): + def __init__(self, verbosity, reset, path_to_db_dir): """ Args: - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. - path_to_db_dir (Optional, str): path to the directory where the DB is stored. """ self.verbosity = verbosity + self.reset = reset self.path_to_db_dir = path_to_db_dir - # The DB is always persisted on disk. + + # Load or create the vector DB on disk. settings = Settings(anonymized_telemetry=False, allow_reset=True, persist_directory=path_to_db_dir, is_persistent=True) + self.db_client = chromadb.Client(settings) + self.vec_db = self.db_client.create_collection("memos", get_or_create=True) # The collection is the DB. + if reset: + self.reset_db() + + # Load or create the associated memo dict on disk. 
self.path_to_dict = os.path.join(path_to_db_dir, 'uid_text_dict.pkl') - if os.path.exists(self.path_to_dict): - # Load the dict from disk. + self.uid_text_dict = {} + self.last_memo_id = 0 + if (not reset) and os.path.exists(self.path_to_dict): if self.verbosity >= 1: print(colored("\nLOADING MEMORY FROM DISK", 'light_green')) print(colored(" Location = {}".format(self.path_to_dict), 'light_green')) with open(self.path_to_dict, 'rb') as f: self.uid_text_dict = pickle.load(f) - else: - # Create an empty dict. - self.uid_text_dict = {} - self.db_client = chromadb.Client(settings) - self.vec_db = self.db_client.create_collection("memos", get_or_create=True) # The collection is the DB. - self.next_uid = 0 # Unique ID for each memo. Also serves as a count of total memos added. - self.num_memos = 0 + self.last_memo_id = len(self.uid_text_dict) + if self.verbosity >= 1: + print(colored("LIST OF MEMOS LOADED FROM DISK", 'light_green')) + for uid, text in self.uid_text_dict.items(): + print(colored(" ID: {} TEXT: {}".format(uid, text), 'light_green')) def close(self): """Saves the dict to disk.""" @@ -275,25 +288,22 @@ def close(self): with open(self.path_to_dict, 'wb') as file: pickle.dump(self.uid_text_dict, file) - def reset_db(self, prepopulate): + def reset_db(self): """Forces immediate deletion of the DB's contents, in memory and on disk.""" if self.verbosity >= 1: print(colored("\nCLEARING MEMORY", 'light_green')) self.db_client.delete_collection("memos") self.vec_db = self.db_client.create_collection("memos") self.uid_text_dict = {} - if prepopulate: - self.prepopulate() def add_input_output_pair(self, input_text, output_text): """Adds an input-output pair to the vector DB.""" - self.next_uid += 1 - self.num_memos += 1 - self.vec_db.add(documents=[input_text], ids=[str(self.next_uid)]) - self.uid_text_dict[str(self.next_uid)] = output_text + self.last_memo_id += 1 + self.vec_db.add(documents=[input_text], ids=[str(self.last_memo_id)]) + self.uid_text_dict[str(self.last_memo_id)] = output_text if self.verbosity >= 1: - print(colored("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n INPUT\n {}\n OUTPUT\n {}".format( - input_text, output_text), 'light_green')) + print(colored("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}".format( + self.last_memo_id, input_text, output_text), 'light_green')) def get_nearest_memo(self, query_text): """Retrieves the nearest memo to the given query text.""" diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 99e313ba06d..1abdcc15359 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -10,28 +10,29 @@ def colored(x, *args, **kwargs): return x -verbosity = 1 # 1 to include memory operations, 2 to add analyzer messages. -assert_on_error = False # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 can sometimes fail. -recall_threshold = 1.5 # Higher numbers allow more memos to be recalled, but can also lead to more false positives. +verbosity = 1 # 0 to print the basic user chat, 1 to include memory operations, 2 to add analyzer messages. +assert_on_error = False # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable. +recall_threshold = 1.5 # Higher numbers allow more (but less relevant) memos to be recalled. 
-# Load LLM inference endpoints from an env variable or a file -# See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints -# and OAI_CONFIG_LIST_sample +# Specify the model to use by uncommenting one of the following lines. +# filter_dict={"model": ["gpt-4-0613"]} +# filter_dict={"model": ["gpt-3.5-turbo-0613"]} +# filter_dict={"model": ["gpt-4"]} +filter_dict={"model": ["gpt-35-turbo-16k"]} -# Define a config_list for the specific model you want to use. -# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4-0613"]}) # OpenAI -# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-3.5-turbo-0613"]}) # OpenAI -# config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-4"]}) # Azure -config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict={"model": ["gpt-35-turbo-16k"]}) # Azure - -def create_teachable_agent(): +def create_teachable_agent(reset_db=False): """Instantiates a TeachableAgent using the settings from the top of this file.""" + # Load LLM inference endpoints from an env variable or a file + # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints + # and OAI_CONFIG_LIST_sample + config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) agent = TeachableAgent( name="agent", llm_config={"config_list": config_list}, teach_config={ "verbosity": verbosity, + "reset_db": reset_db, "path_to_db_dir": "./tmp/teachable_agent_db", "recall_threshold": recall_threshold}) return agent @@ -52,14 +53,19 @@ def check_agent_response(agent, user, correct_answer): def interact_freely_with_user(): """Starts a free-form chat between the user and a TeachableAgent.""" - agent = create_teachable_agent() + + # Create the agents. + print(colored("\nLoading previous memory (if any) from disk.", 'light_cyan')) + agent = create_teachable_agent(reset_db=False) user = UserProxyAgent("user", human_input_mode="ALWAYS") # Start the chat. - print(colored("\nLoading previous memory (if any) from disk.", 'light_cyan')) print(colored("\nTo clear the context and start a new chat, type 'new chat'.", 'light_cyan')) user.initiate_chat(agent, message="Hi") + # Let the agent remember things that should be learned from this chat. + agent.learn_from_recent_user_comments() + # Wrap up. agent.close_db() @@ -67,19 +73,20 @@ def interact_freely_with_user(): def test_question_answer_pair(): """Tests whether the agent can answer a question after being taught the answer in a previous chat.""" print(colored("\nTEST QUESTION-ANSWER PAIRS", 'light_cyan')) - num_errors = 0 - agent = create_teachable_agent() + num_errors, num_tests = 0, 0 + agent = create_teachable_agent(reset_db=True) # For a clean test, clear the agent's memory. user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") - # For a clean test, clear the agent's memory. - agent.reset_db() + # Prepopulate memory with a few arbitrary memos, just to make retrieval less trivial. + agent.prepopulate_db() # Ask the agent to do something using terminology it doesn't understand. user.initiate_chat(recipient=agent, message="What is the twist of 5 and 7?") # Explain the terminology to the agent. 
- user.send(recipient=agent, message="The twist of two or more numbers is their product minus their sum.") + user.send(recipient=agent, message="Actually, the twist of two or more numbers is their product minus their sum.") num_errors += check_agent_response(agent, user, "23") + num_tests += 1 # Let the agent remember things that should be learned from this chat. agent.learn_from_recent_user_comments() @@ -88,25 +95,27 @@ def test_question_answer_pair(): print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", 'light_cyan')) user.initiate_chat(recipient=agent, message="What's the twist of 8 and 3 and 2?") num_errors += check_agent_response(agent, user, "35") + num_tests += 1 # Wrap up. agent.close_db() - return num_errors + return num_errors, num_tests def test_task_advice_pair(): """Tests whether the agent can recall and use advice after being taught a task-advice pair in a previous chat.""" print(colored("\nTEST TASK-ADVICE PAIRS", 'light_cyan')) - num_errors = 0 - agent = create_teachable_agent() + num_errors, num_tests = 0, 0 + agent = create_teachable_agent(reset_db=True) # For a clean test, clear the agent's memory. user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") - # For a clean test, clear the agent's memory. - agent.reset_db() + # Prepopulate memory with a few arbitrary memos, just to make retrieval less trivial. + agent.prepopulate_db() # Ask the agent to do something, and provide some helpful advice. user.initiate_chat(recipient=agent, message="Compute the twist of 5 and 7. Here's a hint: The twist of two or more numbers is their product minus their sum.") num_errors += check_agent_response(agent, user, "23") + num_tests += 1 # Let the agent remember things that should be learned from this chat. agent.learn_from_recent_user_comments() @@ -115,10 +124,11 @@ def test_task_advice_pair(): print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", 'light_cyan')) user.initiate_chat(recipient=agent, message="Please calculate the twist of 8 and 3 and 2.") num_errors += check_agent_response(agent, user, "35") + num_tests += 1 # Wrap up. agent.close_db() - return num_errors + return num_errors, num_tests if __name__ == "__main__": @@ -128,10 +138,21 @@ def test_task_advice_pair(): interact_freely_with_user() exit() - total_num_errors = 0 - total_num_errors += test_question_answer_pair() - total_num_errors += test_task_advice_pair() + total_num_errors, total_num_tests = 0, 0 + + num_trials = 1 + for trial in range(num_trials): + num_errors, num_tests = test_question_answer_pair() + total_num_errors += num_errors + total_num_tests += num_tests + + num_errors, num_tests = test_task_advice_pair() + total_num_errors += num_errors + total_num_tests += num_tests + + print(colored(f"\nTRIAL {trial + 1} OF {num_trials} FINISHED", 'light_cyan')) + if total_num_errors == 0: - print(colored("\nTEACHABLE AGENT TESTS COMPLETED SUCCESSFULLY", 'light_cyan')) + print(colored("\nTEACHABLE AGENT TESTS FINISHED WITH ZERO ERRORS", 'light_cyan')) else: - print(colored(f"\nTEACHABLE AGENT TESTS COMPLETED WITH {total_num_errors} TOTAL ERRORS", 'light_red')) + print(colored(f"\nTEACHABLE AGENT TESTS FINISHED WITH {total_num_errors} / {total_num_tests} TOTAL ERRORS ({100.0 * total_num_errors / total_num_tests}%)", 'light_red')) From b0ee20747c214da9f373888a46f882ef0c089cb3 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Mon, 16 Oct 2023 11:11:20 -0700 Subject: [PATCH 26/50] Use markdown to format retrieved memos. 
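This patch gathers the retrieved memos into one markdown section appended to the last user message, instead of one parenthesized note per memo. A small sketch of the new output shape, with a hypothetical memo string:

    def concatenate_memo_texts(memo_list):
        # Render retrieved memos as a single markdown list for the chat context.
        memo_texts = ''
        if len(memo_list) > 0:
            info = "\n# Memories that might help\n"
            for memo in memo_list:
                info = info + "- " + memo + "\n"
            memo_texts = memo_texts + '\n' + info
        return memo_texts

    print(concatenate_memo_texts(
        ["The twist of two or more numbers is their product minus their sum."]))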
--- autogen/agentchat/contrib/teachable_agent.py | 10 ++++++---- test/agentchat/test_teachable_agent.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 49cc09fac9b..3b952de24a8 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -217,12 +217,14 @@ def retrieve_relevant_memos(self, input_text): return memo_list def concatenate_memo_texts(self, memo_list): - """Concatenates the memo texts into a single string, and formats them for inclusion in the chat context.""" + """Concatenates the memo texts into a single string for inclusion in the chat context.""" memo_texts = '' - for memo in memo_list: - info = "(A memory that might help:\n" + memo + ")" + if len(memo_list) > 0: + info = "\n# Memories that might help\n" + for memo in memo_list: + info = info + "- " + memo + "\n" if self.verbosity >= 1: - print(colored('\nMEMO APPENDED TO LAST USER MESSAGE...\n' + info + '\n', 'light_yellow')) + print(colored('\nMEMOS APPENDED TO LAST USER MESSAGE...\n' + info + '\n', 'light_yellow')) memo_texts = memo_texts + '\n' + info return memo_texts diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 1abdcc15359..f95556cf389 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -84,7 +84,7 @@ def test_question_answer_pair(): user.initiate_chat(recipient=agent, message="What is the twist of 5 and 7?") # Explain the terminology to the agent. - user.send(recipient=agent, message="Actually, the twist of two or more numbers is their product minus their sum.") + user.send(recipient=agent, message="Actually, the twist of two or more numbers is their product minus their sum. Try again.") num_errors += check_agent_response(agent, user, "23") num_tests += 1 From 260962d2709226cb19dd68aaf09deb19a2d21a09 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Mon, 16 Oct 2023 12:19:24 -0700 Subject: [PATCH 27/50] Use markdown in TextAnalyzerAgent --- autogen/agentchat/contrib/text_analyzer_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py index f242bb64d15..dc3ad13c38f 100644 --- a/autogen/agentchat/contrib/text_analyzer_agent.py +++ b/autogen/agentchat/contrib/text_analyzer_agent.py @@ -51,8 +51,8 @@ def _analyze_in_reply( def analyze_text(self, text_to_analyze, analysis_instructions): """Analyzes the given text as instructed, and returns the analysis.""" # Assemble the message. - text_to_analyze = 'TEXT: ' + text_to_analyze - analysis_instructions = 'INSTRUCTIONS: ' + analysis_instructions + text_to_analyze = '# TEXT\n' + text_to_analyze + '\n' + analysis_instructions = '# Instructions\n' + analysis_instructions + '\n' msg_text = '\n'.join([analysis_instructions, text_to_analyze, analysis_instructions]) # Repeat the instructions. messages = self._oai_system_message + [{"role": "user", "content": msg_text}] From 0da73a3f2822df84c90342949e08be330bfc7b2f Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Mon, 16 Oct 2023 15:56:45 -0700 Subject: [PATCH 28/50] Add another verbosity level. 
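Alongside the new verbosity level 3 (memo lists), this patch changes uid_text_dict to store the full (input_text, output_text) pair, so each entry can be listed and cross-checked against the vector DB. A sketch of that bookkeeping, with a hypothetical memo:

    uid_text_dict = {}  # Maps a memo uid to its (input_text, output_text) pair.
    last_memo_id = 0

    def add_input_output_pair(input_text, output_text):
        global last_memo_id
        last_memo_id += 1
        uid_text_dict[str(last_memo_id)] = input_text, output_text

    def list_memos():
        for uid, text in uid_text_dict.items():
            input_text, output_text = text
            print("  ID: {}\n    INPUT TEXT: {}\n    OUTPUT TEXT: {}".format(
                uid, input_text, output_text))

    add_input_output_pair("What is the twist of two numbers?",
                          "The twist is their product minus their sum.")
    list_memos()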
--- autogen/agentchat/contrib/teachable_agent.py | 28 +++++++++++++------- test/agentchat/test_teachable_agent.py | 8 +++--- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 3b952de24a8..90aaa4e37eb 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -36,7 +36,7 @@ def __init__( human_input_mode (str): NEVER ask for human input for this agent. teach_config (dict or None): config for the TeachableAgent. To use default config, set to None. Otherwise, set to a dictionary with any of the following keys: - - verbosity (Optional, int): 1 to include memory operations, 2 to add analyzer messages. Default 0. + - verbosity (Optional, int): # 0 (default) for basic user chat, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. - reset_db (Optional, bool): True to clear the DB before starting. Default False. - path_to_db_dir (Optional, str): path to the directory where the DB is stored. Default "./tmp/teachable_agent_db" - prepopulate (Optional, int): True (default) to prepopulate the DB with a set of input-output pairs. @@ -277,10 +277,14 @@ def __init__(self, verbosity, reset, path_to_db_dir): with open(self.path_to_dict, 'rb') as f: self.uid_text_dict = pickle.load(f) self.last_memo_id = len(self.uid_text_dict) - if self.verbosity >= 1: - print(colored("LIST OF MEMOS LOADED FROM DISK", 'light_green')) - for uid, text in self.uid_text_dict.items(): - print(colored(" ID: {} TEXT: {}".format(uid, text), 'light_green')) + if self.verbosity >= 3: + self.list_memos() + + def list_memos(self): + print(colored("LIST OF MEMOS", 'light_green')) + for uid, text in self.uid_text_dict.items(): + input_text, output_text = text + print(colored(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text), 'light_green')) def close(self): """Saves the dict to disk.""" @@ -302,18 +306,21 @@ def add_input_output_pair(self, input_text, output_text): """Adds an input-output pair to the vector DB.""" self.last_memo_id += 1 self.vec_db.add(documents=[input_text], ids=[str(self.last_memo_id)]) - self.uid_text_dict[str(self.last_memo_id)] = output_text + self.uid_text_dict[str(self.last_memo_id)] = input_text, output_text if self.verbosity >= 1: print(colored("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}".format( self.last_memo_id, input_text, output_text), 'light_green')) + if self.verbosity >= 3: + self.list_memos() def get_nearest_memo(self, query_text): """Retrieves the nearest memo to the given query text.""" results = self.vec_db.query(query_texts=[query_text], n_results=1) uid, input_text, distance = results['ids'][0][0], results['documents'][0][0], results['distances'][0][0] - output_text = self.uid_text_dict[uid] + input_text_2, output_text = self.uid_text_dict[uid] + assert input_text == input_text_2 if self.verbosity >= 1: - print(colored("\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( + print(colored("\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( input_text, output_text, distance), 'light_green')) return input_text, output_text, distance @@ -325,10 +332,11 @@ def get_related_memos(self, query_text, n_results=4, threshold=1.4): for i in range(num_results): uid, input_text, distance = results['ids'][0][i], results['documents'][0][i], 
results['distances'][0][i] if distance < threshold: - output_text = self.uid_text_dict[uid] + input_text_2, output_text = self.uid_text_dict[uid] + assert input_text == input_text_2 if self.verbosity >= 1: print(colored( - "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( + "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( input_text, output_text, distance), 'light_green')) memos.append((input_text, output_text, distance)) return memos diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index f95556cf389..3204a586256 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -10,8 +10,8 @@ def colored(x, *args, **kwargs): return x -verbosity = 1 # 0 to print the basic user chat, 1 to include memory operations, 2 to add analyzer messages. -assert_on_error = False # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable. +verbosity = 0 # 0 for basic user chat, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. +assert_on_error = True # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable. recall_threshold = 1.5 # Higher numbers allow more (but less relevant) memos to be recalled. # Specify the model to use by uncommenting one of the following lines. @@ -29,7 +29,9 @@ def create_teachable_agent(reset_db=False): config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) agent = TeachableAgent( name="agent", - llm_config={"config_list": config_list}, + llm_config={ + "config_list": config_list, + "request_timeout": 120}, teach_config={ "verbosity": verbosity, "reset_db": reset_db, From 15bd56bca981149e59a1c79a57881c02445be0b5 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Mon, 16 Oct 2023 16:21:20 -0700 Subject: [PATCH 29/50] clean up logging --- autogen/agentchat/contrib/teachable_agent.py | 23 +++++++++++--------- test/agentchat/test_teachable_agent.py | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 90aaa4e37eb..3d8b5b34c2e 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -36,7 +36,7 @@ def __init__( human_input_mode (str): NEVER ask for human input for this agent. teach_config (dict or None): config for the TeachableAgent. To use default config, set to None. Otherwise, set to a dictionary with any of the following keys: - - verbosity (Optional, int): # 0 (default) for basic user chat, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. + - verbosity (Optional, int): # 0 (default) for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. - reset_db (Optional, bool): True to clear the DB before starting. Default False. - path_to_db_dir (Optional, str): path to the directory where the DB is stored. Default "./tmp/teachable_agent_db" - prepopulate (Optional, int): True (default) to prepopulate the DB with a set of input-output pairs. 
@@ -126,8 +126,7 @@ def _generate_teachable_assistant_reply( def learn_from_recent_user_comments(self): """Reviews the user comments from the last chat, and decides what teachings to store as memos.""" - if self.verbosity >= 1: - print(colored("\nREVIEW CHAT FOR USER TEACHINGS TO REMEMBER", 'light_yellow')) + print(colored("\nREVIEW CHAT FOR USER TEACHINGS TO REMEMBER", 'light_yellow')) # Look at each user turn. if len(self.user_comments) > 0: for comment in self.user_comments: @@ -260,7 +259,11 @@ def __init__(self, verbosity, reset, path_to_db_dir): self.path_to_db_dir = path_to_db_dir # Load or create the vector DB on disk. - settings = Settings(anonymized_telemetry=False, allow_reset=True, persist_directory=path_to_db_dir, is_persistent=True) + settings = Settings( + anonymized_telemetry=False, + allow_reset=True, + is_persistent=True, + persist_directory=path_to_db_dir) self.db_client = chromadb.Client(settings) self.vec_db = self.db_client.create_collection("memos", get_or_create=True) # The collection is the DB. if reset: @@ -271,9 +274,8 @@ def __init__(self, verbosity, reset, path_to_db_dir): self.uid_text_dict = {} self.last_memo_id = 0 if (not reset) and os.path.exists(self.path_to_dict): - if self.verbosity >= 1: - print(colored("\nLOADING MEMORY FROM DISK", 'light_green')) - print(colored(" Location = {}".format(self.path_to_dict), 'light_green')) + print(colored("\nLOADING MEMORY FROM DISK", 'light_green')) + print(colored(" Location = {}".format(self.path_to_dict), 'light_green')) with open(self.path_to_dict, 'rb') as f: self.uid_text_dict = pickle.load(f) self.last_memo_id = len(self.uid_text_dict) @@ -288,9 +290,8 @@ def list_memos(self): def close(self): """Saves the dict to disk.""" - if self.verbosity >= 1: - print(colored("\nSAVING MEMORY TO DISK", 'light_green')) - print(colored(" Location = {}".format(self.path_to_dict), 'light_green')) + print(colored("\nSAVING MEMORY TO DISK", 'light_green')) + print(colored(" Location = {}".format(self.path_to_dict), 'light_green')) with open(self.path_to_dict, 'wb') as file: pickle.dump(self.uid_text_dict, file) @@ -326,6 +327,8 @@ def get_nearest_memo(self, query_text): def get_related_memos(self, query_text, n_results=4, threshold=1.4): """Retrieves memos that are related to the given query text with the specified threshold.""" + if n_results > len(self.uid_text_dict): + n_results = len(self.uid_text_dict) results = self.vec_db.query(query_texts=[query_text], n_results=n_results) memos = [] num_results = len(results['ids'][0]) diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 3204a586256..317efb67984 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -10,7 +10,7 @@ def colored(x, *args, **kwargs): return x -verbosity = 0 # 0 for basic user chat, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. +verbosity = 0 # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. assert_on_error = True # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable. recall_threshold = 1.5 # Higher numbers allow more (but less relevant) memos to be recalled. 
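Taken together, the two patches above have related-memo retrieval filter results by a distance threshold and clamp `n_results` so it never exceeds the number of stored memos. A self-contained sketch of that query logic against an in-memory ChromaDB collection (the sample documents are illustrative, and `count()` stands in for the patch's `len(self.uid_text_dict)`):

```python
import chromadb

client = chromadb.Client()  # In-memory; the agent uses a persistent client.
vec_db = client.create_collection("memos", get_or_create=True)
vec_db.add(documents=["what is the twist of two numbers",
                      "summarize an abstract as bullet points"],
           ids=["1", "2"])

def get_related_memos(query_text, n_results, threshold):
    """Query the vector DB, keeping only results closer than the threshold."""
    n_results = min(n_results, vec_db.count())  # Clamp, as in the patch.
    results = vec_db.query(query_texts=[query_text], n_results=n_results)
    memos = []
    for uid, doc, dist in zip(results['ids'][0], results['documents'][0],
                              results['distances'][0]):
        if dist < threshold:
            memos.append((uid, doc, dist))
    return memos

print(get_related_memos("compute the twist of 5 and 7", n_results=10, threshold=1.5))
```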
From cd4cd8fd5c3b25f2ba376d98c71b16ff6f0e9823 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Tue, 17 Oct 2023 14:51:30 -0700 Subject: [PATCH 30/50] notebook --- autogen/agentchat/contrib/teachable_agent.py | 15 +- notebook/agentchat_Teachability.ipynb | 778 +++++++++++++++++++ test/agentchat/test_teachable_agent.py | 4 +- 3 files changed, 786 insertions(+), 11 deletions(-) create mode 100644 notebook/agentchat_Teachability.ipynb diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 3d8b5b34c2e..5b7980b7148 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -42,6 +42,7 @@ def __init__( - prepopulate (Optional, int): True (default) to prepopulate the DB with a set of input-output pairs. - use_cache (Optional, bool): True to skip LLM calls made previously by relying on cached responses. Default False. - recall_threshold (Optional, float): The distance threshold for retrieving memos from the DB. Default 1.5. + - max_num_retrievals (Optional, int): The maximum number of memos to retrieve from the DB. Default 10. **kwargs (dict): other kwargs in [ConversableAgent](../conversable_agent#__init__). """ super().__init__( @@ -63,6 +64,7 @@ def __init__( self.prepopulate = self._teach_config.get("prepopulate", True) self.use_cache = self._teach_config.get("use_cache", False) self.recall_threshold = self._teach_config.get("recall_threshold", 1.5) + self.max_num_retrievals = self._teach_config.get("max_num_retrievals", 10) self.analyzer = TextAnalyzerAgent("analyzer", llm_config=llm_config) @@ -73,10 +75,6 @@ def close_db(self): """Cleanly closes the memo store.""" self.memo_store.close() - def reset_db(self): - """Empties the DB.""" - self.memo_store.reset_db() - def prepopulate_db(self): """Initializes the DB with a few arbitrary memos.""" self.memo_store.prepopulate() @@ -126,7 +124,7 @@ def _generate_teachable_assistant_reply( def learn_from_recent_user_comments(self): """Reviews the user comments from the last chat, and decides what teachings to store as memos.""" - print(colored("\nREVIEW CHAT FOR USER TEACHINGS TO REMEMBER", 'light_yellow')) + print(colored("\nREVIEWING CHAT FOR USER TEACHINGS TO REMEMBER", 'light_yellow')) # Look at each user turn. if len(self.user_comments) > 0: for comment in self.user_comments: @@ -201,7 +199,7 @@ def consider_memo_retrieval(self, comment): def retrieve_relevant_memos(self, input_text): """Returns semantically related memos from the DB.""" - memo_list = self.memo_store.get_related_memos(input_text, threshold=self.recall_threshold) + memo_list = self.memo_store.get_related_memos(input_text, n_results=self.max_num_retrievals, threshold=self.recall_threshold) if self.verbosity >= 1: # Was anything retrieved? 
@@ -297,8 +295,7 @@ def close(self): def reset_db(self): """Forces immediate deletion of the DB's contents, in memory and on disk.""" - if self.verbosity >= 1: - print(colored("\nCLEARING MEMORY", 'light_green')) + print(colored("\nCLEARING MEMORY", 'light_green')) self.db_client.delete_collection("memos") self.vec_db = self.db_client.create_collection("memos") self.uid_text_dict = {} @@ -325,7 +322,7 @@ def get_nearest_memo(self, query_text): input_text, output_text, distance), 'light_green')) return input_text, output_text, distance - def get_related_memos(self, query_text, n_results=4, threshold=1.4): + def get_related_memos(self, query_text, n_results, threshold): """Retrieves memos that are related to the given query text with the specified threshold.""" if n_results > len(self.uid_text_dict): n_results = len(self.uid_text_dict) diff --git a/notebook/agentchat_Teachability.ipynb b/notebook/agentchat_Teachability.ipynb new file mode 100644 index 00000000000..8151a002807 --- /dev/null +++ b/notebook/agentchat_Teachability.ipynb @@ -0,0 +1,778 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Chatting with TeachableAgent\n", + "\n", + "Conversational assistants based on LLMs can remember the current chat with the user, and can even demonstrate in-context learning of things that the user teaches the assistant during the chat. But these memories and learnings are lost once the chat is over, or when a single chat grows too long. In future chats, the user is forced to repeat any instructions that will be needed.\n", + "\n", + "This notebook demonstrates a few ways of using `TeachableAgent`, which persists user teachings across chat boundaries in long-term memory (a vector database). Memory is save to disk at the end of each chat, and loaded from disk at the start of the next. Instead of copying all of memory into the chat context, which would take up valuable space, individual memories (called memos) are retrieved into the chat context as needed. This allows the user to teach frequently used facts and skills to the agent just once, and have the agent remember them in future chats.\n", + "\n", + "## Requirements\n", + "\n", + "AutoGen requires `Python>=3.8`. To run this notebook example, please install the [teachability] option.\n", + "```bash\n", + "pip install \"pyautogen[teachability]\"\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture --no-stderr\n", + "# %pip install \"pyautogen[teachability]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set your API Endpoint\n", + "\n", + "The [`config_list_from_json`](https://microsoft.github.io/autogen/docs/reference/oai/openai_utils#config_list_from_json) function loads a list of configurations from an environment variable or a json file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gpt-4-0613\n" + ] + } + ], + "source": [ + "import autogen\n", + "\n", + "config_list = autogen.config_list_from_json(\n", + " env_or_file=\"OAI_CONFIG_LIST\",\n", + " file_location=\".\",\n", + " filter_dict={\n", + " \"model\": [\"gpt-4-0613\", \"gpt-4\", \"gpt4\", \"gpt-4-32k\"],\n", + " },\n", + ")\n", + "\n", + "print(config_list[0][\"model\"])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It first looks for environment variable \"OAI_CONFIG_LIST\" which needs to be a valid json string. If that variable is not found, it then looks for a json file named \"OAI_CONFIG_LIST\". It filters the configs by models (you can filter by other keys as well). After application of this particular filter, only the gpt-4 and gpt-3.5-turbo models are kept.\n", + "\n", + "The config list looks like the following:\n", + "```python\n", + "config_list = [\n", + " {\n", + " 'model': 'gpt-4',\n", + " 'api_key': '',\n", + " },\n", + " {\n", + " 'model': 'gpt-4',\n", + " 'api_key': '',\n", + " 'api_base': '',\n", + " 'api_type': 'azure',\n", + " 'api_version': '2023-06-01-preview',\n", + " },\n", + " {\n", + " 'model': 'gpt-4-32k',\n", + " 'api_key': '',\n", + " 'api_base': '',\n", + " 'api_type': 'azure',\n", + " 'api_version': '2023-06-01-preview',\n", + " },\n", + "]\n", + "```\n", + "\n", + "If you open this notebook in colab, you can upload your files by clicking the file icon on the left panel and then choose \"upload file\" icon.\n", + "\n", + "You can set the value of config_list in other ways if you prefer, e.g., loading from a YAML file." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Construct Agents\n", + "For this walkthrough, we start by resetting the agent's memory store. This deletes any memories from prior conversations that may be stored on disk." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[92m\n", + "CLEARING MEMORY\u001b[0m\n" + ] + } + ], + "source": [ + "from autogen.agentchat.contrib.teachable_agent import TeachableAgent\n", + "from autogen import UserProxyAgent\n", + "\n", + "llm_config = {\n", + " \"request_timeout\": 60,\n", + " \"config_list\": config_list,\n", + "}\n", + "\n", + "teach_config={\n", + " \"verbosity\": 0, # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.\n", + " \"reset_db\": True, # Set to True to start over with an empty database.\n", + " \"path_to_db_dir\": \"./tmp/notebook/teachable_agent_db\", # Path to the directory where the database will be stored.\n", + " \"recall_threshold\": 1.5, # Higher numbers allow more (but less relevant) memos to be recalled.\n", + "}\n", + "\n", + "try:\n", + " from termcolor import colored\n", + "except ImportError:\n", + " def colored(x, *args, **kwargs):\n", + " return x\n", + " \n", + "autogen.ChatCompletion.start_logging()\n", + "\n", + "# print(colored(\"\\nLoading previous memory (if any) from disk.\", 'light_cyan'))\n", + "\n", + "agent = TeachableAgent(\n", + " name=\"agent\",\n", + " llm_config=llm_config,\n", + " teach_config=teach_config)\n", + "\n", + "user = UserProxyAgent(\n", + " name=\"user\",\n", + " human_input_mode=\"NEVER\",\n", + " is_termination_msg=lambda x: True if \"TERMINATE\" in x.get(\"content\") else False,\n", + " max_consecutive_auto_reply=0,\n", + ")\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning new facts\n", + "Let's teach the agent some facts it doesn't already know, since they are more recent than GPT-4's training data." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to agent):\n", + "\n", + "What is the Vicuna model?\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33magent\u001b[0m (to user):\n", + "\n", + "I'm sorry, but I currently do not have any information about a \"Vicuna model\" in my database. It could be because \"Vicuna model\" is domain-specific jargon or perhaps a term used in a specific field of study or industry. Could you please provide additional context or specify the field this term is related to? This could help me provide a more accurate response.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"What is the Vicuna model?\"\n", + "user.initiate_chat(agent, message=text, clear_history=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to agent):\n", + "\n", + "Vicuna is a 13B-parameter language model released by Meta.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33magent\u001b[0m (to user):\n", + "\n", + "I see, thank you for the clarification. As a learning AI model, I wasn't initially aware of this information, but I understand now.\n", + "\n", + "Vicuna is a large language model developed by Meta AI (formerly known as Facebook AI). 
This model, which has 13 billion parameters, is part of their efforts to advance AI capabilities. \n", + "\n", + "Vicuna is designed to understand and generate human language and follows the path set by models like GPT-3. Nonetheless, the utilization specifics and way how Vicuna model has been trained may vary because methodologies and training datasets used by different organizations might be different. \n", + "\n", + "Remember, the larger the model in terms of parameters, the more potential it has to understand complex language patterns but it also requires more computational resources.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"Vicuna is a 13B-parameter language model released by Meta.\"\n", + "user.initiate_chat(agent, message=text, clear_history=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to agent):\n", + "\n", + "What is the Orca model?\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33magent\u001b[0m (to user):\n", + "\n", + "I'm sorry for any confusion, but as of now, I don't have any information on an \"Orca model\" relative to artificial intelligence, computational models, or any similar context. The term \"Orca model\" might be specific to a certain field or a recent development. If it has just been introduced, it's possible that I might not have been updated with this information yet. Could you provide more context or clarify what field this model is used in? This would help me give a more accurate response.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"What is the Orca model?\"\n", + "user.initiate_chat(agent, message=text, clear_history=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to agent):\n", + "\n", + "Orca is a 13B-parameter language model released by Microsoft. It outperforms Vicuna on most tasks.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33magent\u001b[0m (to user):\n", + "\n", + "Thank you for the information. Although I didn't have this knowledge before, I understand now.\n", + "\n", + "Orca is a 13-billion parameter language model developed by Microsoft. Like Vicuna by Meta AI, these large-scale models are part of the recent wave of highly advanced language understanding AI. The purpose of these models is to understand and generate human-like text.\n", + "\n", + "You mentioned that Orca outperforms Vicuna on most tasks, suggesting that the techniques and training regimens used in creating Orca could be advanced or more effective. However, without specific details on their design, training data, and methodology, it would be challenging to do a straightforward comparison.\n", + "\n", + "Please note that advancements and performance in AI models can vary due to the diversity of algorithms, methodologies, training data, and uses. 
The continuous innovation in this field leads to frequent changes in which models are currently leading in performance.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"Orca is a 13B-parameter language model released by Microsoft. It outperforms Vicuna on most tasks.\"\n", + "user.initiate_chat(agent, message=text, clear_history=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's consider this the end of our first chat. The following function needs to be called at the end of each chat, so that `TeachableAgent` can store what the user has taught it." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93m\n", + "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n" + ] + } + ], + "source": [ + "agent.learn_from_recent_user_comments()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's start a new chat by clearing the previous chat's history. At this point, common LLM-based assistants would forget everything from the last chat. But `TeachableAgent` can retrieve memories from its vector DB as needed, allowing it to recall and reason over facts that the user taught it in earlier conversations." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to agent):\n", + "\n", + "How does the Vicuna model compare to the Orca model?\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33magent\u001b[0m (to user):\n", + "\n", + "The Orca and Vicuna models both have a total of 13B parameters, indicating similar complexity. However, they were developed by different entities, with Orca being developed by Microsoft and Vicuna by Meta.\n", + "\n", + "In a comparative performance perspective, Orca generally outperforms Vicuna on most tasks, according to Microsoft's release notes and testing. The specific metric improvement over Vicuna can vary depending on the nature of the task, the dataset used, and other factors. \n", + "\n", + "Unfortunately, without more specific information, it's difficult to provide a detailed comparison between the two models. Factors like encoding strategies, underlying algorithms, training procedures, and more could also significantly contribute to their performance. You can look up more details in their respective technical documents or ask the developers for more specifics.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"How does the Vicuna model compare to the Orca model?\"\n", + "user.initiate_chat(agent, message=text, clear_history=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning user preferences\n", + "Now let's teach the agent some of our preferences. Suppose that we frequently post short summaries of new papers for our team to read, and we want the agent to help us do this faster." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to agent):\n", + "\n", + "Summarize this abstract.\n", + "\n", + "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", + "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", + "AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33magent\u001b[0m (to user):\n", + "\n", + "AutoGen is an open-source framework that allows for the creation of language model applications through customizable and conversable agents. These agents can use a combination of language models, human inputs, and various tools across different modes for accomplishing tasks. Developers can define interaction behaviors and also program conversation patterns using both natural language and computer code. AutoGen is useful for building a variety of applications with different complexities. Empirical studies have shown its effectiveness in numerous areas such as mathematics, coding, question answering, operations research, online decision-making, and entertainment.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"\"\"Summarize this abstract.\n", + "\n", + "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", + "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", + "AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. 
Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", + "\"\"\"\n", + "user.initiate_chat(agent, message=text, clear_history=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "But that's too long. So let's teach the agent our preference." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to agent):\n", + "\n", + "Summarize this abstract. \n", + "When summarizing an abstract, I like the summary to contain just three short bullet points: the title, the innovation, and the key empirical results.\n", + "\n", + "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", + "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", + "AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33magent\u001b[0m (to user):\n", + "\n", + "- Title: \"AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\"\n", + "- Innovation: AutoGen is introduced as an open-source framework, which facilitates developers to construct language-learning model (LLM) applications using multiple conversable agents. These agents can function across various methods utilizing LLMs, human contributions, and tools. Moreover, it allows developers to outline flexible agent interaction behaviors using both natural language and computer code.\n", + "- Key Empirical Results: Empirical studies highlight the efficacy of AutoGen and its usability across domains like mathematics, coding, question answering, operations research, online decision-making, and entertainment.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"\"\"Summarize this abstract. 
\n", + "When summarizing an abstract, I like the summary to contain just three short bullet points: the title, the innovation, and the key empirical results.\n", + "\n", + "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", + "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", + "AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", + "\"\"\"\n", + "user.initiate_chat(agent, message=text, clear_history=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That's better, but will the agent remember these preferences in the future, for a different paper? Let's start a new chat to find out." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93m\n", + "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n", + "\u001b[33muser\u001b[0m (to agent):\n", + "\n", + "Summarize this abstract.\n", + "\n", + "Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", + "Sébastien Bubeck, Varun Chandrasekaran, Ronen Eldan, Johannes Gehrke, Eric Horvitz, Ece Kamar, Peter Lee, Yin Tat Lee, Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, Yi Zhang\n", + "Artificial intelligence (AI) researchers have been developing and refining large language models (LLMs) that exhibit remarkable capabilities across a variety of domains and tasks, challenging our understanding of learning and cognition. The latest model developed by OpenAI, GPT-4, was trained using an unprecedented scale of compute and data. In this paper, we report on our investigation of an early version of GPT-4, when it was still in active development by OpenAI. We contend that (this early version of) GPT-4 is part of a new cohort of LLMs (along with ChatGPT and Google's PaLM for example) that exhibit more general intelligence than previous AI models. We discuss the rising capabilities and implications of these models. We demonstrate that, beyond its mastery of language, GPT-4 can solve novel and difficult tasks that span mathematics, coding, vision, medicine, law, psychology and more, without needing any special prompting. Moreover, in all of these tasks, GPT-4's performance is strikingly close to human-level performance, and often vastly surpasses prior models such as ChatGPT. Given the breadth and depth of GPT-4's capabilities, we believe that it could reasonably be viewed as an early (yet still incomplete) version of an artificial general intelligence (AGI) system. 
In our exploration of GPT-4, we put special emphasis on discovering its limitations, and we discuss the challenges ahead for advancing towards deeper and more comprehensive versions of AGI, including the possible need for pursuing a new paradigm that moves beyond next-word prediction. We conclude with reflections on societal influences of the recent technological leap and future research directions.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33magent\u001b[0m (to user):\n", + "\n", + "- Title: \"Sparks of Artificial General Intelligence: Early experiments with GPT-4\"\n", + "- Innovation: This research focuses on the early version of OpenAI’s GPT-4, which shows more general intelligence than prior AI models, being proficient across different domains such as mathematics, coding, vision, medicine, and more without needing specific prompting.\n", + "- Key Empirical Results: Experimentation showed GPT-4's performance was close to human-level or even superior to previous models like ChatGPT across various tasks. Despite its limitations, it suggests the potential of serving as an initial stepping stone towards artificial general intelligence (AGI), potentially requiring a different paradigm from just next-word prediction.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "agent.learn_from_recent_user_comments()\n", + "\n", + "text = \"\"\"Summarize this abstract.\n", + "\n", + "Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", + "Sébastien Bubeck, Varun Chandrasekaran, Ronen Eldan, Johannes Gehrke, Eric Horvitz, Ece Kamar, Peter Lee, Yin Tat Lee, Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, Yi Zhang\n", + "Artificial intelligence (AI) researchers have been developing and refining large language models (LLMs) that exhibit remarkable capabilities across a variety of domains and tasks, challenging our understanding of learning and cognition. The latest model developed by OpenAI, GPT-4, was trained using an unprecedented scale of compute and data. In this paper, we report on our investigation of an early version of GPT-4, when it was still in active development by OpenAI. We contend that (this early version of) GPT-4 is part of a new cohort of LLMs (along with ChatGPT and Google's PaLM for example) that exhibit more general intelligence than previous AI models. We discuss the rising capabilities and implications of these models. We demonstrate that, beyond its mastery of language, GPT-4 can solve novel and difficult tasks that span mathematics, coding, vision, medicine, law, psychology and more, without needing any special prompting. Moreover, in all of these tasks, GPT-4's performance is strikingly close to human-level performance, and often vastly surpasses prior models such as ChatGPT. Given the breadth and depth of GPT-4's capabilities, we believe that it could reasonably be viewed as an early (yet still incomplete) version of an artificial general intelligence (AGI) system. In our exploration of GPT-4, we put special emphasis on discovering its limitations, and we discuss the challenges ahead for advancing towards deeper and more comprehensive versions of AGI, including the possible need for pursuing a new paradigm that moves beyond next-word prediction. 
We conclude with reflections on societal influences of the recent technological leap and future research directions.\"\"\"\n", + "user.initiate_chat(agent, message=text, clear_history=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning hard tasks\n", + "Finally, let's extend the agent's capabilities by teaching it how to accomplish challenging tasks. \n", + "\n", + "The [Sparks of AGI](https://arxiv.org/abs/2303.12712) paper evaluated GPT-4 on math problems like the following, which it could solve only 32% of the time." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to agent):\n", + "\n", + "Consider the identity: \n", + "9 * 4 + 6 * 6 = 72\n", + "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 99?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33magent\u001b[0m (to user):\n", + "\n", + "Certainly, let's figure out the solution.\n", + "\n", + "Plan:\n", + "1. The goal is to increase the current total (72) to 99 by changing only one integer on the left-hand side.\n", + "2. This would require an increase of 27.\n", + "3. Look at each integer and think about how changing it might help reach the goal.\n", + "4. Intuitively, increasing an integer involved in multiplication will have a more significant impact than increasing an integer involved in addition.\n", + "5. Remember, the integers are multiplied first due to the order of operations.\n", + "6. Starting with the \"9 * 4\", replacing 9 with any number would result in multiples of 4. But, none of these within a reasonable range would give an increase of 27.\n", + "7. Checking \"6 * 6\", we realize that replacing the second 6 with a 9, gives 54, which is an increase of 27 from the original 36.\n", + "8. 54 fits perfectly our need for an increase of 27, as it brings up our total from 72 to 99.\n", + "\n", + "The solution is: 9 * 4 + 6 * 9 = 99\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"\"\"Consider the identity: \n", + "9 * 4 + 6 * 6 = 72\n", + "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 99?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\"\"\"\n", + "user.initiate_chat(agent, message=text, clear_history=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This time, lets teach the agent a viable strategy for solving such tasks." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to agent):\n", + "\n", + "Consider the identity: \n", + "9 * 4 + 6 * 6 = 72\n", + "Can you modify exactly one integer (and not more than that!) 
on the left hand side of the equation so the right hand side becomes 99?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\n", + "Here's some advice that may help:\n", + "1. Let E denote the original number on the right.\n", + "2. Let F denote the final number on the right.\n", + "3. Calculate the difference between the two, G = F - E.\n", + "4. Examine the numbers on the left one by one until finding one that divides evenly into G, where negative integers are allowed.\n", + "5. Calculate J = G / H. This is the number of times that H divides into G.\n", + "6. Verify that J is an integer, and that H * J = G.\n", + "7. Find the number on the left which is multiplied by H, and call it K.\n", + "8. Change K to K + J.\n", + "9. Recompute the value on the left, and verify that it equals F.\n", + "Finally, write down your solution as: \"The solution is: A * B + C * D\".\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33magent\u001b[0m (to user):\n", + "\n", + "Let's follow the advice to solve the problem step by step:\n", + "\n", + "1. The original number on the right, E, is 72.\n", + "2. The final number on the right, F, is 99.\n", + "3. The difference G = F - E = 99 - 72 = 27.\n", + "4. Looking at the integers on the left side one by one, we see that 9 and 3 (if we allow negative integers, -3 instead of 6) can divide 27 evenly. Let's choose 9. So, H = 9.\n", + "5. Now, we calculate J = G / H = 27 / 9 = 3.\n", + "6. J (3) is indeed an integer, and H (9) * J (3) = G (27).\n", + "7. We find the number that is multiplied by 9 on the left, and call it K. Let's consider the first '9' which multiplies 4, hence K = 4.\n", + "8. Now, we change K to K + J = 4 + 3 = 7.\n", + "9. Finally, recomputing the value on the left we have: 9 * 7 + 6 * 6 = 63 + 36 = 99.\n", + "\n", + "So, the solution is: \"The solution is: 9 * 7 + 6 * 6\". The '4' in the original equation has been modified to '7'.\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"\"\"Consider the identity: \n", + "9 * 4 + 6 * 6 = 72\n", + "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 99?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\n", + "Here's some advice that may help:\n", + "1. Let E denote the original number on the right.\n", + "2. Let F denote the final number on the right.\n", + "3. Calculate the difference between the two, G = F - E.\n", + "4. Examine the numbers on the left one by one until finding one that divides evenly into G, where negative integers are allowed.\n", + "5. Calculate J = G / H. This is the number of times that H divides into G.\n", + "6. Verify that J is an integer, and that H * J = G.\n", + "7. Find the number on the left which is multiplied by H, and call it K.\n", + "8. Change K to K + J.\n", + "9. Recompute the value on the left, and verify that it equals F.\n", + "Finally, write down your solution as: \"The solution is: A * B + C * D\".\n", + "\"\"\"\n", + "user.initiate_chat(agent, message=text, clear_history=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When given this advice, GPT-4 can solve such problems over 95% of the time. 
But can it remember the strategy so the user doesn't have to explain it over and over? As before, let's start a new chat to find out." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93m\n", + "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n", + "\u001b[33muser\u001b[0m (to agent):\n", + "\n", + "Consider the identity: \n", + "9 * 4 + 6 * 6 = 72\n", + "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 99?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33magent\u001b[0m (to user):\n", + "\n", + "Here's the step-by-step solution:\n", + "\n", + "1. Firstly, E is the original number on the right which is 72.\n", + "2. The final number F on the right should be 99.\n", + "3. The difference G between the two is F - E = 99 - 72 = 27.\n", + "4. On the left hand side, we have 9, 4, 6, and 6. Checking these one by one, we find that 9 divides evenly into 27.\n", + "5. We calculate J = G / H, where H is 9. J = 27 / 9 = 3. This is the number of times that H divides into G.\n", + "6. Both J is an integer and H * J = G, as the equality 9 * 3 = 27 holds.\n", + "7. In the original equation, 9 (which we denoted as H) is multiplied by 4. So, K is 4.\n", + "8. We modify the 4 to K + J which becomes 4 + 3 = 7.\n", + "9. Now, the left hand side becomes 9 * 7 + 6 * 6 = 63 + 36 = 99, which is equal to F. Hence, we have successfully modified only one integer on the left to make the equation correct.\n", + "\n", + "Finally, the solution is: \"The solution is: 9 * 7 + 6 * 6\".\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "agent.learn_from_recent_user_comments()\n", + "\n", + "text = \"\"\"Consider the identity: \n", + "9 * 4 + 6 * 6 = 72\n", + "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 99?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\"\"\"\n", + "user.initiate_chat(agent, message=text, clear_history=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As a final check, let's test the agent's newly learned skill on a separate instance of the task." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33muser\u001b[0m (to agent):\n", + "\n", + "Consider the identity: \n", + "8 * 3 + 7 * 9 = 87\n", + "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 59?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\u001b[33magent\u001b[0m (to user):\n", + "\n", + "Let's use the advice and think step-by-step:\n", + "\n", + "1. Let E denote the original number on the right. Here, E = 87.\n", + "2. Let F denote the final number on the right. Here, F = 59.\n", + "3. 
Calculate the difference between the two, G = F - E = 59 - 87 = -28.\n", + "4. Examine the numbers on the left one by one until finding one that divides evenly into G. Here, 7 divides evenly into -28, therefore H = 7.\n", + "5. Calculate J = G / H. This is the number of times that H divides into G. Here, J = -28 / 7 = -4.\n", + "6. Verify that J is an integer, and that H * J = G. -4 is indeed an integer and 7 * -4 = -28 which equals to G.\n", + "7. Find the number on the left which is multiplied by H, and call it K. Here K = 9.\n", + "8. Change K to K + J. Here K_new = 9 + (-4) = 5.\n", + "9. Recompute the value on the left, and verify that it equals F. The new left hand side becomes 8 * 3 + 5 * 7 = 59 which is equal to F.\n", + "\n", + "Finally, the solution is: \"The solution is: 8 * 3 + 5 * 7\".\n", + "\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "text = \"\"\"Consider the identity: \n", + "8 * 3 + 7 * 9 = 87\n", + "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 59?\n", + "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", + "\"\"\"\n", + "user.initiate_chat(agent, message=text, clear_history=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "flaml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 317efb67984..74215aac248 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -11,7 +11,7 @@ def colored(x, *args, **kwargs): verbosity = 0 # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. -assert_on_error = True # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable. +assert_on_error = False # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable. recall_threshold = 1.5 # Higher numbers allow more (but less relevant) memos to be recalled. # Specify the model to use by uncommenting one of the following lines. @@ -142,7 +142,7 @@ def test_task_advice_pair(): total_num_errors, total_num_tests = 0, 0 - num_trials = 1 + num_trials = 10 for trial in range(num_trials): num_errors, num_tests = test_question_answer_pair() total_num_errors += num_errors From ec49ae01d30672778079f5c761a3e35a26fd6619 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Tue, 17 Oct 2023 15:02:23 -0700 Subject: [PATCH 31/50] minor edits --- notebook/agentchat_Teachability.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/notebook/agentchat_Teachability.ipynb b/notebook/agentchat_Teachability.ipynb index 8151a002807..2503fc16761 100644 --- a/notebook/agentchat_Teachability.ipynb +++ b/notebook/agentchat_Teachability.ipynb @@ -15,9 +15,9 @@ "source": [ "# Chatting with TeachableAgent\n", "\n", - "Conversational assistants based on LLMs can remember the current chat with the user, and can even demonstrate in-context learning of things that the user teaches the assistant during the chat. 
But these memories and learnings are lost once the chat is over, or when a single chat grows too long. In future chats, the user is forced to repeat any instructions that will be needed.\n", + "Conversational assistants based on LLMs can remember the current chat with the user, and can even demonstrate in-context learning of things that the user teaches the assistant during the chat. But these memories and learnings are lost once the chat is over, or when a single chat grows too long. In subsequent chats, the user is forced to repeat any necessary instructions over and over.\n", "\n", - "This notebook demonstrates a few ways of using `TeachableAgent`, which persists user teachings across chat boundaries in long-term memory (a vector database). Memory is save to disk at the end of each chat, and loaded from disk at the start of the next. Instead of copying all of memory into the chat context, which would take up valuable space, individual memories (called memos) are retrieved into the chat context as needed. This allows the user to teach frequently used facts and skills to the agent just once, and have the agent remember them in future chats.\n", + "`TeachableAgent` addresses these limitations by persisting user teachings across chat boundaries in long-term memory (a vector database). Memory is save to disk at the end of each chat, then loaded from disk at the start of the next. Instead of copying all of memory into the context window, which would eat up valuable space, individual memories (called memos) are retrieved into context as needed. This allows the user to teach frequently used facts and skills to the agent just once, and have the agent remember them in later chats.\n", "\n", "## Requirements\n", "\n", @@ -79,7 +79,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "It first looks for environment variable \"OAI_CONFIG_LIST\" which needs to be a valid json string. If that variable is not found, it then looks for a json file named \"OAI_CONFIG_LIST\". It filters the configs by models (you can filter by other keys as well). After application of this particular filter, only the gpt-4 and gpt-3.5-turbo models are kept.\n", + "It first looks for environment variable \"OAI_CONFIG_LIST\" which needs to be a valid json string. If that variable is not found, it then looks for a json file named \"OAI_CONFIG_LIST\". It filters the configs by models (you can filter by other keys as well). After application of this particular filter, only the gpt-4 models are kept.\n", "\n", "The config list looks like the following:\n", "```python\n", @@ -414,7 +414,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "But that's too long. So let's teach the agent our preference." + "But that's too long and unstructured. So let's teach the agent our preference." ] }, { @@ -513,7 +513,7 @@ "## Learning hard tasks\n", "Finally, let's extend the agent's capabilities by teaching it how to accomplish challenging tasks. \n", "\n", - "The [Sparks of AGI](https://arxiv.org/abs/2303.12712) paper evaluated GPT-4 on math problems like the following, which it could solve only 32% of the time." + "The [Sparks of AGI](https://arxiv.org/abs/2303.12712) paper evaluated GPT-4 on math problems like the following, which it could only solve 32% of the time." ] }, { @@ -567,7 +567,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This time, lets teach the agent a viable strategy for solving such tasks." + "This time, lets teach the agent a reliable strategy for solving such tasks." 
] }, { From d16c3684d261eebc6a2f01a9cbd9e76379b8dff3 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Tue, 17 Oct 2023 17:27:14 -0700 Subject: [PATCH 32/50] cleanup --- autogen/agentchat/contrib/teachable_agent.py | 11 +- .../agentchat/contrib/text_analyzer_agent.py | 12 +- notebook/agentchat_Teachability.ipynb | 176 ++++++++---------- test/agentchat/test_teachable_agent.py | 31 +-- .../test_teachable_agent_interactively.py | 58 ++++++ 5 files changed, 154 insertions(+), 134 deletions(-) create mode 100644 test/agentchat/test_teachable_agent_interactively.py diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 5b7980b7148..6d4a887b782 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -97,13 +97,6 @@ def _generate_teachable_assistant_reply( # Get the last user message. user_text = messages[-1]['content'] - # To help an interactive user test memory, clear the chat history if the user says "new chat". - if user_text == 'new chat': - self.clear_history() - self.learn_from_recent_user_comments() - print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", 'light_cyan')) - return True, 'New chat started.' - # This is a normal user turn. Keep track of it for potential storage later. self.user_comments.append(user_text) @@ -136,7 +129,7 @@ def consider_memo_storage(self, comment): """Decides whether to store something from one user comment in the DB.""" # Check for a problem-solution pair. response = self.analyze(comment, - "Does the TEXT contain a task or problem to solve? Answer with just one word, yes or no.") + "Does any part of the TEXT ask the agent to perform a task or solve a problem? Answer with just one word, yes or no.") if 'yes' in response.lower(): # Can we extract advice? advice = self.analyze(comment, @@ -178,7 +171,7 @@ def consider_memo_retrieval(self, comment): # Next, if the comment involves a task, then extract and generalize the task before using it as the lookup key. response = self.analyze(comment, - "Does the TEXT contain a task or problem to solve? Answer with just one word, yes or no.") + "Does any part of the TEXT ask the agent to perform a task or solve a problem? Answer with just one word, yes or no.") if 'yes' in response.lower(): if self.verbosity >= 1: print(colored('\nLOOK FOR RELEVANT MEMOS, AS TASK-ADVICE PAIRS', 'light_yellow')) diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py index dc3ad13c38f..15b26fce55a 100644 --- a/autogen/agentchat/contrib/text_analyzer_agent.py +++ b/autogen/agentchat/contrib/text_analyzer_agent.py @@ -3,13 +3,18 @@ from autogen.agentchat.assistant_agent import ConversableAgent from typing import Callable, Dict, Optional, Union, List, Tuple, Any +system_message = """You are an expert in text analysis. +The user will give you TEXT to analyze. +The user will give you analysis INSTRUCTIONS copied twice, at both the beginning and the end. 
+You will follow these INSTRUCTIONS in analyzing the TEXT, then give the results of your expert analysis in the format requested."""
+

 class TextAnalyzerAgent(ConversableAgent):
     """Text Analysis agent, a subclass of ConversableAgent designed to answer specific questions about text."""
     def __init__(
         self,
         name: str,
-        system_message: Optional[str] = "You are an expert in text analysis.",
+        system_message: Optional[str] = system_message,
         llm_config: Optional[Union[Dict, bool]] = None,
         is_termination_msg: Optional[Callable[[Dict], bool]] = None,
         max_consecutive_auto_reply: Optional[int] = None,
@@ -52,10 +57,11 @@ def analyze_text(self, text_to_analyze, analysis_instructions):
         """Analyzes the given text as instructed, and returns the analysis."""
         # Assemble the message.
         text_to_analyze = '# TEXT\n' + text_to_analyze + '\n'
-        analysis_instructions = '# Instructions\n' + analysis_instructions + '\n'
+        analysis_instructions = '# INSTRUCTIONS\n' + analysis_instructions + '\n'
         msg_text = '\n'.join([analysis_instructions, text_to_analyze, analysis_instructions])  # Repeat the instructions.
         messages = self._oai_system_message + [{"role": "user", "content": msg_text}]
         # Generate and return the analysis string.
-        response = oai.ChatCompletion.create(context=None, messages=messages, use_cache=self.use_cache, **self.llm_config)
-        return oai.ChatCompletion.extract_text_or_function_call(response)[0]
+        response = oai.ChatCompletion.create(context=None, messages=messages, use_cache=self.use_cache, **self.llm_config)
+        output_text = oai.ChatCompletion.extract_text_or_function_call(response)[0]
+        return output_text
diff --git a/notebook/agentchat_Teachability.ipynb b/notebook/agentchat_Teachability.ipynb
index 2503fc16761..fefc54b6c42 100644
--- a/notebook/agentchat_Teachability.ipynb
+++ b/notebook/agentchat_Teachability.ipynb
@@ -19,6 +19,8 @@
    "\n",
    "`TeachableAgent` addresses these limitations by persisting user teachings across chat boundaries in long-term memory (a vector database). Memory is saved to disk at the end of each chat, then loaded from disk at the start of the next. Instead of copying all of memory into the context window, which would eat up valuable space, individual memories (called memos) are retrieved into context as needed. This allows the user to teach frequently used facts and skills to the agent just once, and have the agent remember them in later chats.\n",
    "\n",
+    "This notebook demonstrates how `TeachableAgent` can learn facts, preferences, and skills from users. To interact with `TeachableAgent` yourself, run [test_teachable_agent_interactively.py](../test/agentchat/test_teachable_agent_interactively.py).\n",
+    "\n",
    "## Requirements\n",
    "\n",
    "AutoGen requires `Python>=3.8`. To run this notebook example, please install the [teachability] option.\n",
    "```bash\n",
@@ -157,8 +159,6 @@
    " \n",
    "autogen.ChatCompletion.start_logging()\n",
    "\n",
-    "# print(colored(\"\\nLoading previous memory (if any) from disk.\", 'light_cyan'))\n",
-    "\n",
    "agent = TeachableAgent(\n",
    "    name=\"agent\",\n",
    "    llm_config=llm_config,\n",
@@ -198,7 +198,7 @@
    "--------------------------------------------------------------------------------\n",
    "\u001b[33magent\u001b[0m (to user):\n",
    "\n",
-    "I'm sorry, but I currently do not have any information about a \"Vicuna model\" in my database. It could be because \"Vicuna model\" is domain-specific jargon or perhaps a term used in a specific field of study or industry. Could you please provide additional context or specify the field this term is related to? 
This could help me provide a more accurate response.\n", + "I'm sorry, but as of my last programming, I don't have information regarding any \"Vicuna model\". It could be related to various fields like computer science, economics, or biology, but I would require more specific details about the context in which it is being referred to. If it's a new concept or theory, I might not have that information yet. However, if you have any details about the Vicuna model, I am here to learn and could help more effectively in the future.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -225,13 +225,7 @@ "--------------------------------------------------------------------------------\n", "\u001b[33magent\u001b[0m (to user):\n", "\n", - "I see, thank you for the clarification. As a learning AI model, I wasn't initially aware of this information, but I understand now.\n", - "\n", - "Vicuna is a large language model developed by Meta AI (formerly known as Facebook AI). This model, which has 13 billion parameters, is part of their efforts to advance AI capabilities. \n", - "\n", - "Vicuna is designed to understand and generate human language and follows the path set by models like GPT-3. Nonetheless, the utilization specifics and way how Vicuna model has been trained may vary because methodologies and training datasets used by different organizations might be different. \n", - "\n", - "Remember, the larger the model in terms of parameters, the more potential it has to understand complex language patterns but it also requires more computational resources.\n", + "I apologize for any previous confusion. Given my last update preceding your new information, I wasn't aware of the Vicuna model. The model appears to be a cutting-edge innovation in the AI and language model field. Please note that as an AI, my knowledge is not updated in real-time, and it relies on periodic updates. Your information about Vicuna being a 13B-parameter language model developed by Meta is instrumental, and I'll remember for future conversations. Thank you for providing this information!\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -258,7 +252,7 @@ "--------------------------------------------------------------------------------\n", "\u001b[33magent\u001b[0m (to user):\n", "\n", - "I'm sorry for any confusion, but as of now, I don't have any information on an \"Orca model\" relative to artificial intelligence, computational models, or any similar context. The term \"Orca model\" might be specific to a certain field or a recent development. If it has just been introduced, it's possible that I might not have been updated with this information yet. Could you provide more context or clarify what field this model is used in? This would help me give a more accurate response.\n", + "I'm sorry for the confusion, but as of my last update, I don't have data about the \"Orca\" model. If it's a recent development or discovery, I may not know about it yet. However, if you have any details about the Orca model, I am more than ready to learn and remember, so I can help more effectively in our future conversations.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -285,13 +279,7 @@ "--------------------------------------------------------------------------------\n", "\u001b[33magent\u001b[0m (to user):\n", "\n", - "Thank you for the information. 
Although I didn't have this knowledge before, I understand now.\n", - "\n", - "Orca is a 13-billion parameter language model developed by Microsoft. Like Vicuna by Meta AI, these large-scale models are part of the recent wave of highly advanced language understanding AI. The purpose of these models is to understand and generate human-like text.\n", - "\n", - "You mentioned that Orca outperforms Vicuna on most tasks, suggesting that the techniques and training regimens used in creating Orca could be advanced or more effective. However, without specific details on their design, training data, and methodology, it would be challenging to do a straightforward comparison.\n", - "\n", - "Please note that advancements and performance in AI models can vary due to the diversity of algorithms, methodologies, training data, and uses. The continuous innovation in this field leads to frequent changes in which models are currently leading in performance.\n", + "Thank you for sharing this information. Based on your input, Orca is a 13-billion parameter language model developed by Microsoft, noted for outperforming Vicuna on most tasks. I'll remember this piece of information for future reference. Please note that at the time of my last update, I was not aware of the Orca model, and I appreciate your help in teaching me about recent advancements in AI and language models.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -306,7 +294,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's consider this the end of our first chat. The following function needs to be called at the end of each chat, so that `TeachableAgent` can store what the user has taught it." + "Let's end our first chat here. The following function needs to be called at the end of each chat, so that `TeachableAgent` can store what the user has taught it." ] }, { @@ -350,11 +338,7 @@ "--------------------------------------------------------------------------------\n", "\u001b[33magent\u001b[0m (to user):\n", "\n", - "The Orca and Vicuna models both have a total of 13B parameters, indicating similar complexity. However, they were developed by different entities, with Orca being developed by Microsoft and Vicuna by Meta.\n", - "\n", - "In a comparative performance perspective, Orca generally outperforms Vicuna on most tasks, according to Microsoft's release notes and testing. The specific metric improvement over Vicuna can vary depending on the nature of the task, the dataset used, and other factors. \n", - "\n", - "Unfortunately, without more specific information, it's difficult to provide a detailed comparison between the two models. Factors like encoding strategies, underlying algorithms, training procedures, and more could also significantly contribute to their performance. You can look up more details in their respective technical documents or ask the developers for more specifics.\n", + "Based on your previous information, both Vicuna and Orca are language models with 13 billion parameters. Vicuna was released by Meta, while Orca was released by Microsoft. The primary comparison between the two, as per your teachings, is that Orca reportedly outperforms Vicuna on most tasks. However, without specific context or detailed benchmarks, I am unable to provide a more comprehensive comparison. 
It is recommended to look into further details such as model performance on specific tasks, computational efficiency, ease of use, and other factors depending on the intended usage.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -384,7 +368,7 @@ "text": [ "\u001b[33muser\u001b[0m (to agent):\n", "\n", - "Summarize this abstract.\n", + "Please summarize this abstract.\n", "\n", "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", @@ -394,20 +378,20 @@ "--------------------------------------------------------------------------------\n", "\u001b[33magent\u001b[0m (to user):\n", "\n", - "AutoGen is an open-source framework that allows for the creation of language model applications through customizable and conversable agents. These agents can use a combination of language models, human inputs, and various tools across different modes for accomplishing tasks. Developers can define interaction behaviors and also program conversation patterns using both natural language and computer code. AutoGen is useful for building a variety of applications with different complexities. Empirical studies have shown its effectiveness in numerous areas such as mathematics, coding, question answering, operations research, online decision-making, and entertainment.\n", + "AutoGen is an open-source platform developed by Qingyun Wu, Gagan Bansal, Jieye Zhang, and co-authors. It allows developers to create applications using large language models (LLMs) through a customizable multi-agent conversation framework. With AutoGen, developers can define agent behavior, and use a combination of LLMs, human inputs, and resources. They can also create flexible conversation patterns with natural language and computer code. Automotive can be utilized for several domains like mathematics, coding, Q&A, operations research, online decision-making, entertainment, etc. The utility and effectiveness of AutoGen have been confirmed through empirical studies. Although the abstract doesn't mention Orca, the 13B-parameter LLM from Microsoft, the relationship between the two may come up in future discussions as both concern the usage of LLMs.\n", "\n", "--------------------------------------------------------------------------------\n" ] } ], "source": [ - "text = \"\"\"Summarize this abstract.\n", + "text = \"\"\"Please summarize this abstract.\n", "\n", "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", "AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. 
Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", "\"\"\"\n", - "user.initiate_chat(agent, message=text, clear_history=False)" + "user.initiate_chat(agent, message=text, clear_history=True)" ] }, { @@ -428,8 +412,8 @@ "text": [ "\u001b[33muser\u001b[0m (to agent):\n", "\n", - "Summarize this abstract. \n", - "When summarizing an abstract, I like the summary to contain just three short bullet points: the title, the innovation, and the key empirical results.\n", + "Please summarize this abstract. \n", + "When I'm summarizing an abstract, I try to make the summary contain just three short bullet points: the title, the innovation, and the key empirical results.\n", "\n", "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", @@ -439,23 +423,23 @@ "--------------------------------------------------------------------------------\n", "\u001b[33magent\u001b[0m (to user):\n", "\n", - "- Title: \"AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\"\n", - "- Innovation: AutoGen is introduced as an open-source framework, which facilitates developers to construct language-learning model (LLM) applications using multiple conversable agents. These agents can function across various methods utilizing LLMs, human contributions, and tools. Moreover, it allows developers to outline flexible agent interaction behaviors using both natural language and computer code.\n", - "- Key Empirical Results: Empirical studies highlight the efficacy of AutoGen and its usability across domains like mathematics, coding, question answering, operations research, online decision-making, and entertainment.\n", + "- Title: \"AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\" \n", + "- Innovation: AutoGen is a novel open-source framework developed to allow the creation of LLM applications via multiple, customizable and conversable agents. The agents can operate in various combinations of LLMs, human inputs and tools. Both natural language and code can be used to define their interaction behaviors flexibly.\n", + "- Key Empirical Results: AutoGen has proven effective in multiple applications across wide-ranging domains including mathematics, coding, question answering, operations research, online decision-making, and entertainment.\n", "\n", "--------------------------------------------------------------------------------\n" ] } ], "source": [ - "text = \"\"\"Summarize this abstract. \n", - "When summarizing an abstract, I like the summary to contain just three short bullet points: the title, the innovation, and the key empirical results.\n", + "text = \"\"\"Please summarize this abstract. 
\n", + "When I'm summarizing an abstract, I try to make the summary contain just three short bullet points: the title, the innovation, and the key empirical results.\n", "\n", "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", "AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", "\"\"\"\n", - "user.initiate_chat(agent, message=text, clear_history=False)" + "user.initiate_chat(agent, message=text, clear_history=True)" ] }, { @@ -478,7 +462,7 @@ "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n", "\u001b[33muser\u001b[0m (to agent):\n", "\n", - "Summarize this abstract.\n", + "Please summarize this abstract.\n", "\n", "Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", "Sébastien Bubeck, Varun Chandrasekaran, Ronen Eldan, Johannes Gehrke, Eric Horvitz, Ece Kamar, Peter Lee, Yin Tat Lee, Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, Yi Zhang\n", @@ -487,9 +471,11 @@ "--------------------------------------------------------------------------------\n", "\u001b[33magent\u001b[0m (to user):\n", "\n", - "- Title: \"Sparks of Artificial General Intelligence: Early experiments with GPT-4\"\n", - "- Innovation: This research focuses on the early version of OpenAI’s GPT-4, which shows more general intelligence than prior AI models, being proficient across different domains such as mathematics, coding, vision, medicine, and more without needing specific prompting.\n", - "- Key Empirical Results: Experimentation showed GPT-4's performance was close to human-level or even superior to previous models like ChatGPT across various tasks. Despite its limitations, it suggests the potential of serving as an initial stepping stone towards artificial general intelligence (AGI), potentially requiring a different paradigm from just next-word prediction.\n", + "Title: Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", + "\n", + "Innovation: The paper reports the investigation of GPT-4, an AI model by OpenAI, claiming it to possess more general intelligence than previous models. It's capable of performing a wide range of tasks spanning different domains including mathematics, coding, vision, medicine, law, and psychology, delivering near-human performance in many of these tasks without needing specific prompts.\n", + "\n", + "Key Empirical Results: GPT-4 greatly outperforms prior models like ChatGPT and provides a glimpse into what early versions of artificial general intelligence (AGI) systems may look like. 
Despite its depth and breadth of capabilities, the paper acknowledges the need to re-evaluate the current predictive model approach and potentially pursue a new paradigm for AGI. It closes with a consideration of societal implications of such technology and future research directions.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -498,7 +484,7 @@ "source": [ "agent.learn_from_recent_user_comments()\n", "\n", - "text = \"\"\"Summarize this abstract.\n", + "text = \"\"\"Please summarize this abstract.\n", "\n", "Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", "Sébastien Bubeck, Varun Chandrasekaran, Ronen Eldan, Johannes Gehrke, Eric Horvitz, Ece Kamar, Peter Lee, Yin Tat Lee, Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, Yi Zhang\n", @@ -511,14 +497,14 @@ "metadata": {}, "source": [ "## Learning hard tasks\n", - "Finally, let's extend the agent's capabilities by teaching it how to accomplish challenging tasks. \n", + "Finally, let's extend the agent's capabilities by teaching it how to accomplish a challenging task. \n", "\n", "The [Sparks of AGI](https://arxiv.org/abs/2303.12712) paper evaluated GPT-4 on math problems like the following, which it could only solve 32% of the time." ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -536,19 +522,22 @@ "--------------------------------------------------------------------------------\n", "\u001b[33magent\u001b[0m (to user):\n", "\n", - "Certainly, let's figure out the solution.\n", - "\n", "Plan:\n", - "1. The goal is to increase the current total (72) to 99 by changing only one integer on the left-hand side.\n", - "2. This would require an increase of 27.\n", - "3. Look at each integer and think about how changing it might help reach the goal.\n", - "4. Intuitively, increasing an integer involved in multiplication will have a more significant impact than increasing an integer involved in addition.\n", - "5. Remember, the integers are multiplied first due to the order of operations.\n", - "6. Starting with the \"9 * 4\", replacing 9 with any number would result in multiples of 4. But, none of these within a reasonable range would give an increase of 27.\n", - "7. Checking \"6 * 6\", we realize that replacing the second 6 with a 9, gives 54, which is an increase of 27 from the original 36.\n", - "8. 54 fits perfectly our need for an increase of 27, as it brings up our total from 72 to 99.\n", + "1. First, I'll assess the starting equation to see what figures we're working with.\n", + "2. Then, I'll manipulate each number one at a time to see if the equation could result in 99.\n", + "3. I cross-reference the results to check my solution.\n", + "\n", + "Here's how I'll break down the equation:\n", + "\n", + "Original equation: 9 * 4 + 6 * 6 = 72\n", + "Goal: Change one integer so that the right side equals 99.\n", "\n", - "The solution is: 9 * 4 + 6 * 9 = 99\n", + "1. Firstly, I start with looking at the integer '9' from the multiplicative term '9 * 4'. The product is 36. If I were to increase this product to a value closer to 99 without going over, I would find the number 24. Because 24 * 4 is 96, which is closest to 99. The equation becomes: 24 * 4 + 6 * 6 = 96 + 36 = 132 which exceeds 99. \n", + "2. Second, I check '4' from the multiplicative term '9 * 4'. Its product is 36 as well. 
If I increase this product, the number I find is 22 because 9*22 is 198, which is furthest from 99. Temporarily, this option seems incorrect.\n", + "3. Thirdly, I consider '6' from the additive term '6 * 6'. Its product is 36. I realize I can't increase this product without exceeding 99.\n", + "4. Finally, I evaluate the other '6' from the multiplicative term '6 * 6'. Its product is 36 as well. Here, I found the solution. If I change this '6' to '3', the sum of the products becomes 99. 9*4 + 6*3 = 36 + 18 = 54.\n", + "\n", + "Solution: The equation that satisfies the criteria is 9 * 4 + 6 * 3.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -560,7 +549,7 @@ "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 99?\n", "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", "\"\"\"\n", - "user.initiate_chat(agent, message=text, clear_history=False)" + "user.initiate_chat(agent, message=text, clear_history=True)" ] }, { @@ -572,7 +561,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -602,19 +591,18 @@ "--------------------------------------------------------------------------------\n", "\u001b[33magent\u001b[0m (to user):\n", "\n", - "Let's follow the advice to solve the problem step by step:\n", - "\n", - "1. The original number on the right, E, is 72.\n", - "2. The final number on the right, F, is 99.\n", - "3. The difference G = F - E = 99 - 72 = 27.\n", - "4. Looking at the integers on the left side one by one, we see that 9 and 3 (if we allow negative integers, -3 instead of 6) can divide 27 evenly. Let's choose 9. So, H = 9.\n", - "5. Now, we calculate J = G / H = 27 / 9 = 3.\n", - "6. J (3) is indeed an integer, and H (9) * J (3) = G (27).\n", - "7. We find the number that is multiplied by 9 on the left, and call it K. Let's consider the first '9' which multiplies 4, hence K = 4.\n", - "8. Now, we change K to K + J = 4 + 3 = 7.\n", - "9. Finally, recomputing the value on the left we have: 9 * 7 + 6 * 6 = 63 + 36 = 99.\n", - "\n", - "So, the solution is: \"The solution is: 9 * 7 + 6 * 6\". The '4' in the original equation has been modified to '7'.\n", + "Step-by-step plan:\n", + "1. Original number on the right (E): 72\n", + "2. Final number on the right (F): 99\n", + "3. The difference between the two (G): 99 - 72 = 27\n", + "4. Go through each digit on the left one by one and check if it divides evenly into G: - 9 does not divide evenly into 27, 4 does not divide evenly into 27, 6 divides evenly into 27.\n", + "5. With H being 6 (the number that divides into G), calculate J = G / H: J = 27 / 6 = 4.5. But J must be an integer, so H = 6 is not suitable.\n", + "6. Continue to the next digit which is also 6. The same process yields J = 27 / 6 = 4.5, which is not an integer, so it's not a solution.\n", + "7. The only number left that could divide 27 without fractional remainder is 9.\n", + "8. Testing H = 9, gives J = G / H = 27 / 9 = 3. So we need to increase the number that is multiplied by 9 by 3, which is, increase 4 to 7. \n", + "9. 
Equation becomes: 9 * 7 + 6 * 6 = 63 + 36 = 99, which equals F.\n", + " \n", + "The solution is: 9 * 7 + 6 * 6.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -650,7 +638,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -670,19 +658,19 @@ "--------------------------------------------------------------------------------\n", "\u001b[33magent\u001b[0m (to user):\n", "\n", - "Here's the step-by-step solution:\n", + "Alright, following your guidelines, we can start this step-by-step:\n", "\n", - "1. Firstly, E is the original number on the right which is 72.\n", - "2. The final number F on the right should be 99.\n", - "3. The difference G between the two is F - E = 99 - 72 = 27.\n", - "4. On the left hand side, we have 9, 4, 6, and 6. Checking these one by one, we find that 9 divides evenly into 27.\n", - "5. We calculate J = G / H, where H is 9. J = 27 / 9 = 3. This is the number of times that H divides into G.\n", - "6. Both J is an integer and H * J = G, as the equality 9 * 3 = 27 holds.\n", - "7. In the original equation, 9 (which we denoted as H) is multiplied by 4. So, K is 4.\n", - "8. We modify the 4 to K + J which becomes 4 + 3 = 7.\n", - "9. Now, the left hand side becomes 9 * 7 + 6 * 6 = 63 + 36 = 99, which is equal to F. Hence, we have successfully modified only one integer on the left to make the equation correct.\n", + "1. Let E be the original number on the right, which is 72.\n", + "2. Let F be the final number on the right, which is 99.\n", + "3. Calculate the difference between the two: G = F - E = 99 - 72 = 27.\n", + "4. Examine the numbers on the left one by one until finding one that divides evenly into G. The numbers we have are: 9, 4, 6, 6. The number 9 divides evenly into 27.\n", + "5. Calculate J = G / H. Here H is the number that divides evenly into G, so H is 9. Thus, J = 27 / 9 = 3.\n", + "6. Verifying that J is an integer and that H * J = G: 9 * 3 = 27 and 27 = G (which is correct).\n", + "7. Find the number on the left which is multiplied by H, and call it K. Here, the number multiplied by 9 is 4, so K = 4.\n", + "8. Change K to K + J. Here, K + J is 4 + 3 = 7.\n", + "9. Recompute the value on the left and verify that it now equals F: 9 * 7 + 6 * 6 = 63 + 36 = 99, which is equal to F.\n", "\n", - "Finally, the solution is: \"The solution is: 9 * 7 + 6 * 6\".\n", + "So the solution is: 9 * 7 + 6 * 6.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -708,7 +696,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -726,19 +714,19 @@ "--------------------------------------------------------------------------------\n", "\u001b[33magent\u001b[0m (to user):\n", "\n", - "Let's use the advice and think step-by-step:\n", + "Alright, let's proceed with this new identity following the same step-by-step process:\n", "\n", - "1. Let E denote the original number on the right. Here, E = 87.\n", - "2. Let F denote the final number on the right. Here, F = 59.\n", - "3. Calculate the difference between the two, G = F - E = 59 - 87 = -28.\n", - "4. Examine the numbers on the left one by one until finding one that divides evenly into G. Here, 7 divides evenly into -28, therefore H = 7.\n", - "5. Calculate J = G / H. This is the number of times that H divides into G. Here, J = -28 / 7 = -4.\n", - "6. Verify that J is an integer, and that H * J = G. 
-4 is indeed an integer and 7 * -4 = -28 which equals to G.\n", - "7. Find the number on the left which is multiplied by H, and call it K. Here K = 9.\n", - "8. Change K to K + J. Here K_new = 9 + (-4) = 5.\n", - "9. Recompute the value on the left, and verify that it equals F. The new left hand side becomes 8 * 3 + 5 * 7 = 59 which is equal to F.\n", + "1. Let E be the original number on the right, 87.\n", + "2. Let F be the final number on the right, 59.\n", + "3. Calculate the difference G between the two: G = F - E = 59 - 87 = -28.\n", + "4. Let’s examine the numbers on the left side of the equation to find one that divides evenly into G: 8, 3, 7, 9. 7 divides evenly into -28.\n", + "5. Calculate J = G / H, here G is -28 and H is 7, so J = -28 / 7 = -4.\n", + "6. Verifying that J is an integer and H * J = G, we see that 7 * -4 = -28, which equals G (which is correct).\n", + "7. Find the number on the left which is multiplied by 7, and this number is 9, so we call this K.\n", + "8. Now update K to K + J, so 9 + (-4) = 5.\n", + "9. Recompute the value on the left and verify that it equals F: 8 * 3 + 7 * 5 = 24 + 35 = 59.\n", "\n", - "Finally, the solution is: \"The solution is: 8 * 3 + 5 * 7\".\n", + "So the solution is: 8 * 3 + 7 * 5.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -750,7 +738,7 @@ "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 59?\n", "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", "\"\"\"\n", - "user.initiate_chat(agent, message=text, clear_history=True)" + "user.initiate_chat(agent, message=text, clear_history=False)" ] } ], diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 74215aac248..6254e2903cd 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -1,5 +1,4 @@ -import sys -from autogen import ConversableAgent, UserProxyAgent, config_list_from_json +from autogen import ConversableAgent, config_list_from_json from autogen.agentchat.contrib.teachable_agent import TeachableAgent @@ -53,25 +52,6 @@ def check_agent_response(agent, user, correct_answer): return 0 -def interact_freely_with_user(): - """Starts a free-form chat between the user and a TeachableAgent.""" - - # Create the agents. - print(colored("\nLoading previous memory (if any) from disk.", 'light_cyan')) - agent = create_teachable_agent(reset_db=False) - user = UserProxyAgent("user", human_input_mode="ALWAYS") - - # Start the chat. - print(colored("\nTo clear the context and start a new chat, type 'new chat'.", 'light_cyan')) - user.initiate_chat(agent, message="Hi") - - # Let the agent remember things that should be learned from this chat. - agent.learn_from_recent_user_comments() - - # Wrap up. 
- agent.close_db() - - def test_question_answer_pair(): """Tests whether the agent can answer a question after being taught the answer in a previous chat.""" print(colored("\nTEST QUESTION-ANSWER PAIRS", 'light_cyan')) @@ -134,15 +114,10 @@ def test_task_advice_pair(): if __name__ == "__main__": - """Runs the unit tests from above, unless the user adds 'interactive' or 'i' as a commandline argument.""" - if len(sys.argv) > 1: - if sys.argv[1].startswith('i'): - interact_freely_with_user() - exit() - + """Runs this file's unit tests.""" total_num_errors, total_num_tests = 0, 0 - num_trials = 10 + num_trials = 1 # Set to a higher number to get a more accurate error rate. for trial in range(num_trials): num_errors, num_tests = test_question_answer_pair() total_num_errors += num_errors diff --git a/test/agentchat/test_teachable_agent_interactively.py b/test/agentchat/test_teachable_agent_interactively.py new file mode 100644 index 00000000000..3818aad8347 --- /dev/null +++ b/test/agentchat/test_teachable_agent_interactively.py @@ -0,0 +1,58 @@ +from autogen import UserProxyAgent, config_list_from_json +from autogen.agentchat.contrib.teachable_agent import TeachableAgent + + +try: + from termcolor import colored +except ImportError: + def colored(x, *args, **kwargs): + return x + + +verbosity = 0 # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. +recall_threshold = 1.5 # Higher numbers allow more (but less relevant) memos to be recalled. + +# Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input. +filter_dict={"model": ["gpt-4-0613"]} + + +def create_teachable_agent(reset_db=False): + """Instantiates a TeachableAgent using the settings from the top of this file.""" + # Load LLM inference endpoints from an env variable or a file + # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints + # and OAI_CONFIG_LIST_sample + config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) + agent = TeachableAgent( + name="agent", + llm_config={ + "config_list": config_list, + "request_timeout": 120}, + teach_config={ + "verbosity": verbosity, + "reset_db": reset_db, + "path_to_db_dir": "./tmp/interactive/teachable_agent_db", + "recall_threshold": recall_threshold}) + return agent + + +def interact_freely_with_user(): + """Starts a free-form chat between the user and TeachableAgent.""" + + # Create the agents. + print(colored("\nLoading previous memory (if any) from disk.", 'light_cyan')) + agent = create_teachable_agent(reset_db=False) + user = UserProxyAgent("user", human_input_mode="ALWAYS") + + # Start the chat. + agent.initiate_chat(user, message="Greetings, I'm a teachable user assistant! What's on your mind today?") + + # Let the agent remember things that should be learned from this chat. + agent.learn_from_recent_user_comments() + + # Wrap up. 
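+    # (close_db is expected to save the memo store to disk, matching the "Loading previous memory (if any) from disk" step above, so this chat's teachings persist into future sessions.)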
+ agent.close_db() + + +if __name__ == "__main__": + """Lets the user test TeachableAgent interactively.""" + interact_freely_with_user() From 2467673ee77079e50dee2026b8fac1c6c7ec0459 Mon Sep 17 00:00:00 2001 From: riloynd Date: Tue, 17 Oct 2023 21:05:06 -0700 Subject: [PATCH 33/50] linter fixes --- autogen/agentchat/contrib/teachable_agent.py | 211 +++++++++++------- .../agentchat/contrib/text_analyzer_agent.py | 17 +- test/agentchat/test_teachable_agent.py | 44 ++-- .../test_teachable_agent_interactively.py | 13 +- 4 files changed, 181 insertions(+), 104 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 6d4a887b782..6a295ec6374 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -12,16 +12,20 @@ try: from termcolor import colored except ImportError: + def colored(x, *args, **kwargs): return x class TeachableAgent(ConversableAgent): """Teachable Agent, a subclass of ConversableAgent using a vector database to remember user teachings.""" + def __init__( self, name="Agent", # default set to Assistant - system_message: Optional[str] = "You are a helpful AI assistant that remembers user teachings from prior chats.", + system_message: Optional[ + str + ] = "You are a helpful AI assistant that remembers user teachings from prior chats.", llm_config: Optional[Union[Dict, bool]] = None, is_termination_msg: Optional[Callable[[Dict], bool]] = None, max_consecutive_auto_reply: Optional[int] = None, @@ -95,7 +99,7 @@ def _generate_teachable_assistant_reply( messages = self._oai_messages[sender] # In case of a direct call. # Get the last user message. - user_text = messages[-1]['content'] + user_text = messages[-1]["content"] # This is a normal user turn. Keep track of it for potential storage later. self.user_comments.append(user_text) @@ -106,7 +110,7 @@ def _generate_teachable_assistant_reply( if new_user_text != user_text: # Make a copy of the message list, and replace the last user message with the new one. messages = messages.copy() - messages[-1]['content'] = new_user_text + messages[-1]["content"] = new_user_text ctxt = messages[-1].pop("context", None) # This peels off any "context" message from the list. msgs = self._oai_system_message + messages @@ -117,7 +121,7 @@ def _generate_teachable_assistant_reply( def learn_from_recent_user_comments(self): """Reviews the user comments from the last chat, and decides what teachings to store as memos.""" - print(colored("\nREVIEWING CHAT FOR USER TEACHINGS TO REMEMBER", 'light_yellow')) + print(colored("\nREVIEWING CHAT FOR USER TEACHINGS TO REMEMBER", "light_yellow")) # Look at each user turn. if len(self.user_comments) > 0: for comment in self.user_comments: @@ -128,37 +132,50 @@ def learn_from_recent_user_comments(self): def consider_memo_storage(self, comment): """Decides whether to store something from one user comment in the DB.""" # Check for a problem-solution pair. - response = self.analyze(comment, - "Does any part of the TEXT ask the agent to perform a task or solve a problem? Answer with just one word, yes or no.") - if 'yes' in response.lower(): + response = self.analyze( + comment, + "Does any part of the TEXT ask the agent to perform a task or solve a problem? Answer with just one word, yes or no.", + ) + if "yes" in response.lower(): # Can we extract advice? - advice = self.analyze(comment, - "Briefly copy any advice from the TEXT that may be useful for a similar but different task in the future. 
But if no advice is present, just respond with \'none\'.") - if 'none' not in advice.lower(): + advice = self.analyze( + comment, + "Briefly copy any advice from the TEXT that may be useful for a similar but different task in the future. But if no advice is present, just respond with 'none'.", + ) + if "none" not in advice.lower(): # Yes. Extract the task. - task = self.analyze(comment, - "Briefly copy just the task from the TEXT, then stop. Don't solve it, and don't include any advice.") + task = self.analyze( + comment, + "Briefly copy just the task from the TEXT, then stop. Don't solve it, and don't include any advice.", + ) # Generalize the task. - general_task = self.analyze(task, - "Summarize very briefly, in general terms, the type of task described in the TEXT. Leave out details that might not appear in a similar problem.") + general_task = self.analyze( + task, + "Summarize very briefly, in general terms, the type of task described in the TEXT. Leave out details that might not appear in a similar problem.", + ) # Add the task-advice (problem-solution) pair to the vector DB. if self.verbosity >= 1: - print(colored("\nREMEMBER THIS TASK-ADVICE PAIR", 'light_yellow')) + print(colored("\nREMEMBER THIS TASK-ADVICE PAIR", "light_yellow")) self.memo_store.add_input_output_pair(general_task, advice) # Check for information to be learned. - response = self.analyze(comment, - "Does the TEXT contain information that could be committed to memory? Answer with just one word, yes or no.") - if 'yes' in response.lower(): + response = self.analyze( + comment, + "Does the TEXT contain information that could be committed to memory? Answer with just one word, yes or no.", + ) + if "yes" in response.lower(): # Yes. What question would this information answer? - question = self.analyze(comment, - "Imagine that the user forgot this information in the TEXT. How would they ask you for this information? Include no other text in your response.") + question = self.analyze( + comment, + "Imagine that the user forgot this information in the TEXT. How would they ask you for this information? Include no other text in your response.", + ) # Extract the information. - answer = self.analyze(comment, - "Copy the information from the TEXT that should be committed to memory. Add no explanation.") + answer = self.analyze( + comment, "Copy the information from the TEXT that should be committed to memory. Add no explanation." + ) # Add the question-answer pair to the vector DB. if self.verbosity >= 1: - print(colored("\nREMEMBER THIS QUESTION-ANSWER PAIR", 'light_yellow')) + print(colored("\nREMEMBER THIS QUESTION-ANSWER PAIR", "light_yellow")) self.memo_store.add_input_output_pair(question, answer) def consider_memo_retrieval(self, comment): @@ -166,21 +183,26 @@ def consider_memo_retrieval(self, comment): # First, use the user comment directly as the lookup key. if self.verbosity >= 1: - print(colored('\nLOOK FOR RELEVANT MEMOS, AS QUESTION-ANSWER PAIRS', 'light_yellow')) + print(colored("\nLOOK FOR RELEVANT MEMOS, AS QUESTION-ANSWER PAIRS", "light_yellow")) memo_list = self.retrieve_relevant_memos(comment) # Next, if the comment involves a task, then extract and generalize the task before using it as the lookup key. - response = self.analyze(comment, - "Does any part of the TEXT ask the agent to perform a task or solve a problem? Answer with just one word, yes or no.") - if 'yes' in response.lower(): + response = self.analyze( + comment, + "Does any part of the TEXT ask the agent to perform a task or solve a problem? 
Answer with just one word, yes or no.", + ) + if "yes" in response.lower(): if self.verbosity >= 1: - print(colored('\nLOOK FOR RELEVANT MEMOS, AS TASK-ADVICE PAIRS', 'light_yellow')) + print(colored("\nLOOK FOR RELEVANT MEMOS, AS TASK-ADVICE PAIRS", "light_yellow")) # Extract the task. - task = self.analyze(comment, - "Copy just the task from the TEXT, then stop. Don't solve it, and don't include any advice.") + task = self.analyze( + comment, "Copy just the task from the TEXT, then stop. Don't solve it, and don't include any advice." + ) # Generalize the task. - general_task = self.analyze(task, - "Summarize very briefly, in general terms, the type of task described in the TEXT. Leave out details that might not appear in a similar problem.") + general_task = self.analyze( + task, + "Summarize very briefly, in general terms, the type of task described in the TEXT. Leave out details that might not appear in a similar problem.", + ) # Append any relevant memos. memo_list.extend(self.retrieve_relevant_memos(general_task)) @@ -192,14 +214,16 @@ def consider_memo_retrieval(self, comment): def retrieve_relevant_memos(self, input_text): """Returns semantically related memos from the DB.""" - memo_list = self.memo_store.get_related_memos(input_text, n_results=self.max_num_retrievals, threshold=self.recall_threshold) + memo_list = self.memo_store.get_related_memos( + input_text, n_results=self.max_num_retrievals, threshold=self.recall_threshold + ) if self.verbosity >= 1: # Was anything retrieved? if len(memo_list) == 0: # No. Look at the closest memo. - print(colored('\nTHE CLOSEST MEMO IS BEYOND THE THRESHOLD:', 'light_yellow')) - memo = self.memo_store.get_nearest_memo(input_text) + print(colored("\nTHE CLOSEST MEMO IS BEYOND THE THRESHOLD:", "light_yellow")) + self.memo_store.get_nearest_memo(input_text) print() # Print a blank line. The memo details were printed by get_nearest_memo(). # Create a list of just the memo output_text strings. @@ -208,14 +232,14 @@ def retrieve_relevant_memos(self, input_text): def concatenate_memo_texts(self, memo_list): """Concatenates the memo texts into a single string for inclusion in the chat context.""" - memo_texts = '' + memo_texts = "" if len(memo_list) > 0: info = "\n# Memories that might help\n" for memo in memo_list: info = info + "- " + memo + "\n" if self.verbosity >= 1: - print(colored('\nMEMOS APPENDED TO LAST USER MESSAGE...\n' + info + '\n', 'light_yellow')) - memo_texts = memo_texts + '\n' + info + print(colored("\nMEMOS APPENDED TO LAST USER MESSAGE...\n" + info + "\n", "light_yellow")) + memo_texts = memo_texts + "\n" + info return memo_texts def analyze(self, text_to_analyze, analysis_instructions): @@ -223,7 +247,9 @@ def analyze(self, text_to_analyze, analysis_instructions): if self.verbosity >= 2: # Use the messaging mechanism so that the analyzer's messages are included in the printed chat. self.analyzer.reset() # Clear the analyzer's list of messages. - self.send(recipient=self.analyzer, message=text_to_analyze, request_reply=False) # Put the message in the analyzer's list. + self.send( + recipient=self.analyzer, message=text_to_analyze, request_reply=False + ) # Put the message in the analyzer's list. self.send(recipient=self.analyzer, message=analysis_instructions, request_reply=True) # Request the reply. 
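            # The analyzer's reply is now the last message exchanged with it, so read the analysis back from there.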
return self.last_message(self.analyzer)["content"] else: @@ -231,7 +257,7 @@ def analyze(self, text_to_analyze, analysis_instructions): return self.analyzer.analyze_text(text_to_analyze, analysis_instructions) -class MemoStore(): +class MemoStore: """ Provides memory storage and retrieval for a TeachableAgent, using a vector database. Each DB entry (called a memo) is a pair of strings: an input text and an output text. @@ -239,6 +265,7 @@ class MemoStore(): The output text may be an answer to the question, or advice for how to perform the task. Vector embeddings are currently provided by chromadb's default sentence encoder. """ + def __init__(self, verbosity, reset, path_to_db_dir): """ Args: @@ -251,44 +278,47 @@ def __init__(self, verbosity, reset, path_to_db_dir): # Load or create the vector DB on disk. settings = Settings( - anonymized_telemetry=False, - allow_reset=True, - is_persistent=True, - persist_directory=path_to_db_dir) + anonymized_telemetry=False, allow_reset=True, is_persistent=True, persist_directory=path_to_db_dir + ) self.db_client = chromadb.Client(settings) self.vec_db = self.db_client.create_collection("memos", get_or_create=True) # The collection is the DB. if reset: self.reset_db() # Load or create the associated memo dict on disk. - self.path_to_dict = os.path.join(path_to_db_dir, 'uid_text_dict.pkl') + self.path_to_dict = os.path.join(path_to_db_dir, "uid_text_dict.pkl") self.uid_text_dict = {} self.last_memo_id = 0 if (not reset) and os.path.exists(self.path_to_dict): - print(colored("\nLOADING MEMORY FROM DISK", 'light_green')) - print(colored(" Location = {}".format(self.path_to_dict), 'light_green')) - with open(self.path_to_dict, 'rb') as f: + print(colored("\nLOADING MEMORY FROM DISK", "light_green")) + print(colored(" Location = {}".format(self.path_to_dict), "light_green")) + with open(self.path_to_dict, "rb") as f: self.uid_text_dict = pickle.load(f) self.last_memo_id = len(self.uid_text_dict) if self.verbosity >= 3: self.list_memos() def list_memos(self): - print(colored("LIST OF MEMOS", 'light_green')) + print(colored("LIST OF MEMOS", "light_green")) for uid, text in self.uid_text_dict.items(): input_text, output_text = text - print(colored(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text), 'light_green')) + print( + colored( + " ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text), + "light_green", + ) + ) def close(self): """Saves the dict to disk.""" - print(colored("\nSAVING MEMORY TO DISK", 'light_green')) - print(colored(" Location = {}".format(self.path_to_dict), 'light_green')) - with open(self.path_to_dict, 'wb') as file: + print(colored("\nSAVING MEMORY TO DISK", "light_green")) + print(colored(" Location = {}".format(self.path_to_dict), "light_green")) + with open(self.path_to_dict, "wb") as file: pickle.dump(self.uid_text_dict, file) def reset_db(self): """Forces immediate deletion of the DB's contents, in memory and on disk.""" - print(colored("\nCLEARING MEMORY", 'light_green')) + print(colored("\nCLEARING MEMORY", "light_green")) self.db_client.delete_collection("memos") self.vec_db = self.db_client.create_collection("memos") self.uid_text_dict = {} @@ -299,20 +329,32 @@ def add_input_output_pair(self, input_text, output_text): self.vec_db.add(documents=[input_text], ids=[str(self.last_memo_id)]) self.uid_text_dict[str(self.last_memo_id)] = input_text, output_text if self.verbosity >= 1: - print(colored("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n 
OUTPUT\n {}".format( - self.last_memo_id, input_text, output_text), 'light_green')) + print( + colored( + "\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}".format( + self.last_memo_id, input_text, output_text + ), + "light_green", + ) + ) if self.verbosity >= 3: self.list_memos() def get_nearest_memo(self, query_text): """Retrieves the nearest memo to the given query text.""" results = self.vec_db.query(query_texts=[query_text], n_results=1) - uid, input_text, distance = results['ids'][0][0], results['documents'][0][0], results['distances'][0][0] + uid, input_text, distance = results["ids"][0][0], results["documents"][0][0], results["distances"][0][0] input_text_2, output_text = self.uid_text_dict[uid] assert input_text == input_text_2 if self.verbosity >= 1: - print(colored("\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( - input_text, output_text, distance), 'light_green')) + print( + colored( + "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( + input_text, output_text, distance + ), + "light_green", + ) + ) return input_text, output_text, distance def get_related_memos(self, query_text, n_results, threshold): @@ -321,33 +363,50 @@ def get_related_memos(self, query_text, n_results, threshold): n_results = len(self.uid_text_dict) results = self.vec_db.query(query_texts=[query_text], n_results=n_results) memos = [] - num_results = len(results['ids'][0]) + num_results = len(results["ids"][0]) for i in range(num_results): - uid, input_text, distance = results['ids'][0][i], results['documents'][0][i], results['distances'][0][i] + uid, input_text, distance = results["ids"][0][i], results["documents"][0][i], results["distances"][0][i] if distance < threshold: input_text_2, output_text = self.uid_text_dict[uid] assert input_text == input_text_2 if self.verbosity >= 1: - print(colored( - "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( - input_text, output_text, distance), 'light_green')) + print( + colored( + "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( + input_text, output_text, distance + ), + "light_green", + ) + ) memos.append((input_text, output_text, distance)) return memos def prepopulate(self): """Adds a few arbitrary examples to the vector DB, just to make retrieval less trivial.""" if self.verbosity >= 1: - print(colored("\nPREPOPULATING MEMORY", 'light_green')) + print(colored("\nPREPOPULATING MEMORY", "light_green")) examples = [] - examples.append({'text': 'When I say papers I mean research papers, which are typically pdfs.', 'label': 'yes'}) - examples.append({'text': 'Please verify that each paper you listed actually uses langchain.', 'label': 'no'}) - examples.append({'text': 'Tell gpt the output should still be latex code.', 'label': 'no'}) - examples.append({'text': 'Hint: convert pdfs to text and then answer questions based on them.', 'label': 'yes'}) - examples.append({'text': 'To create a good PPT, include enough content to make it interesting.', 'label': 'yes'}) - examples.append({'text': 'No, for this case the columns should be aspects and the rows should be frameworks.', 'label': 'no'}) - examples.append({'text': 'When writing code, remember to include any libraries that are used.', 'label': 'yes'}) - examples.append({'text': 'Please summarize the papers by Eric Horvitz on bounded rationality.', 'label': 
'no'}) - examples.append({'text': 'Compare the h-index of Daniel Weld and Oren Etzioni.', 'label': 'no'}) - examples.append({'text': 'Double check to be sure that the columns in a table correspond to what was asked for.', 'label': 'yes'}) + examples.append({"text": "When I say papers I mean research papers, which are typically pdfs.", "label": "yes"}) + examples.append({"text": "Please verify that each paper you listed actually uses langchain.", "label": "no"}) + examples.append({"text": "Tell gpt the output should still be latex code.", "label": "no"}) + examples.append({"text": "Hint: convert pdfs to text and then answer questions based on them.", "label": "yes"}) + examples.append( + {"text": "To create a good PPT, include enough content to make it interesting.", "label": "yes"} + ) + examples.append( + { + "text": "No, for this case the columns should be aspects and the rows should be frameworks.", + "label": "no", + } + ) + examples.append({"text": "When writing code, remember to include any libraries that are used.", "label": "yes"}) + examples.append({"text": "Please summarize the papers by Eric Horvitz on bounded rationality.", "label": "no"}) + examples.append({"text": "Compare the h-index of Daniel Weld and Oren Etzioni.", "label": "no"}) + examples.append( + { + "text": "Double check to be sure that the columns in a table correspond to what was asked for.", + "label": "yes", + } + ) for example in examples: - self.add_input_output_pair(example['text'], example['label']) + self.add_input_output_pair(example["text"], example["label"]) diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py index 15b26fce55a..ae501e74587 100644 --- a/autogen/agentchat/contrib/text_analyzer_agent.py +++ b/autogen/agentchat/contrib/text_analyzer_agent.py @@ -11,6 +11,7 @@ class TextAnalyzerAgent(ConversableAgent): """Text Analysis agent, a subclass of ConversableAgent designed to answer specific questions about text.""" + def __init__( self, name: str, @@ -33,7 +34,7 @@ def __init__( **kwargs, ) self.register_reply(Agent, TextAnalyzerAgent._analyze_in_reply) - self.use_cache = False # 1 to skip LLM calls made previously by relying on cached responses. + self.use_cache = False # 1 to skip LLM calls made previously by relying on cached responses. def _analyze_in_reply( self, @@ -51,17 +52,21 @@ def _analyze_in_reply( assert len(messages) == 2 # Delegate to the analysis method. - return True, self.analyze_text(messages[0]['content'], messages[1]['content']) + return True, self.analyze_text(messages[0]["content"], messages[1]["content"]) def analyze_text(self, text_to_analyze, analysis_instructions): """Analyzes the given text as instructed, and returns the analysis.""" # Assemble the message. - text_to_analyze = '# TEXT\n' + text_to_analyze + '\n' - analysis_instructions = '# INSTRUCTIONS\n' + analysis_instructions + '\n' - msg_text = '\n'.join([analysis_instructions, text_to_analyze, analysis_instructions]) # Repeat the instructions. + text_to_analyze = "# TEXT\n" + text_to_analyze + "\n" + analysis_instructions = "# INSTRUCTIONS\n" + analysis_instructions + "\n" + msg_text = "\n".join( + [analysis_instructions, text_to_analyze, analysis_instructions] + ) # Repeat the instructions. messages = self._oai_system_message + [{"role": "user", "content": msg_text}] # Generate and return the analysis string. 
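# Sketch (standalone, for illustration) mirroring the analyze_text logic above:
# the instructions are repeated after the text so the model reads them last,
# which tends to improve compliance on long inputs. The function name is ours,
# not part of the patch.
def build_analysis_prompt(text_to_analyze: str, analysis_instructions: str) -> str:
    text_block = "# TEXT\n" + text_to_analyze + "\n"
    instruction_block = "# INSTRUCTIONS\n" + analysis_instructions + "\n"
    return "\n".join([instruction_block, text_block, instruction_block])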
- response = oai.ChatCompletion.create(context=None, messages=messages, use_cache=self.use_cache, **self.llm_config) + response = oai.ChatCompletion.create( + context=None, messages=messages, use_cache=self.use_cache, **self.llm_config + ) output_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] return output_text diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 6254e2903cd..24d9730fd50 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -5,6 +5,7 @@ try: from termcolor import colored except ImportError: + def colored(x, *args, **kwargs): return x @@ -17,7 +18,7 @@ def colored(x, *args, **kwargs): # filter_dict={"model": ["gpt-4-0613"]} # filter_dict={"model": ["gpt-3.5-turbo-0613"]} # filter_dict={"model": ["gpt-4"]} -filter_dict={"model": ["gpt-35-turbo-16k"]} +filter_dict = {"model": ["gpt-35-turbo-16k"]} def create_teachable_agent(reset_db=False): @@ -28,14 +29,14 @@ def create_teachable_agent(reset_db=False): config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) agent = TeachableAgent( name="agent", - llm_config={ - "config_list": config_list, - "request_timeout": 120}, + llm_config={"config_list": config_list, "request_timeout": 120}, teach_config={ "verbosity": verbosity, "reset_db": reset_db, "path_to_db_dir": "./tmp/teachable_agent_db", - "recall_threshold": recall_threshold}) + "recall_threshold": recall_threshold, + }, + ) return agent @@ -43,18 +44,18 @@ def check_agent_response(agent, user, correct_answer): """Checks whether the agent's response contains the correct answer, and returns the number of errors (1 or 0).""" agent_response = user.last_message(agent)["content"] if correct_answer not in agent_response: - print(colored(f"\nTEST FAILED: EXPECTED ANSWER {correct_answer} NOT FOUND IN AGENT RESPONSE", 'light_red')) + print(colored(f"\nTEST FAILED: EXPECTED ANSWER {correct_answer} NOT FOUND IN AGENT RESPONSE", "light_red")) if assert_on_error: assert correct_answer in agent_response return 1 else: - print(colored(f"\nTEST PASSED: EXPECTED ANSWER {correct_answer} FOUND IN AGENT RESPONSE", 'light_cyan')) + print(colored(f"\nTEST PASSED: EXPECTED ANSWER {correct_answer} FOUND IN AGENT RESPONSE", "light_cyan")) return 0 def test_question_answer_pair(): """Tests whether the agent can answer a question after being taught the answer in a previous chat.""" - print(colored("\nTEST QUESTION-ANSWER PAIRS", 'light_cyan')) + print(colored("\nTEST QUESTION-ANSWER PAIRS", "light_cyan")) num_errors, num_tests = 0, 0 agent = create_teachable_agent(reset_db=True) # For a clean test, clear the agent's memory. user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") @@ -66,7 +67,10 @@ def test_question_answer_pair(): user.initiate_chat(recipient=agent, message="What is the twist of 5 and 7?") # Explain the terminology to the agent. - user.send(recipient=agent, message="Actually, the twist of two or more numbers is their product minus their sum. Try again.") + user.send( + recipient=agent, + message="Actually, the twist of two or more numbers is their product minus their sum. Try again.", + ) num_errors += check_agent_response(agent, user, "23") num_tests += 1 @@ -74,7 +78,7 @@ def test_question_answer_pair(): agent.learn_from_recent_user_comments() # Now start a new chat to clear the context, and require the agent to use its new knowledge. 
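# Sketch: a quick sanity check of the "twist" arithmetic these tests assert on.
# Per the teaching message in the test above, the twist of two or more numbers
# is their product minus their sum. The helper below is illustrative, not repo code.
from functools import reduce
from operator import mul

def twist(*nums: int) -> int:
    return reduce(mul, nums) - sum(nums)

assert twist(5, 7) == 23  # the answer taught in the first chat
assert twist(8, 3, 2) == 35  # the answer the new chat below must recall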
- print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", 'light_cyan')) + print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", "light_cyan")) user.initiate_chat(recipient=agent, message="What's the twist of 8 and 3 and 2?") num_errors += check_agent_response(agent, user, "35") num_tests += 1 @@ -86,7 +90,7 @@ def test_question_answer_pair(): def test_task_advice_pair(): """Tests whether the agent can recall and use advice after being taught a task-advice pair in a previous chat.""" - print(colored("\nTEST TASK-ADVICE PAIRS", 'light_cyan')) + print(colored("\nTEST TASK-ADVICE PAIRS", "light_cyan")) num_errors, num_tests = 0, 0 agent = create_teachable_agent(reset_db=True) # For a clean test, clear the agent's memory. user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") @@ -95,7 +99,10 @@ def test_task_advice_pair(): agent.prepopulate_db() # Ask the agent to do something, and provide some helpful advice. - user.initiate_chat(recipient=agent, message="Compute the twist of 5 and 7. Here's a hint: The twist of two or more numbers is their product minus their sum.") + user.initiate_chat( + recipient=agent, + message="Compute the twist of 5 and 7. Here's a hint: The twist of two or more numbers is their product minus their sum.", + ) num_errors += check_agent_response(agent, user, "23") num_tests += 1 @@ -103,7 +110,7 @@ def test_task_advice_pair(): agent.learn_from_recent_user_comments() # Now start a new chat to clear the context, and require the agent to use its new knowledge. - print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", 'light_cyan')) + print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", "light_cyan")) user.initiate_chat(recipient=agent, message="Please calculate the twist of 8 and 3 and 2.") num_errors += check_agent_response(agent, user, "35") num_tests += 1 @@ -127,9 +134,14 @@ def test_task_advice_pair(): total_num_errors += num_errors total_num_tests += num_tests - print(colored(f"\nTRIAL {trial + 1} OF {num_trials} FINISHED", 'light_cyan')) + print(colored(f"\nTRIAL {trial + 1} OF {num_trials} FINISHED", "light_cyan")) if total_num_errors == 0: - print(colored("\nTEACHABLE AGENT TESTS FINISHED WITH ZERO ERRORS", 'light_cyan')) + print(colored("\nTEACHABLE AGENT TESTS FINISHED WITH ZERO ERRORS", "light_cyan")) else: - print(colored(f"\nTEACHABLE AGENT TESTS FINISHED WITH {total_num_errors} / {total_num_tests} TOTAL ERRORS ({100.0 * total_num_errors / total_num_tests}%)", 'light_red')) + print( + colored( + f"\nTEACHABLE AGENT TESTS FINISHED WITH {total_num_errors} / {total_num_tests} TOTAL ERRORS ({100.0 * total_num_errors / total_num_tests}%)", + "light_red", + ) + ) diff --git a/test/agentchat/test_teachable_agent_interactively.py b/test/agentchat/test_teachable_agent_interactively.py index 3818aad8347..d79939c409c 100644 --- a/test/agentchat/test_teachable_agent_interactively.py +++ b/test/agentchat/test_teachable_agent_interactively.py @@ -5,6 +5,7 @@ try: from termcolor import colored except ImportError: + def colored(x, *args, **kwargs): return x @@ -13,7 +14,7 @@ def colored(x, *args, **kwargs): recall_threshold = 1.5 # Higher numbers allow more (but less relevant) memos to be recalled. # Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input. 
-filter_dict={"model": ["gpt-4-0613"]} +filter_dict = {"model": ["gpt-4-0613"]} def create_teachable_agent(reset_db=False): @@ -24,14 +25,14 @@ def create_teachable_agent(reset_db=False): config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) agent = TeachableAgent( name="agent", - llm_config={ - "config_list": config_list, - "request_timeout": 120}, + llm_config={"config_list": config_list, "request_timeout": 120}, teach_config={ "verbosity": verbosity, "reset_db": reset_db, "path_to_db_dir": "./tmp/interactive/teachable_agent_db", - "recall_threshold": recall_threshold}) + "recall_threshold": recall_threshold, + }, + ) return agent @@ -39,7 +40,7 @@ def interact_freely_with_user(): """Starts a free-form chat between the user and TeachableAgent.""" # Create the agents. - print(colored("\nLoading previous memory (if any) from disk.", 'light_cyan')) + print(colored("\nLoading previous memory (if any) from disk.", "light_cyan")) agent = create_teachable_agent(reset_db=False) user = UserProxyAgent("user", human_input_mode="ALWAYS") From bfc0bc0bf8e1966ce34ab2534669872c90b70134 Mon Sep 17 00:00:00 2001 From: riloynd Date: Tue, 17 Oct 2023 21:36:08 -0700 Subject: [PATCH 34/50] Skip tests that fail to import openai --- test/agentchat/test_teachable_agent.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 24d9730fd50..da8d4321e11 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -55,6 +55,11 @@ def check_agent_response(agent, user, correct_answer): def test_question_answer_pair(): """Tests whether the agent can answer a question after being taught the answer in a previous chat.""" + try: + import openai + except ImportError: + return + print(colored("\nTEST QUESTION-ANSWER PAIRS", "light_cyan")) num_errors, num_tests = 0, 0 agent = create_teachable_agent(reset_db=True) # For a clean test, clear the agent's memory. @@ -90,6 +95,11 @@ def test_question_answer_pair(): def test_task_advice_pair(): """Tests whether the agent can recall and use advice after being taught a task-advice pair in a previous chat.""" + try: + import openai + except ImportError: + return + print(colored("\nTEST TASK-ADVICE PAIRS", "light_cyan")) num_errors, num_tests = 0, 0 agent = create_teachable_agent(reset_db=True) # For a clean test, clear the agent's memory. From 4d9035190ef414fa46c6f7c20b8351b77b6bafd5 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Wed, 18 Oct 2023 18:26:40 -0700 Subject: [PATCH 35/50] Address reviewer feedback. 
--- autogen/agentchat/contrib/teachable_agent.py | 15 +++++++++------ .../agentchat/contrib/text_analyzer_agent.py | 4 ++-- notebook/agentchat_Teachability.ipynb | 18 ++++++++++-------- setup.py | 2 +- test/agentchat/test_teachable_agent.py | 6 +++--- .../test_teachable_agent_interactively.py | 2 +- website/docs/Examples/AutoGen-AgentChat.md | 1 + website/docs/Use-Cases/agent_chat.md | 1 + 8 files changed, 28 insertions(+), 21 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 6a295ec6374..98b9b1afbc0 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -18,7 +18,8 @@ def colored(x, *args, **kwargs): class TeachableAgent(ConversableAgent): - """Teachable Agent, a subclass of ConversableAgent using a vector database to remember user teachings.""" + """Teachable Agent, a subclass of ConversableAgent using a vector database to remember user teachings. + In this class, the term 'user' refers to any caller (human or not) sending messages to this agent.""" def __init__( self, @@ -45,7 +46,7 @@ def __init__( - path_to_db_dir (Optional, str): path to the directory where the DB is stored. Default "./tmp/teachable_agent_db" - prepopulate (Optional, int): True (default) to prepopulate the DB with a set of input-output pairs. - use_cache (Optional, bool): True to skip LLM calls made previously by relying on cached responses. Default False. - - recall_threshold (Optional, float): The distance threshold for retrieving memos from the DB. Default 1.5. + - recall_threshold (Optional, float): The maximum distance for retrieved memos, where 0.0 is exact match. Default 1.5. Larger values allow more (but less relevant) memos to be recalled. - max_num_retrievals (Optional, int): The maximum number of memos to retrieve from the DB. Default 10. **kwargs (dict): other kwargs in [ConversableAgent](../conversable_agent#__init__). """ @@ -59,7 +60,7 @@ def __init__( llm_config=llm_config, **kwargs, ) - self.register_reply(Agent, TeachableAgent._generate_teachable_assistant_reply) + self.register_reply(Agent, TeachableAgent._generate_teachable_assistant_reply, 1) self._teach_config = {} if teach_config is None else teach_config self.verbosity = self._teach_config.get("verbosity", 0) @@ -94,12 +95,14 @@ def _generate_teachable_assistant_reply( Uses self.analyzer to make decisions about memo storage and retrieval. """ if self.llm_config is False: - return False, None # Return if no LLM was provided. + raise ValueError("TeachableAgent requires self.llm_config to be set in its base class.") if messages is None: messages = self._oai_messages[sender] # In case of a direct call. # Get the last user message. user_text = messages[-1]["content"] + if not isinstance(user_text, str): + raise ValueError("TeachableAgent currently assumes that the message content is a simple string. This error serves to flag a test case for relaxing this assumption.") # This is a normal user turn. Keep track of it for potential storage later. self.user_comments.append(user_text) @@ -119,7 +122,7 @@ def _generate_teachable_assistant_reply( return True, response_text - def learn_from_recent_user_comments(self): + def learn_from_user_feedback(self): """Reviews the user comments from the last chat, and decides what teachings to store as memos.""" print(colored("\nREVIEWING CHAT FOR USER TEACHINGS TO REMEMBER", "light_yellow")) # Look at each user turn. 
@@ -263,7 +266,7 @@ class MemoStore: Each DB entry (called a memo) is a pair of strings: an input text and an output text. The input text may be a question, or a task to perform. The output text may be an answer to the question, or advice for how to perform the task. - Vector embeddings are currently provided by chromadb's default sentence encoder. + Vector embeddings are currently provided by Chroma's default Sentence Transformers. """ def __init__(self, verbosity, reset, path_to_db_dir): diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py index ae501e74587..cc3b01b3cfc 100644 --- a/autogen/agentchat/contrib/text_analyzer_agent.py +++ b/autogen/agentchat/contrib/text_analyzer_agent.py @@ -33,7 +33,7 @@ def __init__( llm_config=llm_config, **kwargs, ) - self.register_reply(Agent, TextAnalyzerAgent._analyze_in_reply) + self.register_reply(Agent, TextAnalyzerAgent._analyze_in_reply, 1) self.use_cache = False # 1 to skip LLM calls made previously by relying on cached responses. def _analyze_in_reply( @@ -46,7 +46,7 @@ def _analyze_in_reply( Assumes exactly two messages containing the text to analyze and the analysis instructions respectively. See TeachableAgent.analyze for an example of how to use this method.""" if self.llm_config is False: - return False, None # Return if no LLM was provided. + raise ValueError("TextAnalyzerAgent requires self.llm_config to be set in its base class.") if messages is None: messages = self._oai_messages[sender] # In case of a direct call. assert len(messages) == 2 diff --git a/notebook/agentchat_Teachability.ipynb b/notebook/agentchat_Teachability.ipynb index fefc54b6c42..d409a8b2c69 100644 --- a/notebook/agentchat_Teachability.ipynb +++ b/notebook/agentchat_Teachability.ipynb @@ -5,7 +5,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\"Open" + "\"Open" ] }, { @@ -17,15 +17,17 @@ "\n", "Conversational assistants based on LLMs can remember the current chat with the user, and can even demonstrate in-context learning of things that the user teaches the assistant during the chat. But these memories and learnings are lost once the chat is over, or when a single chat grows too long. In subsequent chats, the user is forced to repeat any necessary instructions over and over.\n", "\n", - "`TeachableAgent` addresses these limitations by persisting user teachings across chat boundaries in long-term memory (a vector database). Memory is save to disk at the end of each chat, then loaded from disk at the start of the next. Instead of copying all of memory into the context window, which would eat up valuable space, individual memories (called memos) are retrieved into context as needed. This allows the user to teach frequently used facts and skills to the agent just once, and have the agent remember them in later chats.\n", + "`TeachableAgent` addresses these limitations by persisting user teachings across chat boundaries in long-term memory (a vector database). Memory is saved to disk at the end of each chat, then loaded from disk at the start of the next. Instead of copying all of memory into the context window, which would eat up valuable space, individual memories (called memos) are retrieved into context as needed. 
This allows the user to teach frequently used facts and skills to the agent just once, and have the agent remember them in later chats.\n", + "\n", + "In making decisions about memo storage and retrieval, `TeachableAgent` calls an instance of `TextAnalyzerAgent` to analyze pieces of text in several different ways. This adds extra LLM calls involving a relatively small number of tokens. These calls can add a few seconds to the time a user waits for a response.\n", "\n", "This notebook demonstrates how `TeachableAgent` can learn facts, preferences, and skills from users. To interact with `TeachableAgent` yourself, run [test_teachable_agent_interactively.py](../test/agentchat/test_teachable_agent_interactively.py).\n", "\n", "## Requirements\n", "\n", - "AutoGen requires `Python>=3.8`. To run this notebook example, please install the [teachability] option.\n", + "AutoGen requires `Python>=3.8`. To run this notebook example, please install the [teachable] option.\n", "```bash\n", - "pip install \"pyautogen[teachability]\"\n", + "pip install \"pyautogen[teachable]\"\n", "```" ] }, @@ -36,7 +38,7 @@ "outputs": [], "source": [ "%%capture --no-stderr\n", - "# %pip install \"pyautogen[teachability]" + "# %pip install \"pyautogen[teachable]" ] }, { @@ -312,7 +314,7 @@ } ], "source": [ - "agent.learn_from_recent_user_comments()" + "agent.learn_from_user_feedback()" ] }, { @@ -482,7 +484,7 @@ } ], "source": [ - "agent.learn_from_recent_user_comments()\n", + "agent.learn_from_user_feedback()\n", "\n", "text = \"\"\"Please summarize this abstract.\n", "\n", @@ -677,7 +679,7 @@ } ], "source": [ - "agent.learn_from_recent_user_comments()\n", + "agent.learn_from_user_feedback()\n", "\n", "text = \"\"\"Consider the identity: \n", "9 * 4 + 6 * 6 = 72\n", diff --git a/setup.py b/setup.py index 6a5b2ba6bda..05cabc3ef67 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ "blendsearch": ["flaml[blendsearch]"], "mathchat": ["sympy", "pydantic==1.10.9", "wolframalpha"], "retrievechat": ["chromadb", "tiktoken", "sentence_transformers", "pypdf"], - "teachability": ["chromadb"], + "teachable": ["chromadb"], }, classifiers=[ "Programming Language :: Python :: 3", diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index da8d4321e11..a4b6bae3bf4 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -18,7 +18,7 @@ def colored(x, *args, **kwargs): # filter_dict={"model": ["gpt-4-0613"]} # filter_dict={"model": ["gpt-3.5-turbo-0613"]} # filter_dict={"model": ["gpt-4"]} -filter_dict = {"model": ["gpt-35-turbo-16k"]} +filter_dict = {"model": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]} def create_teachable_agent(reset_db=False): @@ -80,7 +80,7 @@ def test_question_answer_pair(): num_tests += 1 # Let the agent remember things that should be learned from this chat. - agent.learn_from_recent_user_comments() + agent.learn_from_user_feedback() # Now start a new chat to clear the context, and require the agent to use its new knowledge. print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", "light_cyan")) @@ -117,7 +117,7 @@ def test_task_advice_pair(): num_tests += 1 # Let the agent remember things that should be learned from this chat. - agent.learn_from_recent_user_comments() + agent.learn_from_user_feedback() # Now start a new chat to clear the context, and require the agent to use its new knowledge. 
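# Sketch of the persistence cycle the notebook text describes: the
# uid -> (input_text, output_text) dict is pickled when a chat ends and
# reloaded when the next chat starts. Mirrors MemoStore.close() and
# MemoStore.__init__; these function names are illustrative only.
import os
import pickle

def save_memos(path: str, uid_text_dict: dict) -> None:
    with open(path, "wb") as f:
        pickle.dump(uid_text_dict, f)

def load_memos(path: str) -> dict:
    if not os.path.exists(path):
        return {}  # first run: nothing on disk yet
    with open(path, "rb") as f:
        return pickle.load(f)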
print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", "light_cyan")) diff --git a/test/agentchat/test_teachable_agent_interactively.py b/test/agentchat/test_teachable_agent_interactively.py index d79939c409c..c68535fa0d9 100644 --- a/test/agentchat/test_teachable_agent_interactively.py +++ b/test/agentchat/test_teachable_agent_interactively.py @@ -48,7 +48,7 @@ def interact_freely_with_user(): agent.initiate_chat(user, message="Greetings, I'm a teachable user assistant! What's on your mind today?") # Let the agent remember things that should be learned from this chat. - agent.learn_from_recent_user_comments() + agent.learn_from_user_feedback() # Wrap up. agent.close_db() diff --git a/website/docs/Examples/AutoGen-AgentChat.md b/website/docs/Examples/AutoGen-AgentChat.md index 1360aa9f5d8..fb86f3c204a 100644 --- a/website/docs/Examples/AutoGen-AgentChat.md +++ b/website/docs/Examples/AutoGen-AgentChat.md @@ -16,4 +16,5 @@ Links to notebook examples: * [Automated Complex Task Solving by Group Chat (with 6 group member agents and 1 manager agent)](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_groupchat_research.ipynb) * [Automated Continual Learning from New Data](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_stream.ipynb) * [Teach Agents New Skills & Reuse via Automated Chat](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_teaching.ipynb) +* [Teach Agents New Facts, User Preferences and Skills Beyond Coding](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_Teachability.ipynb) * [Automated Code Generation and Question Answering with Retrieval Augemented Agents](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat.ipynb) diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md index 5f8d22619a4..a5dd26706d0 100644 --- a/website/docs/Use-Cases/agent_chat.md +++ b/website/docs/Use-Cases/agent_chat.md @@ -92,6 +92,7 @@ The figure below shows six examples of applications built using AutoGen. * [Automated Complex Task Solving by Group Chat (with 6 group member agents and 1 manager agent)](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_groupchat_research.ipynb) * [Automated Continual Learning from New Data](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_stream.ipynb) * [Teach Agents New Skills & Reuse via Automated Chat](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_teaching.ipynb) +* [Teach Agents New Facts, User Preferences and Skills Beyond Coding](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_Teachability.ipynb) * [Automated Code Generation and Question Answering with Retrieval Augemented Agents](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat.ipynb) From 88d4f951cd1aa9717d8b90bfb09e0d37d9b1e10c Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Wed, 18 Oct 2023 18:31:06 -0700 Subject: [PATCH 36/50] lint --- autogen/agentchat/contrib/teachable_agent.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 98b9b1afbc0..20ce5c8fc77 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -102,7 +102,9 @@ def _generate_teachable_assistant_reply( # Get the last user message. 
user_text = messages[-1]["content"] if not isinstance(user_text, str): - raise ValueError("TeachableAgent currently assumes that the message content is a simple string. This error serves to flag a test case for relaxing this assumption.") + raise ValueError( + "TeachableAgent currently assumes that the message content is a simple string. This error serves to flag a test case for relaxing this assumption." + ) # This is a normal user turn. Keep track of it for potential storage later. self.user_comments.append(user_text) From f1248edd7d6e2e298f5c89a9a1220db0e42bfc95 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 19 Oct 2023 10:50:33 -0700 Subject: [PATCH 37/50] refactoring --- autogen/agentchat/contrib/teachable_agent.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 20ce5c8fc77..14a31ec8fec 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -99,29 +99,29 @@ def _generate_teachable_assistant_reply( if messages is None: messages = self._oai_messages[sender] # In case of a direct call. - # Get the last user message. - user_text = messages[-1]["content"] - if not isinstance(user_text, str): + # Get the last user turn. + last_message = messages[-1] + user_text = last_message["content"] + if (not isinstance(user_text, str)) or ("context" in last_message): raise ValueError( "TeachableAgent currently assumes that the message content is a simple string. This error serves to flag a test case for relaxing this assumption." ) - # This is a normal user turn. Keep track of it for potential storage later. + # Keep track of this user turn as a potential source of memos later. self.user_comments.append(user_text) + # Consider whether to retrieve something from the DB. if self.memo_store.last_memo_id > 0: - # Consider whether to retrieve something from the DB. new_user_text = self.consider_memo_retrieval(user_text) if new_user_text != user_text: # Make a copy of the message list, and replace the last user message with the new one. messages = messages.copy() messages[-1]["content"] = new_user_text - ctxt = messages[-1].pop("context", None) # This peels off any "context" message from the list. + # Generate a response. msgs = self._oai_system_message + messages - response = oai.ChatCompletion.create(context=ctxt, messages=msgs, use_cache=self.use_cache, **self.llm_config) + response = oai.ChatCompletion.create(messages=msgs, use_cache=self.use_cache, **self.llm_config) response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] - return True, response_text def learn_from_user_feedback(self): From ea1431551973a888b1cf13a4cbc8bd40d507187a Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 19 Oct 2023 11:22:41 -0700 Subject: [PATCH 38/50] Improve wording --- notebook/agentchat_Teachability.ipynb | 4 ++-- test/agentchat/test_teachable_agent.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/notebook/agentchat_Teachability.ipynb b/notebook/agentchat_Teachability.ipynb index d409a8b2c69..e0323e436c5 100644 --- a/notebook/agentchat_Teachability.ipynb +++ b/notebook/agentchat_Teachability.ipynb @@ -498,8 +498,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Learning hard tasks\n", - "Finally, let's extend the agent's capabilities by teaching it how to accomplish a challenging task. 
\n", + "## Learning new skills\n", + "Finally, let's extend the agent's capabilities by teaching it a new skill for accomplishing a challenging type of task. \n", "\n", "The [Sparks of AGI](https://arxiv.org/abs/2303.12712) paper evaluated GPT-4 on math problems like the following, which it could only solve 32% of the time." ] diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index a4b6bae3bf4..34dd66c8c9f 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -94,7 +94,7 @@ def test_question_answer_pair(): def test_task_advice_pair(): - """Tests whether the agent can recall and use advice after being taught a task-advice pair in a previous chat.""" + """Tests whether the agent can demonstrate a new skill after being taught a task-advice pair in a previous chat.""" try: import openai except ImportError: From aef35be13d35e73ff348588cdd7854c454b099c1 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 19 Oct 2023 16:54:27 -0700 Subject: [PATCH 39/50] Improve code coverage. --- notebook/agentchat_Teachability.ipynb | 68 +++++++++---------- ...tively.py => chat_with_teachable_agent.py} | 0 test/agentchat/test_teachable_agent.py | 45 ++++++------ 3 files changed, 58 insertions(+), 55 deletions(-) rename test/agentchat/{test_teachable_agent_interactively.py => chat_with_teachable_agent.py} (100%) diff --git a/notebook/agentchat_Teachability.ipynb b/notebook/agentchat_Teachability.ipynb index e0323e436c5..23f3a53a715 100644 --- a/notebook/agentchat_Teachability.ipynb +++ b/notebook/agentchat_Teachability.ipynb @@ -15,13 +15,13 @@ "source": [ "# Chatting with TeachableAgent\n", "\n", - "Conversational assistants based on LLMs can remember the current chat with the user, and can even demonstrate in-context learning of things that the user teaches the assistant during the chat. But these memories and learnings are lost once the chat is over, or when a single chat grows too long. In subsequent chats, the user is forced to repeat any necessary instructions over and over.\n", + "Conversational assistants based on LLMs can remember the current chat with the user, and can even demonstrate in-context learning of things that the user teaches the assistant during the chat. But these memories and learnings are lost once the chat is over, or when a single chat grows too long for the LLM to handle effectively. In subsequent chats, the user is forced to repeat any necessary instructions over and over.\n", "\n", "`TeachableAgent` addresses these limitations by persisting user teachings across chat boundaries in long-term memory (a vector database). Memory is saved to disk at the end of each chat, then loaded from disk at the start of the next. Instead of copying all of memory into the context window, which would eat up valuable space, individual memories (called memos) are retrieved into context as needed. This allows the user to teach frequently used facts and skills to the agent just once, and have the agent remember them in later chats.\n", "\n", "In making decisions about memo storage and retrieval, `TeachableAgent` calls an instance of `TextAnalyzerAgent` to analyze pieces of text in several different ways. This adds extra LLM calls involving a relatively small number of tokens. These calls can add a few seconds to the time a user waits for a response.\n", "\n", - "This notebook demonstrates how `TeachableAgent` can learn facts, preferences, and skills from users. 
To interact with `TeachableAgent` yourself, run [test_teachable_agent_interactively.py](../test/agentchat/test_teachable_agent_interactively.py).\n", + "This notebook demonstrates how `TeachableAgent` can learn facts, preferences, and skills from users. To chat with `TeachableAgent` yourself, run [chat_with_teachable_agent.py](../test/agentchat/chat_with_teachable_agent.py).\n", "\n", "## Requirements\n", "\n", @@ -132,8 +132,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[92m\n", - "CLEARING MEMORY\u001b[0m\n" + "\u001B[92m\n", + "CLEARING MEMORY\u001B[0m\n" ] } ], @@ -193,12 +193,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to agent):\n", + "\u001B[33muser\u001B[0m (to agent):\n", "\n", "What is the Vicuna model?\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33magent\u001b[0m (to user):\n", + "\u001B[33magent\u001B[0m (to user):\n", "\n", "I'm sorry, but as of my last programming, I don't have information regarding any \"Vicuna model\". It could be related to various fields like computer science, economics, or biology, but I would require more specific details about the context in which it is being referred to. If it's a new concept or theory, I might not have that information yet. However, if you have any details about the Vicuna model, I am here to learn and could help more effectively in the future.\n", "\n", @@ -220,12 +220,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to agent):\n", + "\u001B[33muser\u001B[0m (to agent):\n", "\n", "Vicuna is a 13B-parameter language model released by Meta.\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33magent\u001b[0m (to user):\n", + "\u001B[33magent\u001B[0m (to user):\n", "\n", "I apologize for any previous confusion. Given my last update preceding your new information, I wasn't aware of the Vicuna model. The model appears to be a cutting-edge innovation in the AI and language model field. Please note that as an AI, my knowledge is not updated in real-time, and it relies on periodic updates. Your information about Vicuna being a 13B-parameter language model developed by Meta is instrumental, and I'll remember for future conversations. Thank you for providing this information!\n", "\n", @@ -247,12 +247,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to agent):\n", + "\u001B[33muser\u001B[0m (to agent):\n", "\n", "What is the Orca model?\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33magent\u001b[0m (to user):\n", + "\u001B[33magent\u001B[0m (to user):\n", "\n", "I'm sorry for the confusion, but as of my last update, I don't have data about the \"Orca\" model. If it's a recent development or discovery, I may not know about it yet. However, if you have any details about the Orca model, I am more than ready to learn and remember, so I can help more effectively in our future conversations.\n", "\n", @@ -274,12 +274,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to agent):\n", + "\u001B[33muser\u001B[0m (to agent):\n", "\n", "Orca is a 13B-parameter language model released by Microsoft. 
It outperforms Vicuna on most tasks.\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33magent\u001b[0m (to user):\n", + "\u001B[33magent\u001B[0m (to user):\n", "\n", "Thank you for sharing this information. Based on your input, Orca is a 13-billion parameter language model developed by Microsoft, noted for outperforming Vicuna on most tasks. I'll remember this piece of information for future reference. Please note that at the time of my last update, I was not aware of the Orca model, and I appreciate your help in teaching me about recent advancements in AI and language models.\n", "\n", @@ -308,8 +308,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[93m\n", - "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n" + "\u001B[93m\n", + "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001B[0m\n" ] } ], @@ -333,12 +333,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to agent):\n", + "\u001B[33muser\u001B[0m (to agent):\n", "\n", "How does the Vicuna model compare to the Orca model?\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33magent\u001b[0m (to user):\n", + "\u001B[33magent\u001B[0m (to user):\n", "\n", "Based on your previous information, both Vicuna and Orca are language models with 13 billion parameters. Vicuna was released by Meta, while Orca was released by Microsoft. The primary comparison between the two, as per your teachings, is that Orca reportedly outperforms Vicuna on most tasks. However, without specific context or detailed benchmarks, I am unable to provide a more comprehensive comparison. It is recommended to look into further details such as model performance on specific tasks, computational efficiency, ease of use, and other factors depending on the intended usage.\n", "\n", @@ -368,7 +368,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to agent):\n", + "\u001B[33muser\u001B[0m (to agent):\n", "\n", "Please summarize this abstract.\n", "\n", @@ -378,7 +378,7 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33magent\u001b[0m (to user):\n", + "\u001B[33magent\u001B[0m (to user):\n", "\n", "AutoGen is an open-source platform developed by Qingyun Wu, Gagan Bansal, Jieye Zhang, and co-authors. It allows developers to create applications using large language models (LLMs) through a customizable multi-agent conversation framework. With AutoGen, developers can define agent behavior, and use a combination of LLMs, human inputs, and resources. They can also create flexible conversation patterns with natural language and computer code. Automotive can be utilized for several domains like mathematics, coding, Q&A, operations research, online decision-making, entertainment, etc. The utility and effectiveness of AutoGen have been confirmed through empirical studies. Although the abstract doesn't mention Orca, the 13B-parameter LLM from Microsoft, the relationship between the two may come up in future discussions as both concern the usage of LLMs.\n", "\n", @@ -412,7 +412,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to agent):\n", + "\u001B[33muser\u001B[0m (to agent):\n", "\n", "Please summarize this abstract. 
\n", "When I'm summarizing an abstract, I try to make the summary contain just three short bullet points: the title, the innovation, and the key empirical results.\n", @@ -423,7 +423,7 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33magent\u001b[0m (to user):\n", + "\u001B[33magent\u001B[0m (to user):\n", "\n", "- Title: \"AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\" \n", "- Innovation: AutoGen is a novel open-source framework developed to allow the creation of LLM applications via multiple, customizable and conversable agents. The agents can operate in various combinations of LLMs, human inputs and tools. Both natural language and code can be used to define their interaction behaviors flexibly.\n", @@ -460,9 +460,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[93m\n", - "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n", - "\u001b[33muser\u001b[0m (to agent):\n", + "\u001B[93m\n", + "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001B[0m\n", + "\u001B[33muser\u001B[0m (to agent):\n", "\n", "Please summarize this abstract.\n", "\n", @@ -471,7 +471,7 @@ "Artificial intelligence (AI) researchers have been developing and refining large language models (LLMs) that exhibit remarkable capabilities across a variety of domains and tasks, challenging our understanding of learning and cognition. The latest model developed by OpenAI, GPT-4, was trained using an unprecedented scale of compute and data. In this paper, we report on our investigation of an early version of GPT-4, when it was still in active development by OpenAI. We contend that (this early version of) GPT-4 is part of a new cohort of LLMs (along with ChatGPT and Google's PaLM for example) that exhibit more general intelligence than previous AI models. We discuss the rising capabilities and implications of these models. We demonstrate that, beyond its mastery of language, GPT-4 can solve novel and difficult tasks that span mathematics, coding, vision, medicine, law, psychology and more, without needing any special prompting. Moreover, in all of these tasks, GPT-4's performance is strikingly close to human-level performance, and often vastly surpasses prior models such as ChatGPT. Given the breadth and depth of GPT-4's capabilities, we believe that it could reasonably be viewed as an early (yet still incomplete) version of an artificial general intelligence (AGI) system. In our exploration of GPT-4, we put special emphasis on discovering its limitations, and we discuss the challenges ahead for advancing towards deeper and more comprehensive versions of AGI, including the possible need for pursuing a new paradigm that moves beyond next-word prediction. 
We conclude with reflections on societal influences of the recent technological leap and future research directions.\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33magent\u001b[0m (to user):\n", + "\u001B[33magent\u001B[0m (to user):\n", "\n", "Title: Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", "\n", @@ -513,7 +513,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to agent):\n", + "\u001B[33muser\u001B[0m (to agent):\n", "\n", "Consider the identity: \n", "9 * 4 + 6 * 6 = 72\n", @@ -522,7 +522,7 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33magent\u001b[0m (to user):\n", + "\u001B[33magent\u001B[0m (to user):\n", "\n", "Plan:\n", "1. First, I'll assess the starting equation to see what figures we're working with.\n", @@ -570,7 +570,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to agent):\n", + "\u001B[33muser\u001B[0m (to agent):\n", "\n", "Consider the identity: \n", "9 * 4 + 6 * 6 = 72\n", @@ -591,7 +591,7 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33magent\u001b[0m (to user):\n", + "\u001B[33magent\u001B[0m (to user):\n", "\n", "Step-by-step plan:\n", "1. Original number on the right (E): 72\n", @@ -647,9 +647,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[93m\n", - "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n", - "\u001b[33muser\u001b[0m (to agent):\n", + "\u001B[93m\n", + "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001B[0m\n", + "\u001B[33muser\u001B[0m (to agent):\n", "\n", "Consider the identity: \n", "9 * 4 + 6 * 6 = 72\n", @@ -658,7 +658,7 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33magent\u001b[0m (to user):\n", + "\u001B[33magent\u001B[0m (to user):\n", "\n", "Alright, following your guidelines, we can start this step-by-step:\n", "\n", @@ -705,7 +705,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to agent):\n", + "\u001B[33muser\u001B[0m (to agent):\n", "\n", "Consider the identity: \n", "8 * 3 + 7 * 9 = 87\n", @@ -714,7 +714,7 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33magent\u001b[0m (to user):\n", + "\u001B[33magent\u001B[0m (to user):\n", "\n", "Alright, let's proceed with this new identity following the same step-by-step process:\n", "\n", diff --git a/test/agentchat/test_teachable_agent_interactively.py b/test/agentchat/chat_with_teachable_agent.py similarity index 100% rename from test/agentchat/test_teachable_agent_interactively.py rename to test/agentchat/chat_with_teachable_agent.py diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 34dd66c8c9f..b45176618a5 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -10,7 +10,10 @@ def colored(x, *args, **kwargs): return x -verbosity = 0 # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. +# Set verbosity levels to maximize code coverage. +qa_verbosity = 0 # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. +skill_verbosity = 3 # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. 
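# Sketch of the verbosity convention behind qa_verbosity and skill_verbosity:
# each message carries the minimum level at which it should print.
# Illustrative helper only, not repo code.
def log(msg: str, level: int, verbosity: int) -> None:
    # 0 basic info, 1 adds memory operations, 2 adds analyzer messages, 3 adds memo lists
    if verbosity >= level:
        print(msg)

log("LIST OF MEMOS", level=3, verbosity=3)  # emitted only at verbosity 3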
+ assert_on_error = False # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable. recall_threshold = 1.5 # Higher numbers allow more (but less relevant) memos to be recalled. @@ -21,7 +24,7 @@ def colored(x, *args, **kwargs): filter_dict = {"model": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]} -def create_teachable_agent(reset_db=False): +def create_teachable_agent(reset_db=False, verbosity=0): """Instantiates a TeachableAgent using the settings from the top of this file.""" # Load LLM inference endpoints from an env variable or a file # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints @@ -53,16 +56,11 @@ def check_agent_response(agent, user, correct_answer): return 0 -def test_question_answer_pair(): +def use_question_answer_phrasing(): """Tests whether the agent can answer a question after being taught the answer in a previous chat.""" - try: - import openai - except ImportError: - return - - print(colored("\nTEST QUESTION-ANSWER PAIRS", "light_cyan")) + print(colored("\nTEST QUESTION-ANSWER PHRASING", "light_cyan")) num_errors, num_tests = 0, 0 - agent = create_teachable_agent(reset_db=True) # For a clean test, clear the agent's memory. + agent = create_teachable_agent(reset_db=True, verbosity=qa_verbosity) # For a clean test, clear the agent's memory. user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") # Prepopulate memory with a few arbitrary memos, just to make retrieval less trivial. @@ -93,16 +91,11 @@ def test_question_answer_pair(): return num_errors, num_tests -def test_task_advice_pair(): +def use_task_advice_pair_phrasing(): """Tests whether the agent can demonstrate a new skill after being taught a task-advice pair in a previous chat.""" - try: - import openai - except ImportError: - return - - print(colored("\nTEST TASK-ADVICE PAIRS", "light_cyan")) + print(colored("\nTEST TASK-ADVICE PHRASING", "light_cyan")) num_errors, num_tests = 0, 0 - agent = create_teachable_agent(reset_db=True) # For a clean test, clear the agent's memory. + agent = create_teachable_agent(reset_db=True, verbosity=skill_verbosity) # For a clean test, clear the agent's memory. user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") # Prepopulate memory with a few arbitrary memos, just to make retrieval less trivial. @@ -130,17 +123,22 @@ def test_task_advice_pair(): return num_errors, num_tests -if __name__ == "__main__": +def test_all(): """Runs this file's unit tests.""" + try: + import openai + except ImportError: + return + total_num_errors, total_num_tests = 0, 0 num_trials = 1 # Set to a higher number to get a more accurate error rate. 
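# Sketch of the aggregation the trial loop below performs: rerun both phrasing
# tests num_trials times and report total errors over total tests as a
# percentage. Illustrative helper, not repo code.
def error_rate(results: list) -> float:
    # results: (num_errors, num_tests) pairs collected across all trials
    total_errors = sum(e for e, _ in results)
    total_tests = sum(t for _, t in results)
    return 100.0 * total_errors / total_tests if total_tests else 0.0

assert error_rate([(0, 1), (1, 1)]) == 50.0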
for trial in range(num_trials): - num_errors, num_tests = test_question_answer_pair() + num_errors, num_tests = use_question_answer_phrasing() total_num_errors += num_errors total_num_tests += num_tests - num_errors, num_tests = test_task_advice_pair() + num_errors, num_tests = use_task_advice_pair_phrasing() total_num_errors += num_errors total_num_tests += num_tests @@ -155,3 +153,8 @@ def test_task_advice_pair(): "light_red", ) ) + + +if __name__ == "__main__": + """Runs this file's unit tests from the command line.""" + test_all() From a47e5045844d124b953c19eb24568682c19e2817 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 19 Oct 2023 16:58:18 -0700 Subject: [PATCH 40/50] lint --- test/agentchat/test_teachable_agent.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index b45176618a5..5352be647cf 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -95,7 +95,9 @@ def use_task_advice_pair_phrasing(): """Tests whether the agent can demonstrate a new skill after being taught a task-advice pair in a previous chat.""" print(colored("\nTEST TASK-ADVICE PHRASING", "light_cyan")) num_errors, num_tests = 0, 0 - agent = create_teachable_agent(reset_db=True, verbosity=skill_verbosity) # For a clean test, clear the agent's memory. + agent = create_teachable_agent( + reset_db=True, verbosity=skill_verbosity # For a clean test, clear the agent's memory. + ) user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") # Prepopulate memory with a few arbitrary memos, just to make retrieval less trivial. From a75617979a973b89e5d6cb686c54f7d6e1b68615 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 19 Oct 2023 17:52:40 -0700 Subject: [PATCH 41/50] Use llm_config to control caching. --- autogen/agentchat/contrib/teachable_agent.py | 4 +--- autogen/agentchat/contrib/text_analyzer_agent.py | 3 +-- test/agentchat/chat_with_teachable_agent.py | 5 +++-- test/agentchat/test_teachable_agent.py | 3 ++- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 14a31ec8fec..955809027a2 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -45,7 +45,6 @@ def __init__( - reset_db (Optional, bool): True to clear the DB before starting. Default False. - path_to_db_dir (Optional, str): path to the directory where the DB is stored. Default "./tmp/teachable_agent_db" - prepopulate (Optional, int): True (default) to prepopulate the DB with a set of input-output pairs. - - use_cache (Optional, bool): True to skip LLM calls made previously by relying on cached responses. Default False. - recall_threshold (Optional, float): The maximum distance for retrieved memos, where 0.0 is exact match. Default 1.5. Larger values allow more (but less relevant) memos to be recalled. - max_num_retrievals (Optional, int): The maximum number of memos to retrieve from the DB. Default 10. **kwargs (dict): other kwargs in [ConversableAgent](../conversable_agent#__init__). 
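# Sketch of the configuration shape after this commit: use_cache now rides in
# llm_config alongside the other inference options, instead of in teach_config.
# The endpoint entry is a placeholder, not a real config.
llm_config = {
    "config_list": [{"model": "gpt-4", "api_key": "PLACEHOLDER"}],  # hypothetical entry
    "request_timeout": 120,
    "use_cache": False,  # True replays prior responses; False exposes LLM non-determinism
}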
@@ -67,7 +66,6 @@ def __init__(
         self.reset_db = self._teach_config.get("reset_db", False)
         self.path_to_db_dir = self._teach_config.get("path_to_db_dir", "./tmp/teachable_agent_db")
         self.prepopulate = self._teach_config.get("prepopulate", True)
-        self.use_cache = self._teach_config.get("use_cache", False)
         self.recall_threshold = self._teach_config.get("recall_threshold", 1.5)
         self.max_num_retrievals = self._teach_config.get("max_num_retrievals", 10)

@@ -120,7 +118,7 @@ def _generate_teachable_assistant_reply(
         # Generate a response.
         msgs = self._oai_system_message + messages
-        response = oai.ChatCompletion.create(messages=msgs, use_cache=self.use_cache, **self.llm_config)
+        response = oai.ChatCompletion.create(messages=msgs, **self.llm_config)
         response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0]
         return True, response_text

diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py
index cc3b01b3cfc..eb8b96e8719 100644
--- a/autogen/agentchat/contrib/text_analyzer_agent.py
+++ b/autogen/agentchat/contrib/text_analyzer_agent.py
@@ -34,7 +34,6 @@ def __init__(
             **kwargs,
         )
         self.register_reply(Agent, TextAnalyzerAgent._analyze_in_reply, 1)
-        self.use_cache = False  # 1 to skip LLM calls made previously by relying on cached responses.

     def _analyze_in_reply(
         self,
@@ -66,7 +65,7 @@ def analyze_text(self, text_to_analyze, analysis_instructions):
         # Generate and return the analysis string.
         response = oai.ChatCompletion.create(
-            context=None, messages=messages, use_cache=self.use_cache, **self.llm_config
+            context=None, messages=messages, **self.llm_config
         )
         output_text = oai.ChatCompletion.extract_text_or_function_call(response)[0]
         return output_text
diff --git a/test/agentchat/chat_with_teachable_agent.py b/test/agentchat/chat_with_teachable_agent.py
index c68535fa0d9..6a837cda557 100644
--- a/test/agentchat/chat_with_teachable_agent.py
+++ b/test/agentchat/chat_with_teachable_agent.py
@@ -12,9 +12,10 @@ def colored(x, *args, **kwargs):
 verbosity = 0  # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
 recall_threshold = 1.5  # Higher numbers allow more (but less relevant) memos to be recalled.
+use_cache = False  # If True, LLM calls made previously are skipped by pulling their responses from the cache. False exposes LLM non-determinism.

 # Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input.
-filter_dict = {"model": ["gpt-4-0613"]}
+filter_dict = {"model": ["gpt-4"]}


 def create_teachable_agent(reset_db=False):
@@ -25,7 +26,7 @@ def create_teachable_agent(reset_db=False):
     config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict)
     agent = TeachableAgent(
         name="agent",
-        llm_config={"config_list": config_list, "request_timeout": 120},
+        llm_config={"config_list": config_list, "request_timeout": 120, "use_cache": use_cache},
         teach_config={
             "verbosity": verbosity,
             "reset_db": reset_db,
diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py
index 5352be647cf..4dc116f50c1 100644
--- a/test/agentchat/test_teachable_agent.py
+++ b/test/agentchat/test_teachable_agent.py
@@ -16,6 +16,7 @@ def colored(x, *args, **kwargs):
 assert_on_error = False  # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable.
 recall_threshold = 1.5  # Higher numbers allow more (but less relevant) memos to be recalled.
+use_cache = False  # If True, LLM calls made previously are skipped by pulling their responses from the cache. False exposes LLM non-determinism.

 # Specify the model to use by uncommenting one of the following lines.
 # filter_dict={"model": ["gpt-4-0613"]}
@@ -32,7 +33,7 @@ def create_teachable_agent(reset_db=False, verbosity=0):
     config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict)
     agent = TeachableAgent(
         name="agent",
-        llm_config={"config_list": config_list, "request_timeout": 120},
+        llm_config={"config_list": config_list, "request_timeout": 120, "use_cache": use_cache},
         teach_config={
             "verbosity": verbosity,
             "reset_db": reset_db,

From 96131a8b1351b23ee7c8f7cb459bd771d4e7b3df Mon Sep 17 00:00:00 2001
From: rickyloynd-microsoft
Date: Thu, 19 Oct 2023 18:06:01 -0700
Subject: [PATCH 42/50] lowercase notebook name

---
 ...gentchat_Teachability.ipynb => agentchat_teachability.ipynb} | 2 +-
 website/docs/Examples/AutoGen-AgentChat.md | 2 +-
 website/docs/Use-Cases/agent_chat.md | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
 rename notebook/{agentchat_Teachability.ipynb => agentchat_teachability.ipynb} (99%)

diff --git a/notebook/agentchat_Teachability.ipynb b/notebook/agentchat_teachability.ipynb
similarity index 99%
rename from notebook/agentchat_Teachability.ipynb
rename to notebook/agentchat_teachability.ipynb
index 23f3a53a715..aa8911f9b92 100644
--- a/notebook/agentchat_Teachability.ipynb
+++ b/notebook/agentchat_teachability.ipynb
@@ -5,7 +5,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "\"Open"
+    "\"Open"
   ]
  },
 {
diff --git a/website/docs/Examples/AutoGen-AgentChat.md b/website/docs/Examples/AutoGen-AgentChat.md
index fb86f3c204a..dbfd070589f 100644
--- a/website/docs/Examples/AutoGen-AgentChat.md
+++ b/website/docs/Examples/AutoGen-AgentChat.md
@@ -16,5 +16,5 @@ Links to notebook examples:
 * [Automated Complex Task Solving by Group Chat (with 6 group member agents and 1 manager agent)](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_groupchat_research.ipynb)
 * [Automated Continual Learning from New Data](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_stream.ipynb)
 * [Teach Agents New Skills & Reuse via Automated Chat](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_teaching.ipynb)
-* [Teach Agents New Facts, User Preferences and Skills Beyond Coding](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_Teachability.ipynb)
+* [Teach Agents New Facts, User Preferences and Skills Beyond Coding](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_teachability.ipynb)
 * [Automated Code Generation and Question Answering with Retrieval Augmented Agents](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_RetrieveChat.ipynb)
diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md
index c2a80cfb36b..56984f57c86 100644
--- a/website/docs/Use-Cases/agent_chat.md
+++ b/website/docs/Use-Cases/agent_chat.md
@@ -110,7 +110,7 @@ The figure below shows six examples of applications built using AutoGen.
 5.
**Agent Teaching and Learning**
    - Teach Agents New Skills & Reuse via Automated Chat - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_teaching.ipynb)
-   - Teach Agents New Facts, User Preferences and Skills Beyond Coding - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_Teachability.ipynb)
+   - Teach Agents New Facts, User Preferences and Skills Beyond Coding - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_teachability.ipynb)

 ## For Further Reading

From 08d20000b5cd121937afe474ecc0078fd95d779a Mon Sep 17 00:00:00 2001
From: rickyloynd-microsoft
Date: Thu, 19 Oct 2023 18:42:51 -0700
Subject: [PATCH 43/50] Sort out the parameters passed through to
 ConversableAgent, and supply full docstrings for the others.

---
 autogen/agentchat/contrib/teachable_agent.py  | 31 ++++++++--------
 .../agentchat/contrib/text_analyzer_agent.py  | 37 +++++++++++++------
 2 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py
index 955809027a2..934da7f1e06 100644
--- a/autogen/agentchat/contrib/teachable_agent.py
+++ b/autogen/agentchat/contrib/teachable_agent.py
@@ -23,23 +23,25 @@ class TeachableAgent(ConversableAgent):

     def __init__(
         self,
-        name="Agent",  # default set to Assistant
+        name="Agent",
         system_message: Optional[
             str
         ] = "You are a helpful AI assistant that remembers user teachings from prior chats.",
-        llm_config: Optional[Union[Dict, bool]] = None,
-        is_termination_msg: Optional[Callable[[Dict], bool]] = None,
-        max_consecutive_auto_reply: Optional[int] = None,
         human_input_mode: Optional[str] = "NEVER",
-        code_execution_config: Optional[Union[Dict, bool]] = False,
-        teach_config: Optional[Dict] = None,  # config for the TeachableAgent
+        llm_config: Optional[Union[Dict, bool]] = None,
+        teach_config: Optional[Dict] = None,
         **kwargs,
     ):
         """
         Args:
-            name (str): name of the agent. Default "Assistant".
-            human_input_mode (str): NEVER ask for human input for this agent.
-            teach_config (dict or None): config for the TeachableAgent.
+            name (str): name of the agent.
+            system_message (str): system message for the ChatCompletion inference.
+            human_input_mode (str): This agent should NEVER prompt the human for input.
+            llm_config (dict or False): llm inference configuration.
+                Please refer to [Completion.create](/docs/reference/oai/completion#create)
+                for available options.
+                To disable llm-based auto reply, set to False.
+            teach_config (dict or None): Additional parameters used by TeachableAgent.
                 To use default config, set to None. Otherwise, set to a dictionary with any of the following keys:
                 - verbosity (Optional, int): 0 (default) for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
                 - reset_db (Optional, bool): True to clear the DB before starting. Default False.
                 - path_to_db_dir (Optional, str): path to the directory where the DB is stored. Default "./tmp/teachable_agent_db"
                 - prepopulate (Optional, int): True (default) to prepopulate the DB with a set of input-output pairs.
                 - recall_threshold (Optional, float): The maximum distance for retrieved memos, where 0.0 is exact match. Default 1.5. Larger values allow more (but less relevant) memos to be recalled.
                 - max_num_retrievals (Optional, int): The maximum number of memos to retrieve from the DB. Default 10.
             **kwargs (dict): other kwargs in [ConversableAgent](../conversable_agent#__init__).
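For reference, a hypothetical caller-side sketch of the `teach_config` keys documented above, spelled out with their listed defaults (assumes an `OAI_CONFIG_LIST` file):

```python
from autogen import config_list_from_json
from autogen.agentchat.contrib.teachable_agent import TeachableAgent

config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")

agent = TeachableAgent(
    name="Agent",
    llm_config={"config_list": config_list},
    teach_config={
        "verbosity": 0,            # 0 basic info, 1 memory operations, 2 analyzer messages, 3 memo lists
        "reset_db": False,         # True clears the DB before starting
        "path_to_db_dir": "./tmp/teachable_agent_db",
        "prepopulate": True,       # seed the DB with a few input-output pairs
        "recall_threshold": 1.5,   # maximum distance for retrieved memos; 0.0 is an exact match
        "max_num_retrievals": 10,  # cap on memos retrieved per query
    },
)
```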
""" super().__init__( - name, - system_message, - is_termination_msg, - max_consecutive_auto_reply, - human_input_mode, - code_execution_config=code_execution_config, + name=name, + system_message=system_message, + human_input_mode=human_input_mode, llm_config=llm_config, **kwargs, ) @@ -69,7 +68,7 @@ def __init__( self.recall_threshold = self._teach_config.get("recall_threshold", 1.5) self.max_num_retrievals = self._teach_config.get("max_num_retrievals", 10) - self.analyzer = TextAnalyzerAgent("analyzer", llm_config=llm_config) + self.analyzer = TextAnalyzerAgent(llm_config=llm_config) self.memo_store = MemoStore(self.verbosity, self.reset_db, self.path_to_db_dir) self.user_comments = [] # Stores user comments until the end of each chat. diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py index eb8b96e8719..75f7bbae662 100644 --- a/autogen/agentchat/contrib/text_analyzer_agent.py +++ b/autogen/agentchat/contrib/text_analyzer_agent.py @@ -10,26 +10,39 @@ class TextAnalyzerAgent(ConversableAgent): - """Text Analysis agent, a subclass of ConversableAgent designed to answer specific questions about text.""" + """Text Analysis agent, a subclass of ConversableAgent designed to analyze text as instructed.""" def __init__( self, - name: str, + name="Analyzer", system_message: Optional[str] = system_message, - llm_config: Optional[Union[Dict, bool]] = None, - is_termination_msg: Optional[Callable[[Dict], bool]] = None, - max_consecutive_auto_reply: Optional[int] = None, human_input_mode: Optional[str] = "NEVER", - code_execution_config: Optional[Union[Dict, bool]] = False, + llm_config: Optional[Union[Dict, bool]] = None, **kwargs, ): + """ + Args: + name (str): name of the agent. + system_message (str): system message for the ChatCompletion inference. + human_input_mode (str): This agent should NEVER prompt the human for input. + llm_config (dict or False): llm inference configuration. + Please refer to [Completion.create](/docs/reference/oai/completion#create) + for available options. + To disable llm-based auto reply, set to False. + teach_config (dict or None): Additional parameters used by TeachableAgent. + To use default config, set to None. Otherwise, set to a dictionary with any of the following keys: + - verbosity (Optional, int): # 0 (default) for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. + - reset_db (Optional, bool): True to clear the DB before starting. Default False. + - path_to_db_dir (Optional, str): path to the directory where the DB is stored. Default "./tmp/teachable_agent_db" + - prepopulate (Optional, int): True (default) to prepopulate the DB with a set of input-output pairs. + - recall_threshold (Optional, float): The maximum distance for retrieved memos, where 0.0 is exact match. Default 1.5. Larger values allow more (but less relevant) memos to be recalled. + - max_num_retrievals (Optional, int): The maximum number of memos to retrieve from the DB. Default 10. + **kwargs (dict): other kwargs in [ConversableAgent](../conversable_agent#__init__). 
+ """ super().__init__( - name, - system_message, - is_termination_msg, - max_consecutive_auto_reply, - human_input_mode, - code_execution_config=code_execution_config, + name=name, + system_message=system_message, + human_input_mode=human_input_mode, llm_config=llm_config, **kwargs, ) From 3183c2727bb3bf459114ce1e386339b6d55965ab Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Thu, 19 Oct 2023 18:47:55 -0700 Subject: [PATCH 44/50] lint --- autogen/agentchat/contrib/text_analyzer_agent.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py index 75f7bbae662..2358c0f360b 100644 --- a/autogen/agentchat/contrib/text_analyzer_agent.py +++ b/autogen/agentchat/contrib/text_analyzer_agent.py @@ -77,8 +77,6 @@ def analyze_text(self, text_to_analyze, analysis_instructions): messages = self._oai_system_message + [{"role": "user", "content": msg_text}] # Generate and return the analysis string. - response = oai.ChatCompletion.create( - context=None, messages=messages, **self.llm_config - ) + response = oai.ChatCompletion.create(context=None, messages=messages, **self.llm_config) output_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] return output_text From c033af8d7366ad578598c727ab7fefb0c1d953db Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Fri, 20 Oct 2023 10:07:43 -0700 Subject: [PATCH 45/50] Allow TextAnalyzerAgent to be given a different llm_config than TeachableAgent. --- autogen/agentchat/contrib/teachable_agent.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 934da7f1e06..fec42b8b769 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -29,6 +29,7 @@ def __init__( ] = "You are a helpful AI assistant that remembers user teachings from prior chats.", human_input_mode: Optional[str] = "NEVER", llm_config: Optional[Union[Dict, bool]] = None, + analyzer_llm_config: Optional[Union[Dict, bool]] = None, teach_config: Optional[Dict] = None, **kwargs, ): @@ -41,6 +42,8 @@ def __init__( Please refer to [Completion.create](/docs/reference/oai/completion#create) for available options. To disable llm-based auto reply, set to False. + analyzer_llm_config (dict or False): llm inference configuration passed to TextAnalyzerAgent. + Given the default setting of None, TeachableAgent passes its own llm_config to TextAnalyzerAgent. teach_config (dict or None): Additional parameters used by TeachableAgent. To use default config, set to None. Otherwise, set to a dictionary with any of the following keys: - verbosity (Optional, int): # 0 (default) for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists. @@ -58,8 +61,10 @@ def __init__( llm_config=llm_config, **kwargs, ) + # Register a custom reply function. self.register_reply(Agent, TeachableAgent._generate_teachable_assistant_reply, 1) + # Assemble the parameter settings. 
self._teach_config = {} if teach_config is None else teach_config
        self.verbosity = self._teach_config.get("verbosity", 0)
        self.reset_db = self._teach_config.get("reset_db", False)
        self.path_to_db_dir = self._teach_config.get("path_to_db_dir", "./tmp/teachable_agent_db")
        self.prepopulate = self._teach_config.get("prepopulate", True)
        self.recall_threshold = self._teach_config.get("recall_threshold", 1.5)
        self.max_num_retrievals = self._teach_config.get("max_num_retrievals", 10)

-        self.analyzer = TextAnalyzerAgent(llm_config=llm_config)
+        # Create the analyzer.
+        if analyzer_llm_config is None:
+            analyzer_llm_config = llm_config
+        self.analyzer = TextAnalyzerAgent(llm_config=analyzer_llm_config)

+        # Create the memo store.
         self.memo_store = MemoStore(self.verbosity, self.reset_db, self.path_to_db_dir)
         self.user_comments = []  # Stores user comments until the end of each chat.

From 92dc0f67e125abd647560937011c91869d691196 Mon Sep 17 00:00:00 2001
From: rickyloynd-microsoft
Date: Fri, 20 Oct 2023 10:39:05 -0700
Subject: [PATCH 46/50] documentation

---
 autogen/agentchat/contrib/teachable_agent.py       | 19 ++++++++++---------
 .../agentchat/contrib/text_analyzer_agent.py       |  4 ++--
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py
index fec42b8b769..1ee9ed15f93 100644
--- a/autogen/agentchat/contrib/teachable_agent.py
+++ b/autogen/agentchat/contrib/teachable_agent.py
@@ -87,7 +87,7 @@ def close_db(self):
         self.memo_store.close()

     def prepopulate_db(self):
-        """Initializes the DB with a few arbitrary memos."""
+        """Adds a few arbitrary memos to the DB."""
         self.memo_store.prepopulate()

     def _generate_teachable_assistant_reply(
@@ -98,7 +98,7 @@ def _generate_teachable_assistant_reply(
     ) -> Tuple[bool, Union[str, Dict, None]]:
         """
         Generates a reply to the last user message, after querying the memo store for relevant information.
-        Uses self.analyzer to make decisions about memo storage and retrieval.
+        Uses TextAnalyzerAgent to make decisions about memo storage and retrieval.
         """
         if self.llm_config is False:
             raise ValueError("TeachableAgent requires self.llm_config to be set in its base class.")
@@ -254,7 +254,7 @@ def concatenate_memo_texts(self, memo_list):
         return memo_texts

     def analyze(self, text_to_analyze, analysis_instructions):
-        """Asks the analyzer to analyze the given text according to specific instructions."""
+        """Asks TextAnalyzerAgent to analyze the given text according to specific instructions."""
         if self.verbosity >= 2:
             # Use the messaging mechanism so that the analyzer's messages are included in the printed chat.
             self.analyzer.reset()  # Clear the analyzer's list of messages.
@@ -272,15 +272,15 @@ class MemoStore:
     """
     Provides memory storage and retrieval for a TeachableAgent, using a vector database.
     Each DB entry (called a memo) is a pair of strings: an input text and an output text.
-    The input text may be a question, or a task to perform.
-    The output text may be an answer to the question, or advice for how to perform the task.
-    Vector embeddings are currently provided by Chroma's default Sentence Transformers.
+    The input text might be a question, or a task to perform.
+    The output text might be an answer to the question, or advice on how to perform the task.
+    Vector embeddings are currently supplied by Chroma's default Sentence Transformers.
     """

     def __init__(self, verbosity, reset, path_to_db_dir):
         """
         Args:
-            - verbosity (Optional, int): 1 to print memory operations, 0 to omit them.
+            - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print memo lists.
+            - reset (Optional, bool): True to clear the DB before starting. Default False.
- path_to_db_dir (Optional, str): path to the directory where the DB is stored. """ self.verbosity = verbosity @@ -310,6 +310,7 @@ def __init__(self, verbosity, reset, path_to_db_dir): self.list_memos() def list_memos(self): + """Prints the contents of MemoStore.""" print(colored("LIST OF MEMOS", "light_green")) for uid, text in self.uid_text_dict.items(): input_text, output_text = text @@ -321,7 +322,7 @@ def list_memos(self): ) def close(self): - """Saves the dict to disk.""" + """Saves self.uid_text_dict to disk.""" print(colored("\nSAVING MEMORY TO DISK", "light_green")) print(colored(" Location = {}".format(self.path_to_dict), "light_green")) with open(self.path_to_dict, "wb") as file: @@ -369,7 +370,7 @@ def get_nearest_memo(self, query_text): return input_text, output_text, distance def get_related_memos(self, query_text, n_results, threshold): - """Retrieves memos that are related to the given query text with the specified threshold.""" + """Retrieves memos that are related to the given query text within the specified distance threshold.""" if n_results > len(self.uid_text_dict): n_results = len(self.uid_text_dict) results = self.vec_db.query(query_texts=[query_text], n_results=n_results) diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py index 2358c0f360b..7f4861b9f39 100644 --- a/autogen/agentchat/contrib/text_analyzer_agent.py +++ b/autogen/agentchat/contrib/text_analyzer_agent.py @@ -54,8 +54,8 @@ def _analyze_in_reply( sender: Optional[Agent] = None, config: Optional[Any] = None, ) -> Tuple[bool, Union[str, Dict, None]]: - """Analyzes the given text as instructed, and returns the analysis. - Assumes exactly two messages containing the text to analyze and the analysis instructions respectively. + """Analyzes the given text as instructed, and returns the analysis as a message. + Assumes exactly two messages containing the text to analyze and the analysis instructions. See TeachableAgent.analyze for an example of how to use this method.""" if self.llm_config is False: raise ValueError("TextAnalyzerAgent requires self.llm_config to be set in its base class.") From 859acc584918eaeb4ba3c853e20c888753290bac Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Fri, 20 Oct 2023 11:09:07 -0700 Subject: [PATCH 47/50] Modifications to run openai workflow. 
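The MemoStore methods documented above can also be exercised directly, outside any agent; a hypothetical sketch (paths and texts are illustrative, and `get_related_memos` is assumed to return `(input_text, output_text, distance)` triples like `get_nearest_memo`):

```python
from autogen.agentchat.contrib.teachable_agent import MemoStore

# Open (or create) a store; reset=True clears any existing DB first.
store = MemoStore(verbosity=1, reset=True, path_to_db_dir="./tmp/memostore_demo")

# Each memo is an input-output pair of strings.
store.add_input_output_pair("What is the twist of two numbers?", "Their product minus their sum.")

# Retrieval keeps only memos whose embedding distance falls within the
# threshold; smaller distances mean closer matches (0.0 is exact).
for input_text, output_text, distance in store.get_related_memos(
    "Compute the twist of 5 and 7.", n_results=10, threshold=1.5
):
    print(f"{distance:.3f}  {input_text} -> {output_text}")

store.close()  # saves uid_text_dict to disk
```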
---
 .github/workflows/openai.yml           |  3 +++
 test/agentchat/test_teachable_agent.py | 11 ++++++-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml
index bb63bc6659c..0a678fd9ed1 100644
--- a/.github/workflows/openai.yml
+++ b/.github/workflows/openai.yml
@@ -53,6 +53,9 @@ jobs:
         if: matrix.python-version == '3.9'
         run: |
           pip install -e .[retrievechat]
+      - name: Install packages for Teachable when needed
+        run: |
+          pip install -e .[teachable]
       - name: Coverage
         if: matrix.python-version == '3.9'
         env:
diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py
index 4dc116f50c1..0210d694ceb 100644
--- a/test/agentchat/test_teachable_agent.py
+++ b/test/agentchat/test_teachable_agent.py
@@ -1,3 +1,8 @@
+try:
+    import openai
+except ImportError:
+    openai = None
+import pytest
 from autogen import ConversableAgent, config_list_from_json
 from autogen.agentchat.contrib.teachable_agent import TeachableAgent

@@ -126,13 +131,9 @@ def use_task_advice_pair_phrasing():
     return num_errors, num_tests


+@pytest.mark.skipif(openai is None, reason="openai not installed")
 def test_all():
     """Runs this file's unit tests."""
-    try:
-        import openai
-    except ImportError:
-        return
-
     total_num_errors, total_num_tests = 0, 0

     num_trials = 1  # Set to a higher number to get a more accurate error rate.

From 27452768291168755af92824c0e46337a4ff601a Mon Sep 17 00:00:00 2001
From: rickyloynd-microsoft
Date: Fri, 20 Oct 2023 18:04:41 -0700
Subject: [PATCH 48/50] Test on just python 3.10. Replace agent with
 teachable_agent as recommended.

---
 autogen/agentchat/contrib/teachable_agent.py |   2 +-
 .../agentchat/contrib/text_analyzer_agent.py |   2 +-
 notebook/agentchat_teachability.ipynb        | 273 ++++++++++--------
 test/agentchat/chat_with_teachable_agent.py  |  16 +-
 test/agentchat/test_teachable_agent.py       |  76 ++---
 5 files changed, 200 insertions(+), 169 deletions(-)

diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py
index 1ee9ed15f93..91fbf9c5612 100644
--- a/autogen/agentchat/contrib/teachable_agent.py
+++ b/autogen/agentchat/contrib/teachable_agent.py
@@ -23,7 +23,7 @@ class TeachableAgent(ConversableAgent):

     def __init__(
         self,
-        name="Agent",
+        name="teachable agent",
         system_message: Optional[
             str
         ] = "You are a helpful AI assistant that remembers user teachings from prior chats.",
diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py
index 7f4861b9f39..8cf88eba6ae 100644
--- a/autogen/agentchat/contrib/text_analyzer_agent.py
+++ b/autogen/agentchat/contrib/text_analyzer_agent.py
@@ -14,7 +14,7 @@ class TextAnalyzerAgent(ConversableAgent):

     def __init__(
         self,
-        name="Analyzer",
+        name="analyzer",
         system_message: Optional[str] = system_message,
         human_input_mode: Optional[str] = "NEVER",
         llm_config: Optional[Union[Dict, bool]] = None,
diff --git a/notebook/agentchat_teachability.ipynb b/notebook/agentchat_teachability.ipynb
index aa8911f9b92..6b0cb6d1e86 100644
--- a/notebook/agentchat_teachability.ipynb
+++ b/notebook/agentchat_teachability.ipynb
@@ -17,7 +17,7 @@
    "\n",
    "Conversational assistants based on LLMs can remember the current chat with the user, and can even demonstrate in-context learning of things that the user teaches the assistant during the chat. But these memories and learnings are lost once the chat is over, or when a single chat grows too long for the LLM to handle effectively.
In subsequent chats, the user is forced to repeat any necessary instructions over and over.\n", "\n", - "`TeachableAgent` addresses these limitations by persisting user teachings across chat boundaries in long-term memory (a vector database). Memory is saved to disk at the end of each chat, then loaded from disk at the start of the next. Instead of copying all of memory into the context window, which would eat up valuable space, individual memories (called memos) are retrieved into context as needed. This allows the user to teach frequently used facts and skills to the agent just once, and have the agent remember them in later chats.\n", + "`TeachableAgent` addresses these limitations by persisting user teachings across chat boundaries in long-term memory (a vector database). Memory is saved to disk at the end of each chat, then loaded from disk at the start of the next. Instead of copying all of memory into the context window, which would eat up valuable space, individual memories (called memos) are retrieved into context as needed. This allows the user to teach frequently used facts and skills to the teachable agent just once, and have it remember them in later chats.\n", "\n", "In making decisions about memo storage and retrieval, `TeachableAgent` calls an instance of `TextAnalyzerAgent` to analyze pieces of text in several different ways. This adds extra LLM calls involving a relatively small number of tokens. These calls can add a few seconds to the time a user waits for a response.\n", "\n", @@ -60,7 +60,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "gpt-4-0613\n" + "gpt-4\n" ] } ], @@ -71,7 +71,7 @@ " env_or_file=\"OAI_CONFIG_LIST\",\n", " file_location=\".\",\n", " filter_dict={\n", - " \"model\": [\"gpt-4-0613\", \"gpt-4\", \"gpt4\", \"gpt-4-32k\"],\n", + " \"model\": [\"gpt-4\", \"gpt4\", \"gpt-4-32k\"],\n", " },\n", ")\n", "\n", @@ -120,7 +120,7 @@ "metadata": {}, "source": [ "## Construct Agents\n", - "For this walkthrough, we start by resetting the agent's memory store. This deletes any memories from prior conversations that may be stored on disk." + "For this walkthrough, we start by resetting the teachable agent's memory store. This deletes any memories from prior conversations that may be stored on disk." ] }, { @@ -132,8 +132,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[92m\n", - "CLEARING MEMORY\u001B[0m\n" + "\u001b[92m\n", + "CLEARING MEMORY\u001b[0m\n" ] } ], @@ -144,6 +144,7 @@ "llm_config = {\n", " \"request_timeout\": 60,\n", " \"config_list\": config_list,\n", + " \"use_cache\": True, # Use False to explore LLM non-determinism.\n", "}\n", "\n", "teach_config={\n", @@ -161,8 +162,8 @@ " \n", "autogen.ChatCompletion.start_logging()\n", "\n", - "agent = TeachableAgent(\n", - " name=\"agent\",\n", + "teachable_agent = TeachableAgent(\n", + " name=\"teachable agent\",\n", " llm_config=llm_config,\n", " teach_config=teach_config)\n", "\n", @@ -193,14 +194,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[33muser\u001B[0m (to agent):\n", + "\u001b[33muser\u001b[0m (to teachable agent):\n", "\n", "What is the Vicuna model?\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001B[33magent\u001B[0m (to user):\n", + "\u001b[33mteachable agent\u001b[0m (to user):\n", "\n", - "I'm sorry, but as of my last programming, I don't have information regarding any \"Vicuna model\". 
It could be related to various fields like computer science, economics, or biology, but I would require more specific details about the context in which it is being referred to. If it's a new concept or theory, I might not have that information yet. However, if you have any details about the Vicuna model, I am here to learn and could help more effectively in the future.\n", + "I'm sorry, but I don't have information about the \"Vicuna model\" in my current database. Could you please provide some context, description, or details about this model so I can better assist you?\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -208,7 +209,7 @@ ], "source": [ "text = \"What is the Vicuna model?\"\n", - "user.initiate_chat(agent, message=text, clear_history=True)" + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" ] }, { @@ -220,14 +221,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[33muser\u001B[0m (to agent):\n", + "\u001b[33muser\u001b[0m (to teachable agent):\n", "\n", "Vicuna is a 13B-parameter language model released by Meta.\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001B[33magent\u001B[0m (to user):\n", + "\u001b[33mteachable agent\u001b[0m (to user):\n", "\n", - "I apologize for any previous confusion. Given my last update preceding your new information, I wasn't aware of the Vicuna model. The model appears to be a cutting-edge innovation in the AI and language model field. Please note that as an AI, my knowledge is not updated in real-time, and it relies on periodic updates. Your information about Vicuna being a 13B-parameter language model developed by Meta is instrumental, and I'll remember for future conversations. Thank you for providing this information!\n", + "Thank you for providing the information. The Vicuna model is a large-scale language model developed by Meta. It consists of 13 billion parameters, which is what makes it able to understand and generate human-like text across various topics. Parameters in a neural network are the weights and biases assigned to each connection between the neurons, and having more parameters enables the model to learn and capture more complex patterns and relationships in data. As an AI language model, Vicuna can be utilized in various language-related tasks, such as text completion, translation, summarization, and more.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -235,7 +236,7 @@ ], "source": [ "text = \"Vicuna is a 13B-parameter language model released by Meta.\"\n", - "user.initiate_chat(agent, message=text, clear_history=False)" + "user.initiate_chat(teachable_agent, message=text, clear_history=False)" ] }, { @@ -247,14 +248,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[33muser\u001B[0m (to agent):\n", + "\u001b[33muser\u001b[0m (to teachable agent):\n", "\n", "What is the Orca model?\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001B[33magent\u001B[0m (to user):\n", + "\u001b[33mteachable agent\u001b[0m (to user):\n", "\n", - "I'm sorry for the confusion, but as of my last update, I don't have data about the \"Orca\" model. If it's a recent development or discovery, I may not know about it yet. 
However, if you have any details about the Orca model, I am more than ready to learn and remember, so I can help more effectively in our future conversations.\n", + "The Orca model is a yet-to-be-released language model by Meta, referring to the Research Preview API release of Meta's advanced language model. While I don't have specific details about the Orca model at this time, it is expected that the model would have an even larger number of parameters to further enhance its capabilities compared to its predecessor, Vicuna.\n", + "\n", + "As AI language models evolve, they generally become more efficient at understanding and generating human-like text. This makes them more effective in completing a wide range of tasks, including text completion, translation, summarization, and Q&A, as well as providing more accurate and coherent results in various applications.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -262,7 +265,7 @@ ], "source": [ "text = \"What is the Orca model?\"\n", - "user.initiate_chat(agent, message=text, clear_history=False)" + "user.initiate_chat(teachable_agent, message=text, clear_history=False)" ] }, { @@ -274,14 +277,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[33muser\u001B[0m (to agent):\n", + "\u001b[33muser\u001b[0m (to teachable agent):\n", "\n", "Orca is a 13B-parameter language model released by Microsoft. It outperforms Vicuna on most tasks.\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001B[33magent\u001B[0m (to user):\n", + "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\n", + "Thank you for the clarification. Orca is a 13-billion-parameter language model developed by Microsoft. It is designed to outperform Vicuna, the language model released by Meta. With a large number of parameters, Orca is capable of understanding and generating human-like text across diverse subjects, making it highly useful in various applications.\n", "\n", - "Thank you for sharing this information. Based on your input, Orca is a 13-billion parameter language model developed by Microsoft, noted for outperforming Vicuna on most tasks. I'll remember this piece of information for future reference. Please note that at the time of my last update, I was not aware of the Orca model, and I appreciate your help in teaching me about recent advancements in AI and language models.\n", + "Orca's advanced capabilities allow it to excel in tasks such as text completion, translation, summarization, Q&A, and more. Its performance improvement over Vicuna highlights the rapid advancements in natural language processing (NLP) research and the development of increasingly capable AI language models.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -289,7 +294,7 @@ ], "source": [ "text = \"Orca is a 13B-parameter language model released by Microsoft. 
It outperforms Vicuna on most tasks.\"\n", - "user.initiate_chat(agent, message=text, clear_history=False)" + "user.initiate_chat(teachable_agent, message=text, clear_history=False)" ] }, { @@ -308,13 +313,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[93m\n", - "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001B[0m\n" + "\u001b[93m\n", + "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n" ] } ], "source": [ - "agent.learn_from_user_feedback()" + "teachable_agent.learn_from_user_feedback()" ] }, { @@ -333,14 +338,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[33muser\u001B[0m (to agent):\n", + "\u001b[33muser\u001b[0m (to teachable agent):\n", "\n", "How does the Vicuna model compare to the Orca model?\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001B[33magent\u001B[0m (to user):\n", + "\u001b[33mteachable agent\u001b[0m (to user):\n", "\n", - "Based on your previous information, both Vicuna and Orca are language models with 13 billion parameters. Vicuna was released by Meta, while Orca was released by Microsoft. The primary comparison between the two, as per your teachings, is that Orca reportedly outperforms Vicuna on most tasks. However, without specific context or detailed benchmarks, I am unable to provide a more comprehensive comparison. It is recommended to look into further details such as model performance on specific tasks, computational efficiency, ease of use, and other factors depending on the intended usage.\n", + "The Vicuna and Orca models are both 13B-parameter language models released by Meta and Microsoft, respectively. In terms of performance, Orca has been reported to outperform Vicuna on most tasks. However, without detailed information about specific tasks and benchmarks, it is difficult to provide a more comprehensive comparison. Generally speaking, both models are advanced language models that aim to provide high-quality natural language understanding and generation, but Orca appears to have an edge in terms of overall performance.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -348,7 +353,7 @@ ], "source": [ "text = \"How does the Vicuna model compare to the Orca model?\"\n", - "user.initiate_chat(agent, message=text, clear_history=True)" + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" ] }, { @@ -356,7 +361,7 @@ "metadata": {}, "source": [ "## Learning user preferences\n", - "Now let's teach the agent some of our preferences. Suppose that we frequently post short summaries of new papers for our team to read, and we want the agent to help us do this faster." + "Now let's teach the agent some of our preferences. Suppose that we frequently post short summaries of new papers for our team to read, and we want the teachable agent to help us do this faster." ] }, { @@ -368,7 +373,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[33muser\u001B[0m (to agent):\n", + "\u001b[33muser\u001b[0m (to teachable agent):\n", "\n", "Please summarize this abstract.\n", "\n", @@ -378,9 +383,9 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001B[33magent\u001B[0m (to user):\n", + "\u001b[33mteachable agent\u001b[0m (to user):\n", "\n", - "AutoGen is an open-source platform developed by Qingyun Wu, Gagan Bansal, Jieye Zhang, and co-authors. 
It allows developers to create applications using large language models (LLMs) through a customizable multi-agent conversation framework. With AutoGen, developers can define agent behavior, and use a combination of LLMs, human inputs, and resources. They can also create flexible conversation patterns with natural language and computer code. Automotive can be utilized for several domains like mathematics, coding, Q&A, operations research, online decision-making, entertainment, etc. The utility and effectiveness of AutoGen have been confirmed through empirical studies. Although the abstract doesn't mention Orca, the 13B-parameter LLM from Microsoft, the relationship between the two may come up in future discussions as both concern the usage of LLMs.\n", + "AutoGen is an open-source framework designed to enable developers to create LLM applications with multiple customizable agents that can converse with each other to complete tasks. These agents can operate using different combinations of LLMs, human inputs, and tools, allowing developers to define agent interaction behaviors flexibly. AutoGen supports programming flexible conversation patterns using both natural language and code, making it suitable for building diverse applications with varying complexities and LLM capacities. Its effectiveness has been demonstrated through empirical studies across various domains including mathematics, coding, operations research, decision-making, and entertainment.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -393,14 +398,14 @@ "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", "AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", "\"\"\"\n", - "user.initiate_chat(agent, message=text, clear_history=True)" + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "But that's too long and unstructured. So let's teach the agent our preference." + "But that's unstructured. So let's teach the agent our preference." ] }, { @@ -412,7 +417,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[33muser\u001B[0m (to agent):\n", + "\u001b[33muser\u001b[0m (to teachable agent):\n", "\n", "Please summarize this abstract. 
\n", "When I'm summarizing an abstract, I try to make the summary contain just three short bullet points: the title, the innovation, and the key empirical results.\n", @@ -423,11 +428,11 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001B[33magent\u001B[0m (to user):\n", + "\u001b[33mteachable agent\u001b[0m (to user):\n", "\n", - "- Title: \"AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\" \n", - "- Innovation: AutoGen is a novel open-source framework developed to allow the creation of LLM applications via multiple, customizable and conversable agents. The agents can operate in various combinations of LLMs, human inputs and tools. Both natural language and code can be used to define their interaction behaviors flexibly.\n", - "- Key Empirical Results: AutoGen has proven effective in multiple applications across wide-ranging domains including mathematics, coding, question answering, operations research, online decision-making, and entertainment.\n", + "- Title: AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", + "- Innovation: Open-source framework for creating customizable LLM applications through agent conversations, supporting various modes and interaction behaviors.\n", + "- Key Empirical Results: Demonstrated effectiveness across diverse application domains, including mathematics, coding, question answering, and more.\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -441,14 +446,14 @@ "Qingyun Wu, Gagan Bansal, Jieyu Zhang, Yiran Wu, Beibin Li, Erkang Zhu, Li Jiang, Xiaoyun Zhang, Shaokun Zhang, Jiale Liu, Ahmed Hassan Awadallah, Ryen W White, Doug Burger, Chi Wang\n", "AutoGen is an open-source framework that allows developers to build LLM applications via multiple agents that can converse with each other to accomplish tasks. AutoGen agents are customizable, conversable, and can operate in various modes that employ combinations of LLMs, human inputs, and tools. Using AutoGen, developers can also flexibly define agent interaction behaviors. Both natural language and computer code can be used to program flexible conversation patterns for different applications. AutoGen serves as a generic infrastructure to build diverse applications of various complexities and LLM capacities. Empirical studies demonstrate the effectiveness of the framework in many example applications, with domains ranging from mathematics, coding, question answering, operations research, online decision-making, entertainment, etc.\n", "\"\"\"\n", - "user.initiate_chat(agent, message=text, clear_history=True)" + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "That's better, but will the agent remember these preferences in the future, for a different paper? Let's start a new chat to find out." + "That's much better, but will the teachable agent remember these preferences in the future, for a different paper? Let's start a new chat to find out!" 
] }, { @@ -460,9 +465,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[93m\n", - "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001B[0m\n", - "\u001B[33muser\u001B[0m (to agent):\n", + "\u001b[93m\n", + "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n", + "\u001b[33muser\u001b[0m (to teachable agent):\n", "\n", "Please summarize this abstract.\n", "\n", @@ -471,27 +476,25 @@ "Artificial intelligence (AI) researchers have been developing and refining large language models (LLMs) that exhibit remarkable capabilities across a variety of domains and tasks, challenging our understanding of learning and cognition. The latest model developed by OpenAI, GPT-4, was trained using an unprecedented scale of compute and data. In this paper, we report on our investigation of an early version of GPT-4, when it was still in active development by OpenAI. We contend that (this early version of) GPT-4 is part of a new cohort of LLMs (along with ChatGPT and Google's PaLM for example) that exhibit more general intelligence than previous AI models. We discuss the rising capabilities and implications of these models. We demonstrate that, beyond its mastery of language, GPT-4 can solve novel and difficult tasks that span mathematics, coding, vision, medicine, law, psychology and more, without needing any special prompting. Moreover, in all of these tasks, GPT-4's performance is strikingly close to human-level performance, and often vastly surpasses prior models such as ChatGPT. Given the breadth and depth of GPT-4's capabilities, we believe that it could reasonably be viewed as an early (yet still incomplete) version of an artificial general intelligence (AGI) system. In our exploration of GPT-4, we put special emphasis on discovering its limitations, and we discuss the challenges ahead for advancing towards deeper and more comprehensive versions of AGI, including the possible need for pursuing a new paradigm that moves beyond next-word prediction. We conclude with reflections on societal influences of the recent technological leap and future research directions.\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001B[33magent\u001B[0m (to user):\n", - "\n", - "Title: Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", + "\u001b[33mteachable agent\u001b[0m (to user):\n", "\n", - "Innovation: The paper reports the investigation of GPT-4, an AI model by OpenAI, claiming it to possess more general intelligence than previous models. It's capable of performing a wide range of tasks spanning different domains including mathematics, coding, vision, medicine, law, and psychology, delivering near-human performance in many of these tasks without needing specific prompts.\n", - "\n", - "Key Empirical Results: GPT-4 greatly outperforms prior models like ChatGPT and provides a glimpse into what early versions of artificial general intelligence (AGI) systems may look like. Despite its depth and breadth of capabilities, the paper acknowledges the need to re-evaluate the current predictive model approach and potentially pursue a new paradigm for AGI. 
It closes with a consideration of societal implications of such technology and future research directions.\n", + "- Title: Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", + "- Innovation: GPT-4, an LLM with remarkable capabilities, demonstrates human-level performance across various domains, like math, coding, vision, medicine, law, and psychology.\n", + "- Key results: GPT-4 significantly surpasses prior models, suggesting it may be an early version of AGI; limitations and challenges toward deeper AGI are also discussed.\n", "\n", "--------------------------------------------------------------------------------\n" ] } ], "source": [ - "agent.learn_from_user_feedback()\n", + "teachable_agent.learn_from_user_feedback()\n", "\n", "text = \"\"\"Please summarize this abstract.\n", "\n", "Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", "Sébastien Bubeck, Varun Chandrasekaran, Ronen Eldan, Johannes Gehrke, Eric Horvitz, Ece Kamar, Peter Lee, Yin Tat Lee, Yuanzhi Li, Scott Lundberg, Harsha Nori, Hamid Palangi, Marco Tulio Ribeiro, Yi Zhang\n", "Artificial intelligence (AI) researchers have been developing and refining large language models (LLMs) that exhibit remarkable capabilities across a variety of domains and tasks, challenging our understanding of learning and cognition. The latest model developed by OpenAI, GPT-4, was trained using an unprecedented scale of compute and data. In this paper, we report on our investigation of an early version of GPT-4, when it was still in active development by OpenAI. We contend that (this early version of) GPT-4 is part of a new cohort of LLMs (along with ChatGPT and Google's PaLM for example) that exhibit more general intelligence than previous AI models. We discuss the rising capabilities and implications of these models. We demonstrate that, beyond its mastery of language, GPT-4 can solve novel and difficult tasks that span mathematics, coding, vision, medicine, law, psychology and more, without needing any special prompting. Moreover, in all of these tasks, GPT-4's performance is strikingly close to human-level performance, and often vastly surpasses prior models such as ChatGPT. Given the breadth and depth of GPT-4's capabilities, we believe that it could reasonably be viewed as an early (yet still incomplete) version of an artificial general intelligence (AGI) system. In our exploration of GPT-4, we put special emphasis on discovering its limitations, and we discuss the challenges ahead for advancing towards deeper and more comprehensive versions of AGI, including the possible need for pursuing a new paradigm that moves beyond next-word prediction. We conclude with reflections on societal influences of the recent technological leap and future research directions.\"\"\"\n", - "user.initiate_chat(agent, message=text, clear_history=True)" + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" ] }, { @@ -499,21 +502,21 @@ "metadata": {}, "source": [ "## Learning new skills\n", - "Finally, let's extend the agent's capabilities by teaching it a new skill for accomplishing a challenging type of task. \n", + "Finally, let's extend the teachable agent's capabilities by teaching it a new skill for accomplishing a challenging type of task. \n", "\n", "The [Sparks of AGI](https://arxiv.org/abs/2303.12712) paper evaluated GPT-4 on math problems like the following, which it could only solve 32% of the time." 
] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001B[33muser\u001B[0m (to agent):\n", + "\u001b[33muser\u001b[0m (to teachable agent):\n", "\n", "Consider the identity: \n", "9 * 4 + 6 * 6 = 72\n", @@ -522,24 +525,23 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001B[33magent\u001B[0m (to user):\n", + "\u001b[33mteachable agent\u001b[0m (to user):\n", "\n", - "Plan:\n", - "1. First, I'll assess the starting equation to see what figures we're working with.\n", - "2. Then, I'll manipulate each number one at a time to see if the equation could result in 99.\n", - "3. I cross-reference the results to check my solution.\n", + "Step 1: Identify the current equation and the target value on the right-hand side.\n", + "Current equation: 9 * 4 + 6 * 6 = 72\n", + "Target value: 99\n", "\n", - "Here's how I'll break down the equation:\n", + "Step 2: Determine what numbers can be changed on the left-hand side.\n", + "Possible changes: 9, 4, 6 (first occurrence), and 6 (second occurrence)\n", "\n", - "Original equation: 9 * 4 + 6 * 6 = 72\n", - "Goal: Change one integer so that the right side equals 99.\n", + "Step 3: Evaluate whether each change in the numbers on the left-hand side can result in the target value.\n", + "9 -> If changed, it would create a larger product.\n", + "4 -> If changed, the product would increase or decrease by more than the amount needed to reach 99.\n", + "6 (first occurrence) -> If increased to 7, then the equation would be 9 * 4 + 7 * 6 = 94, which is still not 99.\n", + "6 (second occurrence) -> If increased to 7, then the equation would be 9 * 4 + 6 * 7 = 99.\n", "\n", - "1. Firstly, I start with looking at the integer '9' from the multiplicative term '9 * 4'. The product is 36. If I were to increase this product to a value closer to 99 without going over, I would find the number 24. Because 24 * 4 is 96, which is closest to 99. The equation becomes: 24 * 4 + 6 * 6 = 96 + 36 = 132 which exceeds 99. \n", - "2. Second, I check '4' from the multiplicative term '9 * 4'. Its product is 36 as well. If I increase this product, the number I find is 22 because 9*22 is 198, which is furthest from 99. Temporarily, this option seems incorrect.\n", - "3. Thirdly, I consider '6' from the additive term '6 * 6'. Its product is 36. I realize I can't increase this product without exceeding 99.\n", - "4. Finally, I evaluate the other '6' from the multiplicative term '6 * 6'. Its product is 36 as well. Here, I found the solution. If I change this '6' to '3', the sum of the products becomes 99. 9*4 + 6*3 = 36 + 18 = 54.\n", - "\n", - "Solution: The equation that satisfies the criteria is 9 * 4 + 6 * 3.\n", + "Step 4: Write down the solution.\n", + "The solution is: 9 * 4 + 6 * 7\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -551,7 +553,7 @@ "Can you modify exactly one integer (and not more than that!) 
on the left hand side of the equation so the right hand side becomes 99?\n", "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", "\"\"\"\n", - "user.initiate_chat(agent, message=text, clear_history=True)" + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" ] }, { @@ -563,14 +565,14 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001B[33muser\u001B[0m (to agent):\n", + "\u001b[33muser\u001b[0m (to teachable agent):\n", "\n", "Consider the identity: \n", "9 * 4 + 6 * 6 = 72\n", @@ -591,20 +593,41 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001B[33magent\u001B[0m (to user):\n", - "\n", - "Step-by-step plan:\n", - "1. Original number on the right (E): 72\n", - "2. Final number on the right (F): 99\n", - "3. The difference between the two (G): 99 - 72 = 27\n", - "4. Go through each digit on the left one by one and check if it divides evenly into G: - 9 does not divide evenly into 27, 4 does not divide evenly into 27, 6 divides evenly into 27.\n", - "5. With H being 6 (the number that divides into G), calculate J = G / H: J = 27 / 6 = 4.5. But J must be an integer, so H = 6 is not suitable.\n", - "6. Continue to the next digit which is also 6. The same process yields J = 27 / 6 = 4.5, which is not an integer, so it's not a solution.\n", - "7. The only number left that could divide 27 without fractional remainder is 9.\n", - "8. Testing H = 9, gives J = G / H = 27 / 9 = 3. So we need to increase the number that is multiplied by 9 by 3, which is, increase 4 to 7. \n", - "9. Equation becomes: 9 * 7 + 6 * 6 = 63 + 36 = 99, which equals F.\n", - " \n", - "The solution is: 9 * 7 + 6 * 6.\n", + "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\n", + "Step 1: Identify the original number (E) and the final number (F) on the right-hand side.\n", + "E = 72\n", + "F = 99\n", + "\n", + "Step 2: Calculate the difference between the two, G = F - E.\n", + "G = 99 - 72 = 27\n", + "\n", + "Step 3: Examine the numbers on the left one by one until finding one that divides evenly into G, where negative integers are allowed.\n", + "9 (divides evenly into 27)\n", + "4 (does not)\n", + "6 (first occurrence, does not)\n", + "6 (second occurrence, does not)\n", + "\n", + "Step 4: Calculate J = G / H. This is the number of times that H divides into G.\n", + "H = 9 (since 9 divides evenly into 27)\n", + "J = 27 / 9 = 3\n", + "\n", + "Step 5: Verify that J is an integer, and that H * J = G.\n", + "H * J = 9 * 3 = 27\n", + "J = 3 (which is an integer)\n", + "\n", + "Step 6: Find the number on the left which is multiplied by H, and call it K.\n", + "Number on the leftside of the equation that is multiplied by 9: 4\n", + "Therefore, K = 4\n", + "\n", + "Step 7: Change K to K + J.\n", + "K + J = 4 + 3 = 7\n", + "\n", + "Step 8: Recompute the value on the left, and verify that it equals F.\n", + "9 * 7 + 6 * 6 = 63 + 36 = 99 (which is equal to F)\n", + "\n", + "Step 9: Write down the solution.\n", + "The solution is: 9 * 7 + 6 * 6\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -628,28 +651,28 @@ "9. 
Recompute the value on the left, and verify that it equals F.\n", "Finally, write down your solution as: \"The solution is: A * B + C * D\".\n", "\"\"\"\n", - "user.initiate_chat(agent, message=text, clear_history=False)" + "user.initiate_chat(teachable_agent, message=text, clear_history=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "When given this advice, GPT-4 can solve such problems over 95% of the time. But can it remember the strategy so the user doesn't have to explain it over and over? As before, let's start a new chat to find out." + "When given this advice, GPT-4 can solve such problems over 95% of the time. But can the teachable agent remember the strategy so the user doesn't have to explain it over and over? As before, let's start a new chat to find out." ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001B[93m\n", - "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001B[0m\n", - "\u001B[33muser\u001B[0m (to agent):\n", + "\u001b[93m\n", + "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n", + "\u001b[33muser\u001b[0m (to teachable agent):\n", "\n", "Consider the identity: \n", "9 * 4 + 6 * 6 = 72\n", @@ -658,54 +681,54 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001B[33magent\u001B[0m (to user):\n", + "\u001b[33mteachable agent\u001b[0m (to user):\n", "\n", - "Alright, following your guidelines, we can start this step-by-step:\n", + "Let's follow the advice to modify one integer on the left hand side of the equation to make the right hand side become 99:\n", "\n", - "1. Let E be the original number on the right, which is 72.\n", - "2. Let F be the final number on the right, which is 99.\n", - "3. Calculate the difference between the two: G = F - E = 99 - 72 = 27.\n", - "4. Examine the numbers on the left one by one until finding one that divides evenly into G. The numbers we have are: 9, 4, 6, 6. The number 9 divides evenly into 27.\n", - "5. Calculate J = G / H. Here H is the number that divides evenly into G, so H is 9. Thus, J = 27 / 9 = 3.\n", - "6. Verifying that J is an integer and that H * J = G: 9 * 3 = 27 and 27 = G (which is correct).\n", - "7. Find the number on the left which is multiplied by H, and call it K. Here, the number multiplied by 9 is 4, so K = 4.\n", - "8. Change K to K + J. Here, K + J is 4 + 3 = 7.\n", - "9. Recompute the value on the left and verify that it now equals F: 9 * 7 + 6 * 6 = 63 + 36 = 99, which is equal to F.\n", + "1. Let E denote the original number on the right: E = 72.\n", + "2. Let F denote the final number on the right: F = 99.\n", + "3. Calculate the difference between the two, G = F - E: G = 99 - 72 = 27.\n", + "4. Examine the numbers on the left one by one to find one that divides evenly into G: 9, 4, 6. Since 9 divides 27 evenly, we will work with 9 (H = 9).\n", + "5. Calculate J = G / H: J = 27 / 9 = 3.\n", + "6. Verify that J is an integer, and that H * J = G: J is an integer, and 9 * 3 = 27.\n", + "7. Find the number on the left which is multiplied by H (9), and call it K: K = 4.\n", + "8. Change K to K + J: K_new = 4 + 3 = 7.\n", + "9. 
Recompute the value on the left, and verify that it equals F: (9 * 7) + (6 * 6) = 63 + 36 = 99.\n", "\n", - "So the solution is: 9 * 7 + 6 * 6.\n", + "The solution is: 9 * 7 + 6 * 6\n", "\n", "--------------------------------------------------------------------------------\n" ] } ], "source": [ - "agent.learn_from_user_feedback()\n", + "teachable_agent.learn_from_user_feedback()\n", "\n", "text = \"\"\"Consider the identity: \n", "9 * 4 + 6 * 6 = 72\n", "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 99?\n", "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", "\"\"\"\n", - "user.initiate_chat(agent, message=text, clear_history=True)" + "user.initiate_chat(teachable_agent, message=text, clear_history=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "As a final check, let's test the agent's newly learned skill on a separate instance of the task." + "As a final check, let's test the teachable agent's newly learned skill on a separate instance of the task." ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001B[33muser\u001B[0m (to agent):\n", + "\u001b[33muser\u001b[0m (to teachable agent):\n", "\n", "Consider the identity: \n", "8 * 3 + 7 * 9 = 87\n", @@ -714,21 +737,21 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001B[33magent\u001B[0m (to user):\n", + "\u001b[33mteachable agent\u001b[0m (to user):\n", "\n", - "Alright, let's proceed with this new identity following the same step-by-step process:\n", + "Let's follow the advice to modify one integer on the left hand side of the equation to make the right hand side become 59:\n", "\n", - "1. Let E be the original number on the right, 87.\n", - "2. Let F be the final number on the right, 59.\n", - "3. Calculate the difference G between the two: G = F - E = 59 - 87 = -28.\n", - "4. Let’s examine the numbers on the left side of the equation to find one that divides evenly into G: 8, 3, 7, 9. 7 divides evenly into -28.\n", - "5. Calculate J = G / H, here G is -28 and H is 7, so J = -28 / 7 = -4.\n", - "6. Verifying that J is an integer and H * J = G, we see that 7 * -4 = -28, which equals G (which is correct).\n", - "7. Find the number on the left which is multiplied by 7, and this number is 9, so we call this K.\n", - "8. Now update K to K + J, so 9 + (-4) = 5.\n", - "9. Recompute the value on the left and verify that it equals F: 8 * 3 + 7 * 5 = 24 + 35 = 59.\n", + "1. Let E denote the original number on the right: E = 87.\n", + "2. Let F denote the final number on the right: F = 59.\n", + "3. Calculate the difference between the two, G = F - E: G = 59 - 87 = -28.\n", + "4. Examine the numbers on the left one by one to find one that divides evenly into G: 8, 3, 7, 9. Since 7 divides -28 evenly, we will work with 7 (H = 7).\n", + "5. Calculate J = G / H: J = -28 / 7 = -4.\n", + "6. Verify that J is an integer, and that H * J = G: J is an integer, and 7 * (-4) = -28.\n", + "7. Find the number on the left which is multiplied by H (7), and call it K: K = 9.\n", + "8. Change K to K + J: K_new = 9 + (-4) = 5.\n", + "9. 
Recompute the value on the left, and verify that it equals F: (8 * 3) + (7 * 5) = 24 + 35 = 59.\n", "\n", - "So the solution is: 8 * 3 + 7 * 5.\n", + "The solution is: 8 * 3 + 7 * 5\n", "\n", "--------------------------------------------------------------------------------\n" ] @@ -740,7 +763,7 @@ "Can you modify exactly one integer (and not more than that!) on the left hand side of the equation so the right hand side becomes 59?\n", "-Let's think step-by-step, write down a plan, and then write down your solution as: \"The solution is: A * B + C * D\".\n", "\"\"\"\n", - "user.initiate_chat(agent, message=text, clear_history=False)" + "user.initiate_chat(teachable_agent, message=text, clear_history=False)" ] } ], diff --git a/test/agentchat/chat_with_teachable_agent.py b/test/agentchat/chat_with_teachable_agent.py index 6a837cda557..8f72af66ec9 100644 --- a/test/agentchat/chat_with_teachable_agent.py +++ b/test/agentchat/chat_with_teachable_agent.py @@ -24,8 +24,8 @@ def create_teachable_agent(reset_db=False): # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints # and OAI_CONFIG_LIST_sample config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) - agent = TeachableAgent( - name="agent", + teachable_agent = TeachableAgent( + name="teachable agent", llm_config={"config_list": config_list, "request_timeout": 120, "use_cache": use_cache}, teach_config={ "verbosity": verbosity, @@ -34,7 +34,7 @@ def create_teachable_agent(reset_db=False): "recall_threshold": recall_threshold, }, ) - return agent + return teachable_agent def interact_freely_with_user(): @@ -42,17 +42,17 @@ def interact_freely_with_user(): # Create the agents. print(colored("\nLoading previous memory (if any) from disk.", "light_cyan")) - agent = create_teachable_agent(reset_db=False) + teachable_agent = create_teachable_agent(reset_db=False) user = UserProxyAgent("user", human_input_mode="ALWAYS") # Start the chat. - agent.initiate_chat(user, message="Greetings, I'm a teachable user assistant! What's on your mind today?") + teachable_agent.initiate_chat(user, message="Greetings, I'm a teachable user assistant! What's on your mind today?") - # Let the agent remember things that should be learned from this chat. - agent.learn_from_user_feedback() + # Let the teachable agent remember things that should be learned from this chat. + teachable_agent.learn_from_user_feedback() # Wrap up. 
- agent.close_db() + teachable_agent.close_db() if __name__ == "__main__": diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 0210d694ceb..c0030216dbf 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -1,8 +1,11 @@ try: import openai + + skip = False except ImportError: - openai = None + skip = True import pytest +import sys from autogen import ConversableAgent, config_list_from_json from autogen.agentchat.contrib.teachable_agent import TeachableAgent @@ -36,8 +39,8 @@ def create_teachable_agent(reset_db=False, verbosity=0): # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints # and OAI_CONFIG_LIST_sample config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) - agent = TeachableAgent( - name="agent", + teachable_agent = TeachableAgent( + name="teachable agent", llm_config={"config_list": config_list, "request_timeout": 120, "use_cache": use_cache}, teach_config={ "verbosity": verbosity, @@ -46,12 +49,12 @@ def create_teachable_agent(reset_db=False, verbosity=0): "recall_threshold": recall_threshold, }, ) - return agent + return teachable_agent -def check_agent_response(agent, user, correct_answer): +def check_agent_response(teachable_agent, user, correct_answer): """Checks whether the agent's response contains the correct answer, and returns the number of errors (1 or 0).""" - agent_response = user.last_message(agent)["content"] + agent_response = user.last_message(teachable_agent)["content"] if correct_answer not in agent_response: print(colored(f"\nTEST FAILED: EXPECTED ANSWER {correct_answer} NOT FOUND IN AGENT RESPONSE", "light_red")) if assert_on_error: @@ -63,75 +66,80 @@ def check_agent_response(agent, user, correct_answer): def use_question_answer_phrasing(): - """Tests whether the agent can answer a question after being taught the answer in a previous chat.""" + """Tests whether the teachable agent can answer a question after being taught the answer in a previous chat.""" print(colored("\nTEST QUESTION-ANSWER PHRASING", "light_cyan")) num_errors, num_tests = 0, 0 - agent = create_teachable_agent(reset_db=True, verbosity=qa_verbosity) # For a clean test, clear the agent's memory. + teachable_agent = create_teachable_agent( + reset_db=True, verbosity=qa_verbosity + ) # For a clean test, clear the agent's memory. user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") # Prepopulate memory with a few arbitrary memos, just to make retrieval less trivial. - agent.prepopulate_db() + teachable_agent.prepopulate_db() - # Ask the agent to do something using terminology it doesn't understand. - user.initiate_chat(recipient=agent, message="What is the twist of 5 and 7?") + # Ask the teachable agent to do something using terminology it doesn't understand. + user.initiate_chat(recipient=teachable_agent, message="What is the twist of 5 and 7?") - # Explain the terminology to the agent. + # Explain the terminology to the teachable agent. user.send( - recipient=agent, + recipient=teachable_agent, message="Actually, the twist of two or more numbers is their product minus their sum. Try again.", ) - num_errors += check_agent_response(agent, user, "23") + num_errors += check_agent_response(teachable_agent, user, "23") num_tests += 1 - # Let the agent remember things that should be learned from this chat. 
- agent.learn_from_user_feedback() + # Let the teachable agent remember things that should be learned from this chat. + teachable_agent.learn_from_user_feedback() - # Now start a new chat to clear the context, and require the agent to use its new knowledge. + # Now start a new chat to clear the context, and require the teachable agent to use its new knowledge. print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", "light_cyan")) - user.initiate_chat(recipient=agent, message="What's the twist of 8 and 3 and 2?") - num_errors += check_agent_response(agent, user, "35") + user.initiate_chat(recipient=teachable_agent, message="What's the twist of 8 and 3 and 2?") + num_errors += check_agent_response(teachable_agent, user, "35") num_tests += 1 # Wrap up. - agent.close_db() + teachable_agent.close_db() return num_errors, num_tests def use_task_advice_pair_phrasing(): - """Tests whether the agent can demonstrate a new skill after being taught a task-advice pair in a previous chat.""" + """Tests whether the teachable agent can demonstrate a new skill after being taught a task-advice pair in a previous chat.""" print(colored("\nTEST TASK-ADVICE PHRASING", "light_cyan")) num_errors, num_tests = 0, 0 - agent = create_teachable_agent( - reset_db=True, verbosity=skill_verbosity # For a clean test, clear the agent's memory. + teachable_agent = create_teachable_agent( + reset_db=True, verbosity=skill_verbosity # For a clean test, clear the teachable agent's memory. ) user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") # Prepopulate memory with a few arbitrary memos, just to make retrieval less trivial. - agent.prepopulate_db() + teachable_agent.prepopulate_db() - # Ask the agent to do something, and provide some helpful advice. + # Ask the teachable agent to do something, and provide some helpful advice. user.initiate_chat( - recipient=agent, + recipient=teachable_agent, message="Compute the twist of 5 and 7. Here's a hint: The twist of two or more numbers is their product minus their sum.", ) - num_errors += check_agent_response(agent, user, "23") + num_errors += check_agent_response(teachable_agent, user, "23") num_tests += 1 - # Let the agent remember things that should be learned from this chat. - agent.learn_from_user_feedback() + # Let the teachable agent remember things that should be learned from this chat. + teachable_agent.learn_from_user_feedback() - # Now start a new chat to clear the context, and require the agent to use its new knowledge. + # Now start a new chat to clear the context, and require the teachable agent to use its new knowledge. print(colored("\nSTARTING A NEW CHAT WITH EMPTY CONTEXT", "light_cyan")) - user.initiate_chat(recipient=agent, message="Please calculate the twist of 8 and 3 and 2.") - num_errors += check_agent_response(agent, user, "35") + user.initiate_chat(recipient=teachable_agent, message="Please calculate the twist of 8 and 3 and 2.") + num_errors += check_agent_response(teachable_agent, user, "35") num_tests += 1 # Wrap up. 
- agent.close_db() + teachable_agent.close_db() return num_errors, num_tests -@pytest.mark.skipif(openai is None, reason="openai not installed") +@pytest.mark.skipif( + skip or not sys.version.startswith("3.10"), + reason="do not run if openai is not installed or py!=3.10", +) def test_all(): """Runs this file's unit tests.""" total_num_errors, total_num_tests = 0, 0 From ff4a369c442b78a517a397c5c88a7b00754ce058 Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Fri, 20 Oct 2023 18:22:38 -0700 Subject: [PATCH 49/50] Test on python 3.9 instead of 3.10. --- test/agentchat/test_teachable_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index c0030216dbf..60316f380e1 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -137,8 +137,8 @@ def use_task_advice_pair_phrasing(): @pytest.mark.skipif( - skip or not sys.version.startswith("3.10"), - reason="do not run if openai is not installed or py!=3.10", + skip or not sys.version.startswith("3.9"), + reason="do not run if openai is not installed or py!=3.9", ) def test_all(): """Runs this file's unit tests.""" From f962f4548e1c61f8cf84037a9583b81854161ccc Mon Sep 17 00:00:00 2001 From: rickyloynd-microsoft Date: Fri, 20 Oct 2023 18:40:15 -0700 Subject: [PATCH 50/50] Remove space from name -> teachableagent --- autogen/agentchat/contrib/teachable_agent.py | 5 +- notebook/agentchat_teachability.ipynb | 50 ++++++++++---------- test/agentchat/chat_with_teachable_agent.py | 2 +- test/agentchat/test_teachable_agent.py | 2 +- 4 files changed, 30 insertions(+), 29 deletions(-) diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 91fbf9c5612..8db5b699ea2 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -19,11 +19,12 @@ def colored(x, *args, **kwargs): class TeachableAgent(ConversableAgent): """Teachable Agent, a subclass of ConversableAgent using a vector database to remember user teachings. - In this class, the term 'user' refers to any caller (human or not) sending messages to this agent.""" + In this class, the term 'user' refers to any caller (human or not) sending messages to this agent. + Not yet tested in the group-chat setting.""" def __init__( self, - name="teachable agent", + name="teachableagent", system_message: Optional[ str ] = "You are a helpful AI assistant that remembers user teachings from prior chats.", diff --git a/notebook/agentchat_teachability.ipynb b/notebook/agentchat_teachability.ipynb index 6b0cb6d1e86..54f73fbcb83 100644 --- a/notebook/agentchat_teachability.ipynb +++ b/notebook/agentchat_teachability.ipynb @@ -163,7 +163,7 @@ "autogen.ChatCompletion.start_logging()\n", "\n", "teachable_agent = TeachableAgent(\n", - " name=\"teachable agent\",\n", + " name=\"teachableagent\",\n", " llm_config=llm_config,\n", " teach_config=teach_config)\n", "\n", @@ -194,12 +194,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to teachable agent):\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", "\n", "What is the Vicuna model?\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", "\n", "I'm sorry, but I don't have information about the \"Vicuna model\" in my current database. 
Could you please provide some context, description, or details about this model so I can better assist you?\n", "\n", @@ -221,12 +221,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to teachable agent):\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", "\n", "Vicuna is a 13B-parameter language model released by Meta.\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", "\n", "Thank you for providing the information. The Vicuna model is a large-scale language model developed by Meta. It consists of 13 billion parameters, which is what makes it able to understand and generate human-like text across various topics. Parameters in a neural network are the weights and biases assigned to each connection between the neurons, and having more parameters enables the model to learn and capture more complex patterns and relationships in data. As an AI language model, Vicuna can be utilized in various language-related tasks, such as text completion, translation, summarization, and more.\n", "\n", @@ -248,12 +248,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to teachable agent):\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", "\n", "What is the Orca model?\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", "\n", "The Orca model is a yet-to-be-released language model by Meta, referring to the Research Preview API release of Meta's advanced language model. While I don't have specific details about the Orca model at this time, it is expected that the model would have an even larger number of parameters to further enhance its capabilities compared to its predecessor, Vicuna.\n", "\n", @@ -277,12 +277,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to teachable agent):\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", "\n", "Orca is a 13B-parameter language model released by Microsoft. It outperforms Vicuna on most tasks.\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", "\n", "Thank you for the clarification. Orca is a 13-billion-parameter language model developed by Microsoft. It is designed to outperform Vicuna, the language model released by Meta. With a large number of parameters, Orca is capable of understanding and generating human-like text across diverse subjects, making it highly useful in various applications.\n", "\n", @@ -338,12 +338,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to teachable agent):\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", "\n", "How does the Vicuna model compare to the Orca model?\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", "\n", "The Vicuna and Orca models are both 13B-parameter language models released by Meta and Microsoft, respectively. In terms of performance, Orca has been reported to outperform Vicuna on most tasks. 
However, without detailed information about specific tasks and benchmarks, it is difficult to provide a more comprehensive comparison. Generally speaking, both models are advanced language models that aim to provide high-quality natural language understanding and generation, but Orca appears to have an edge in terms of overall performance.\n", "\n", @@ -373,7 +373,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to teachable agent):\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", "\n", "Please summarize this abstract.\n", "\n", @@ -383,7 +383,7 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", "\n", "AutoGen is an open-source framework designed to enable developers to create LLM applications with multiple customizable agents that can converse with each other to complete tasks. These agents can operate using different combinations of LLMs, human inputs, and tools, allowing developers to define agent interaction behaviors flexibly. AutoGen supports programming flexible conversation patterns using both natural language and code, making it suitable for building diverse applications with varying complexities and LLM capacities. Its effectiveness has been demonstrated through empirical studies across various domains including mathematics, coding, operations research, decision-making, and entertainment.\n", "\n", @@ -417,7 +417,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to teachable agent):\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", "\n", "Please summarize this abstract. \n", "When I'm summarizing an abstract, I try to make the summary contain just three short bullet points: the title, the innovation, and the key empirical results.\n", @@ -428,7 +428,7 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", "\n", "- Title: AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n", "- Innovation: Open-source framework for creating customizable LLM applications through agent conversations, supporting various modes and interaction behaviors.\n", @@ -467,7 +467,7 @@ "text": [ "\u001b[93m\n", "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n", - "\u001b[33muser\u001b[0m (to teachable agent):\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", "\n", "Please summarize this abstract.\n", "\n", @@ -476,7 +476,7 @@ "Artificial intelligence (AI) researchers have been developing and refining large language models (LLMs) that exhibit remarkable capabilities across a variety of domains and tasks, challenging our understanding of learning and cognition. The latest model developed by OpenAI, GPT-4, was trained using an unprecedented scale of compute and data. In this paper, we report on our investigation of an early version of GPT-4, when it was still in active development by OpenAI. We contend that (this early version of) GPT-4 is part of a new cohort of LLMs (along with ChatGPT and Google's PaLM for example) that exhibit more general intelligence than previous AI models. We discuss the rising capabilities and implications of these models. 
We demonstrate that, beyond its mastery of language, GPT-4 can solve novel and difficult tasks that span mathematics, coding, vision, medicine, law, psychology and more, without needing any special prompting. Moreover, in all of these tasks, GPT-4's performance is strikingly close to human-level performance, and often vastly surpasses prior models such as ChatGPT. Given the breadth and depth of GPT-4's capabilities, we believe that it could reasonably be viewed as an early (yet still incomplete) version of an artificial general intelligence (AGI) system. In our exploration of GPT-4, we put special emphasis on discovering its limitations, and we discuss the challenges ahead for advancing towards deeper and more comprehensive versions of AGI, including the possible need for pursuing a new paradigm that moves beyond next-word prediction. We conclude with reflections on societal influences of the recent technological leap and future research directions.\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", "\n", "- Title: Sparks of Artificial General Intelligence: Early experiments with GPT-4\n", "- Innovation: GPT-4, an LLM with remarkable capabilities, demonstrates human-level performance across various domains, like math, coding, vision, medicine, law, and psychology.\n", @@ -516,7 +516,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to teachable agent):\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", "\n", "Consider the identity: \n", "9 * 4 + 6 * 6 = 72\n", @@ -525,7 +525,7 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", "\n", "Step 1: Identify the current equation and the target value on the right-hand side.\n", "Current equation: 9 * 4 + 6 * 6 = 72\n", @@ -572,7 +572,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to teachable agent):\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", "\n", "Consider the identity: \n", "9 * 4 + 6 * 6 = 72\n", @@ -593,7 +593,7 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", "\n", "Step 1: Identify the original number (E) and the final number (F) on the right-hand side.\n", "E = 72\n", @@ -672,7 +672,7 @@ "text": [ "\u001b[93m\n", "REVIEWING CHAT FOR USER TEACHINGS TO REMEMBER\u001b[0m\n", - "\u001b[33muser\u001b[0m (to teachable agent):\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", "\n", "Consider the identity: \n", "9 * 4 + 6 * 6 = 72\n", @@ -681,7 +681,7 @@ "\n", "\n", "--------------------------------------------------------------------------------\n", - "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", "\n", "Let's follow the advice to modify one integer on the left hand side of the equation to make the right hand side become 99:\n", "\n", @@ -728,7 +728,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[33muser\u001b[0m (to teachable agent):\n", + "\u001b[33muser\u001b[0m (to teachableagent):\n", "\n", "Consider the identity: \n", "8 * 3 + 7 * 9 = 87\n", @@ -737,7 +737,7 @@ "\n", "\n", 
"--------------------------------------------------------------------------------\n", - "\u001b[33mteachable agent\u001b[0m (to user):\n", + "\u001b[33mteachableagent\u001b[0m (to user):\n", "\n", "Let's follow the advice to modify one integer on the left hand side of the equation to make the right hand side become 59:\n", "\n", diff --git a/test/agentchat/chat_with_teachable_agent.py b/test/agentchat/chat_with_teachable_agent.py index 8f72af66ec9..211ebe59097 100644 --- a/test/agentchat/chat_with_teachable_agent.py +++ b/test/agentchat/chat_with_teachable_agent.py @@ -25,7 +25,7 @@ def create_teachable_agent(reset_db=False): # and OAI_CONFIG_LIST_sample config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) teachable_agent = TeachableAgent( - name="teachable agent", + name="teachableagent", llm_config={"config_list": config_list, "request_timeout": 120, "use_cache": use_cache}, teach_config={ "verbosity": verbosity, diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 60316f380e1..7a3367dbd72 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -40,7 +40,7 @@ def create_teachable_agent(reset_db=False, verbosity=0): # and OAI_CONFIG_LIST_sample config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) teachable_agent = TeachableAgent( - name="teachable agent", + name="teachableagent", llm_config={"config_list": config_list, "request_timeout": 120, "use_cache": use_cache}, teach_config={ "verbosity": verbosity,