Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add math-class group chat test #309

Closed
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion autogen/agentchat/groupchat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass
import sys
from typing import Dict, List, Optional, Union
from typing import Callable, Dict, List, Optional, Union
from .agent import Agent
from .conversable_agent import ConversableAgent
import logging
Expand Down Expand Up @@ -121,13 +121,15 @@ def __init__(
human_input_mode: Optional[str] = "NEVER",
system_message: Optional[str] = "Group chat manager.",
# seed: Optional[int] = 4,
is_termination_msg: Optional[Callable[[Dict], bool]] = None,
**kwargs,
):
super().__init__(
name=name,
max_consecutive_auto_reply=max_consecutive_auto_reply,
human_input_mode=human_input_mode,
system_message=system_message,
is_termination_msg=is_termination_msg,
**kwargs,
)
self.register_reply(Agent, GroupChatManager.run_chat, config=groupchat, reset_config=GroupChat.reset)
Expand Down Expand Up @@ -173,6 +175,15 @@ def run_chat(
raise
if reply is None:
break

if (
reply is dict
and self._is_termination_msg(reply)
or reply is str
and self._is_termination_msg({"content": reply})
):
break

# The speaker sends the message without requesting a reply
speaker.send(reply, self, request_reply=False)
message = self.last_message(speaker)
Expand Down
189 changes: 187 additions & 2 deletions test/agentchat/test_groupchat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pytest
import autogen
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST
import random


def test_func_call_groupchat():
Expand Down Expand Up @@ -49,6 +51,181 @@ def test_func_call_groupchat():
agent2.initiate_chat(group_chat_manager, message={"function_call": {"name": "func", "arguments": '{"x": 1}'}})


def test_group_chat_math_class():
    """
    Simulate a math class in a group chat: the teacher creates math questions
    and the student resolves them.

    The teacher creates a question, the student answers and reports the answer
    back to the teacher.  If the answer is correct, the teacher creates another
    question; otherwise the teacher asks the student to resolve the question
    again.  The class ends when the teacher has created 3 questions.

    This test case covers the following features:
    - speaker selection works in a continuous Q&A scenario among two agents
      with the GPT-3.5 model.
    - the admin ends the class when the teacher has created 3 questions.
    """
    skip_if_openai_not_available()

    config_list = autogen.config_list_from_json(
        OAI_CONFIG_LIST,
        file_location=KEY_LOC,
        filter_dict={
            "model": ["gpt-3.5-turbo"],
        },
    )
    gpt3_5_config = {
        "model": "gpt-3.5-turbo",
        "seed": random.randint(0, 100),  # change the seed for different trials
        "temperature": 0,
        "config_list": config_list,
        "request_timeout": 120,
    }

    # The admin terminates the chat via a function call instead of a plain
    # termination string: a triggered function call is a more reliable signal
    # than expecting the model to emit an exact termination phrase.
    llm_config_for_user_proxy = {
        **gpt3_5_config,
        "functions": [
            {
                "name": "terminate_group_chat",
                "description": "terminate group chat",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "message": {
                            "type": "string",
                            "description": "terminate group chat message",
                        },
                    },
                    "required": ["message"],
                },
            }
        ],
    }

    def terminate_group_chat(message):
        # The sentinel prefix is what the manager's is_termination_msg detects.
        return f"[GROUPCHAT_TERMINATE] {message}"

    user_proxy = autogen.UserProxyAgent(
        name="Admin",
        system_message="You terminate group chat when teacher says [COMPLETE].",
        code_execution_config=False,
        llm_config=llm_config_for_user_proxy,
        human_input_mode="NEVER",
        function_map={"terminate_group_chat": terminate_group_chat},
    )

    llm_config_for_teacher = {
        **gpt3_5_config,
        "functions": [
            {
                "name": "create_math_question",
                "description": "create pre-school math question for student to resolve",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "question": {
                            "type": "string",
                            "description": "pre-school math question",
                        },
                        "i": {
                            "type": "integer",
                            "description": "question index",
                        },
                    },
                    "required": ["question", "i"],
                },
            }
        ],
    }

    def create_math_question(question, i):
        return f"[QUESTION] this is question #{i}: {question}"

    teacher = autogen.AssistantAgent(
        "teacher",
        system_message="""You are a pre-school math teacher, you create 3 math questions for student to resolve.
        Here's your workflow:
        -workflow-
        if question count > 3 say [COMPLETE].
        else create_math_question
        if answer is correct:
            create_math_question
        else:
            ask student to resolve the question again
        """,
        llm_config=llm_config_for_teacher,
        function_map={"create_math_question": create_math_question},
    )

    llm_config_for_student = {
        **gpt3_5_config,
        "functions": [
            {
                "name": "answer_math_question",
                "description": "answer math question from teacher",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "answer": {
                            "type": "string",
                            "description": "answer",
                        },
                    },
                    "required": ["answer"],
                },
            }
        ],
    }

    def answer_math_question(answer):
        return f"[ANSWER] {answer}"

    student = autogen.AssistantAgent(
        "student",
        system_message="""You are a pre-school student, you resolve the math questions from teacher.
        Here's your workflow:
        -workflow-
        if question is received:
            call answer_math_question
        else:
            ask teacher to create a question
        """,
        llm_config=llm_config_for_student,
        function_map={"answer_math_question": answer_math_question},
    )
    groupchat = autogen.GroupChat(agents=[user_proxy, student, teacher], messages=[], max_round=25)
    manager = autogen.GroupChatManager(
        groupchat=groupchat,
        llm_config=gpt3_5_config,
        # FIX: termination callbacks receive a message *dict* whose "content"
        # may be None (function-call messages); the original called
        # message.startswith(...) directly on the dict, which raises
        # AttributeError.
        is_termination_msg=lambda message: isinstance(message, dict)
        and (message.get("content") or "").startswith("[GROUPCHAT_TERMINATE]"),
    )
    user_proxy.send(
        "welcome to the class. I'm admin here. Teacher, you create 3 math questions for student to answer. Let me know when student resolve all questions.",
        manager,
    )

    teacher.send("I'm teacher, I will create 3 math questions for student to answer.", manager)
    student.send("I'm student, I will answer teacher's questions.", manager)

    user_proxy.initiate_chat(
        manager,
        message="""teacher, please start""",
    )

    # The chat must terminate before exhausting max_round.
    assert len(groupchat.messages) < 25

    # verify that admin says [GROUPCHAT_TERMINATE] exactly once
    # (guard against None content from function-call messages)
    terminate_message = filter(
        lambda message: (message.get("content") or "").startswith("[GROUPCHAT_TERMINATE]"),
        groupchat.messages,
    )
    assert len(list(terminate_message)) == 1

    # verify that teacher gives 3 questions
    question_message = filter(
        lambda message: (message.get("content") or "").startswith("[QUESTION]"),
        groupchat.messages,
    )
    assert len(list(question_message)) == 3

    # verify that student gives at least 3 answers
    # (student might give more than 3 answers if an answer is not correct)
    answer_message = filter(
        lambda message: (message.get("content") or "").startswith("[ANSWER]"),
        groupchat.messages,
    )
    assert len(list(answer_message)) >= 3


def test_chat_manager():
agent1 = autogen.ConversableAgent(
"alice",
Expand Down Expand Up @@ -112,8 +289,16 @@ def test_plugin():
assert len(groupchat.messages) == 2


def skip_if_openai_not_available():
    """Skip the calling test when the optional ``openai`` package is absent."""
    try:
        import openai  # noqa: F401 -- presence check only; the module is not used here
    except ImportError:
        pytest.skip("OpenAI package not found.")


if __name__ == "__main__":
    # Direct-run entry point for local debugging (pytest collects all tests
    # regardless).  FIX: removed the duplicated commented-out copies of
    # test_func_call_groupchat() and test_chat_manager(), which contradicted
    # the live calls above them.
    test_func_call_groupchat()
    test_group_chat_math_class()
    test_chat_manager()
    # test_broadcast()
    # test_plugin()
Loading