Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added ability to specify 'role' field for select speaker messages for Group Chats (Replaces PR #2167) #2199

Merged
merged 8 commits into from
Mar 31, 2024
12 changes: 10 additions & 2 deletions autogen/agentchat/groupchat.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def custom_speaker_selection_func(
"clear history" phrase in user prompt. This is experimental feature.
See description of GroupChatManager.clear_agents_history function for more info.
- send_introductions: send a round of introductions at the start of the group chat, so agents know who they can speak to (default: False)
- role_for_select_speaker_messages: sets the role name for speaker selection when in 'auto' mode, typically 'user' or 'system'. (default: 'system')
"""

agents: List[Agent]
Expand All @@ -74,6 +75,7 @@ def custom_speaker_selection_func(
speaker_transitions_type: Literal["allowed", "disallowed", None] = None
enable_clear_history: Optional[bool] = False
send_introductions: bool = False
role_for_select_speaker_messages: Optional[str] = "system"
ekzhu marked this conversation as resolved.
Show resolved Hide resolved

_VALID_SPEAKER_SELECTION_METHODS = ["auto", "manual", "random", "round_robin"]
_VALID_SPEAKER_TRANSITIONS_TYPE = ["allowed", "disallowed", None]
Expand Down Expand Up @@ -162,6 +164,9 @@ def __post_init__(self):
agents=self.agents,
)

if self.role_for_select_speaker_messages is None or len(self.role_for_select_speaker_messages) == 0:
raise ValueError("role_for_select_speaker_messages cannot be empty or None.")

@property
def agent_names(self) -> List[str]:
"""Return the names of the agents in the group chat."""
Expand Down Expand Up @@ -407,7 +412,7 @@ def _prepare_and_select_agents(
selected_agent = self.next_agent(last_speaker, graph_eligible_agents)
elif speaker_selection_method.lower() == "random":
selected_agent = self.random_select_speaker(graph_eligible_agents)
else:
else: # auto
selected_agent = None
select_speaker_messages = self.messages.copy()
# If last message is a tool call or function call, blank the call so the api doesn't throw
Expand All @@ -416,7 +421,10 @@ def _prepare_and_select_agents(
if select_speaker_messages[-1].get("tool_calls", False):
select_speaker_messages[-1] = dict(select_speaker_messages[-1], tool_calls=None)
select_speaker_messages = select_speaker_messages + [
{"role": "system", "content": self.select_speaker_prompt(graph_eligible_agents)}
{
"role": self.role_for_select_speaker_messages,
"content": self.select_speaker_prompt(graph_eligible_agents),
}
]
return selected_agent, graph_eligible_agents, select_speaker_messages

Expand Down
68 changes: 67 additions & 1 deletion test/agentchat/test_groupchat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1176,6 +1176,71 @@ def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat) ->
assert "teamA_executor" in speakers


def test_role_for_select_speaker_messages():
    """Verify GroupChat's role_for_select_speaker_messages option.

    Checks that the role of the appended speaker-selection message defaults
    to 'system', follows any value assigned to the attribute (including
    unusual strings), and that empty-string / None values are rejected with
    a ValueError at construction time.
    """

    def _build_agent(name):
        # Minimal non-LLM agent that replies with a canned message.
        return autogen.ConversableAgent(
            name,
            max_consecutive_auto_reply=10,
            human_input_mode="NEVER",
            llm_config=False,
            default_auto_reply=f"This is {name} speaking.",
        )

    agent1 = _build_agent("alice")
    agent2 = _build_agent("bob")

    groupchat = autogen.GroupChat(
        agents=[agent1, agent2],
        messages=[{"role": "user", "content": "Let's have a chat!"}],
        max_round=3,
    )

    # Default ('system'), explicit 'user', and an unusual custom value —
    # the last select-speaker message must carry the configured role.
    for expected_role in ("system", "user", "SockS"):
        if expected_role != "system":
            groupchat.role_for_select_speaker_messages = expected_role
        _, _, messages = groupchat._prepare_and_select_agents(agent1)
        assert len(messages) == 2
        assert messages[-1]["role"] == expected_role

    # Empty string and None must both be rejected by the constructor.
    for invalid_role in ("", None):
        with pytest.raises(ValueError) as excinfo:
            groupchat = autogen.GroupChat(
                agents=[agent1, agent2],
                messages=[{"role": "user", "content": "Let's have a chat!"}],
                max_round=3,
                role_for_select_speaker_messages=invalid_role,
            )
        assert "role_for_select_speaker_messages cannot be empty or None." in str(excinfo.value)


if __name__ == "__main__":
# test_func_call_groupchat()
# test_broadcast()
Expand All @@ -1190,5 +1255,6 @@ def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat) ->
# test_invalid_allow_repeat_speaker()
# test_graceful_exit_before_max_round()
# test_clear_agents_history()
test_custom_speaker_selection_overrides_transition_graph()
# test_custom_speaker_selection_overrides_transition_graph()
test_role_for_select_speaker_messages()
# pass
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ These proxy servers can be cloud-based or running locally within your environmen
By using cloud-based proxy servers, you are able to use models without requiring the hardware
and software to run them.

These providers can host open source/weight models, like [Hugging Face](https://huggingface.co/),
or their own closed models.
These providers can host open source/weight models, like [Hugging Face](https://huggingface.co/)
and [Mistral AI](https://mistral.ai/), or their own closed models.

When cloud-based proxy servers provide an OpenAI-compatible API, using them in AutoGen
is straightforward. With [LLM Configuration](/docs/topics/llm_configuration) done in
Expand All @@ -33,7 +33,7 @@ Examples of using cloud-based proxy servers providers that have an OpenAI-compat
are provided below:

- [together.ai example](/docs/topics/non-openai-models/cloud-togetherai)

- [Mistral AI example](/docs/topics/non-openai-models/cloud-mistralai)

### Locally run proxy servers
An increasing number of LLM proxy servers are available for use locally. These can be
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Tips for Non-OpenAI Models

Here are some tips for using non-OpenAI Models with AutoGen.

## Finding the right model
Every model will perform differently across the operations within your AutoGen
setup, such as speaker selection, coding, function calling, and content creation.
On the whole, larger models (13B+) are better at following directions
and provide more cohesive responses.

Content creation can be performed by most models.

Fine-tuned models can be great for very specific tasks, such as function calling
and coding.

Specific tasks, such as speaker selection in a Group Chat scenario, that require
very accurate outputs can be a challenge with most open source/weight models. The
use of chain-of-thought and/or few-shot prompting can help guide the LLM to provide
the output in the format you want.

## Validating your program
Testing your AutoGen setup against a very large LLM, such as OpenAI's ChatGPT or
Anthropic's Claude 3, can help validate your agent setup and configuration.

Once a setup is performing as you want, you can replace the models for your agents
with non-OpenAI models and iteratively tweak system messages, prompts, and model
selection.

## Chat template
AutoGen utilises a set of chat messages for the conversation between AutoGen/user
and LLMs. Each chat message has a role attribute that is typically `user`,
`assistant`, or `system`.

A chat template is applied during inference and some chat templates implement rules about
what roles can be used in specific sequences of messages.

For example, when using Mistral AI's API the last chat message must have a role of `user`.
In a Group Chat scenario the message used to select the next speaker will have a role of
`system` by default and the API will throw an exception for this step. To overcome this the
GroupChat's constructor has a parameter called `role_for_select_speaker_messages` that can
be used to change the role name to `user`.

```python
groupchat = autogen.GroupChat(
agents=[user_proxy, coder, pm],
messages=[],
max_round=12,
# Role for select speaker message will be set to 'user' instead of 'system'
role_for_select_speaker_messages='user',
)
```

If the chat template associated with a model you want to use doesn't support the role
sequence and names used in AutoGen you can modify the chat template. See an example of
this on our [vLLM page](/docs/topics/non-openai-models/local-vllm#chat-template).

## Discord
Join the [#alt-models](https://discord.com/channels/1153072414184452236/1201369716057440287)
channel on AutoGen's Discord to discuss non-OpenAI models and configurations.
Loading