From db287183f2ec9a3e58021da6f6048c15f31d94b5 Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Tue, 1 Oct 2024 14:53:25 -0700 Subject: [PATCH] Disable Azure tests and fix open ai tests (#3587) * Disable Azure tests * fix calculator notebook * use gpt-4o-mini for tests * use gpt-4o * use gpt-4o * fix formatting * Fix models used in contrib tests * Fix retrieval test * WIP * Skip * Fix format * Fix formatting --- .../agentchat/contrib/vectordb/pgvectordb.py | 14 +++++----- notebook/agentchat_MathChat.ipynb | 4 +-- ...at_auto_feedback_from_code_execution.ipynb | 4 +-- notebook/agentchat_cost_token_tracking.ipynb | 2 +- ...at_function_call_currency_calculator.ipynb | 2 +- ...tchat_groupchat_finite_state_machine.ipynb | 2 +- notebook/agentchat_groupchat_stateflow.ipynb | 2 +- .../contrib/agent_eval/test_agent_eval.py | 8 +----- .../capabilities/chat_with_teachable_agent.py | 3 ++- .../test_image_generation_capability.py | 2 -- .../capabilities/test_teachable_agent.py | 3 ++- .../test_pgvector_retrievechat.py | 2 +- .../retrievechat/test_qdrant_retrievechat.py | 2 ++ .../contrib/retrievechat/test_retrievechat.py | 4 ++- .../agentchat/contrib/test_agent_optimizer.py | 2 +- test/agentchat/contrib/test_gpt_assistant.py | 27 ++++++++++--------- test/agentchat/contrib/test_web_surfer.py | 2 +- test/agentchat/test_tool_calls.py | 2 +- test/io/test_websockets.py | 8 +----- test/oai/_test_completion.py | 14 ++-------- test/oai/test_client.py | 6 +++-- test/oai/test_client_stream.py | 3 ++- 22 files changed, 52 insertions(+), 66 deletions(-) diff --git a/autogen/agentchat/contrib/vectordb/pgvectordb.py b/autogen/agentchat/contrib/vectordb/pgvectordb.py index 6fce4a6db80..64f4ff333a9 100644 --- a/autogen/agentchat/contrib/vectordb/pgvectordb.py +++ b/autogen/agentchat/contrib/vectordb/pgvectordb.py @@ -4,16 +4,17 @@ from typing import Callable, List, Optional, Union import numpy as np + +# try: +import pgvector +from pgvector.psycopg import register_vector from sentence_transformers import SentenceTransformer from .base import Document, ItemID, QueryResults, VectorDB from .utils import get_logger -try: - import pgvector - from pgvector.psycopg import register_vector -except ImportError: - raise ImportError("Please install pgvector: `pip install pgvector`") +# except ImportError: +# raise ImportError("Please install pgvector: `pip install pgvector`") try: import psycopg @@ -416,6 +417,7 @@ def query( results = [] for query_text in query_texts: vector = self.embedding_function(query_text) + vector_string = "[" + ",".join([f"{x:.8f}" for x in vector]) + "]" if distance_type.lower() == "cosine": index_function = "<=>" @@ -428,7 +430,7 @@ def query( query = ( f"SELECT id, documents, embedding, metadatas " f"FROM {self.name} " - f"{clause} embedding {index_function} '{str(vector)}' {distance_threshold} " + f"{clause} embedding {index_function} '{vector_string}' {distance_threshold} " f"LIMIT {n_results}" ) cursor.execute(query) diff --git a/notebook/agentchat_MathChat.ipynb b/notebook/agentchat_MathChat.ipynb index afa00fb7562..db7c6594d99 100644 --- a/notebook/agentchat_MathChat.ipynb +++ b/notebook/agentchat_MathChat.ipynb @@ -57,9 +57,7 @@ " \"OAI_CONFIG_LIST\",\n", " filter_dict={\n", " \"model\": {\n", - " \"gpt-4-1106-preview\",\n", - " \"gpt-3.5-turbo\",\n", - " \"gpt-35-turbo\",\n", + " \"gpt-4o\",\n", " }\n", " },\n", ")" diff --git a/notebook/agentchat_auto_feedback_from_code_execution.ipynb b/notebook/agentchat_auto_feedback_from_code_execution.ipynb index 6ea6f662b93..51b5a591734 100644 --- a/notebook/agentchat_auto_feedback_from_code_execution.ipynb +++ b/notebook/agentchat_auto_feedback_from_code_execution.ipynb @@ -37,10 +37,10 @@ "\n", "config_list = autogen.config_list_from_json(\n", " \"OAI_CONFIG_LIST\",\n", - " filter_dict={\"tags\": [\"gpt-4\"]}, # comment out to get all\n", + " filter_dict={\"tags\": [\"gpt-4o\"]}, # comment out to get all\n", ")\n", "# When using a single openai endpoint, you can use the following:\n", - "# config_list = [{\"model\": \"gpt-4\", \"api_key\": os.getenv(\"OPENAI_API_KEY\")}]" + "# config_list = [{\"model\": \"gpt-4o\", \"api_key\": os.getenv(\"OPENAI_API_KEY\")}]" ] }, { diff --git a/notebook/agentchat_cost_token_tracking.ipynb b/notebook/agentchat_cost_token_tracking.ipynb index d1fc9ccd8d1..a60fd6de15e 100644 --- a/notebook/agentchat_cost_token_tracking.ipynb +++ b/notebook/agentchat_cost_token_tracking.ipynb @@ -79,7 +79,7 @@ "config_list = autogen.config_list_from_json(\n", " \"OAI_CONFIG_LIST\",\n", " filter_dict={\n", - " \"model\": [\"gpt-3.5-turbo\", \"gpt-3.5-turbo-16k\"], # comment out to get all\n", + " \"model\": [\"gpt-3.5-turbo\"], # comment out to get all\n", " },\n", ")" ] diff --git a/notebook/agentchat_function_call_currency_calculator.ipynb b/notebook/agentchat_function_call_currency_calculator.ipynb index ac65ba560f9..34ff92ff91a 100644 --- a/notebook/agentchat_function_call_currency_calculator.ipynb +++ b/notebook/agentchat_function_call_currency_calculator.ipynb @@ -65,7 +65,7 @@ "\n", "config_list = autogen.config_list_from_json(\n", " \"OAI_CONFIG_LIST\",\n", - " filter_dict={\"tags\": [\"3.5-tool\"]}, # comment out to get all\n", + " filter_dict={\"tags\": [\"tool\"]}, # comment out to get all\n", ")" ] }, diff --git a/notebook/agentchat_groupchat_finite_state_machine.ipynb b/notebook/agentchat_groupchat_finite_state_machine.ipynb index 8ef101f7d91..74b6f3d4047 100644 --- a/notebook/agentchat_groupchat_finite_state_machine.ipynb +++ b/notebook/agentchat_groupchat_finite_state_machine.ipynb @@ -94,7 +94,7 @@ " \"cache_seed\": 44, # change the seed for different trials\n", " \"config_list\": autogen.config_list_from_json(\n", " \"OAI_CONFIG_LIST\",\n", - " filter_dict={\"tags\": [\"gpt-4\", \"gpt-4-32k\"]}, # comment out to get all\n", + " filter_dict={\"tags\": [\"gpt-4o\"]}, # comment out to get all\n", " ),\n", " \"temperature\": 0,\n", "}" diff --git a/notebook/agentchat_groupchat_stateflow.ipynb b/notebook/agentchat_groupchat_stateflow.ipynb index 53eb0f2ff98..3081056eac9 100644 --- a/notebook/agentchat_groupchat_stateflow.ipynb +++ b/notebook/agentchat_groupchat_stateflow.ipynb @@ -43,7 +43,7 @@ "config_list = autogen.config_list_from_json(\n", " \"OAI_CONFIG_LIST\",\n", " filter_dict={\n", - " \"tags\": [\"gpt-4\", \"gpt-4-32k\"],\n", + " \"tags\": [\"gpt-4o\"],\n", " },\n", ")" ] diff --git a/test/agentchat/contrib/agent_eval/test_agent_eval.py b/test/agentchat/contrib/agent_eval/test_agent_eval.py index 1ae7562a372..57f5d20d365 100644 --- a/test/agentchat/contrib/agent_eval/test_agent_eval.py +++ b/test/agentchat/contrib/agent_eval/test_agent_eval.py @@ -32,13 +32,8 @@ def remove_ground_truth(test_case: str): filter_dict={ "api_type": ["openai"], "model": [ - "gpt-4-turbo", - "gpt-4-turbo-preview", - "gpt-4-0125-preview", - "gpt-4-1106-preview", + "gpt-4o-mini", "gpt-3.5-turbo", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-1106", ], }, ) @@ -46,7 +41,6 @@ def remove_ground_truth(test_case: str): aoai_config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={"api_type": ["azure"]}, ) success_str = open("test/test_files/agenteval-in-out/samples/sample_math_response_successful.txt", "r").read() diff --git a/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py b/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py index 14599c06397..8042d988604 100755 --- a/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py +++ b/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py @@ -11,7 +11,8 @@ from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402 # Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input. -filter_dict = {"model": ["gpt-4-0125-preview"]} +filter_dict = {"model": ["gpt-4o-mini"]} +# filter_dict = {"model": ["gpt-4-0125-preview"]} # filter_dict = {"model": ["gpt-3.5-turbo-1106"]} # filter_dict = {"model": ["gpt-4-0613"]} # filter_dict = {"model": ["gpt-3.5-turbo"]} diff --git a/test/agentchat/contrib/capabilities/test_image_generation_capability.py b/test/agentchat/contrib/capabilities/test_image_generation_capability.py index fa62d1f46a1..39b4d555ceb 100644 --- a/test/agentchat/contrib/capabilities/test_image_generation_capability.py +++ b/test/agentchat/contrib/capabilities/test_image_generation_capability.py @@ -26,8 +26,6 @@ sys.path.append(os.path.join(os.path.dirname(__file__), "../..")) from conftest import MOCK_OPEN_AI_API_KEY, skip_openai # noqa: E402 -filter_dict = {"model": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]} - RESOLUTIONS = ["256x256", "512x512", "1024x1024"] QUALITIES = ["standard", "hd"] PROMPTS = [ diff --git a/test/agentchat/contrib/capabilities/test_teachable_agent.py b/test/agentchat/contrib/capabilities/test_teachable_agent.py index 720bdc7ef6d..a823f02946c 100755 --- a/test/agentchat/contrib/capabilities/test_teachable_agent.py +++ b/test/agentchat/contrib/capabilities/test_teachable_agent.py @@ -28,7 +28,8 @@ # filter_dict={"model": ["gpt-3.5-turbo-1106"]} # filter_dict={"model": ["gpt-3.5-turbo-0613"]} # filter_dict={"model": ["gpt-4"]} -filter_dict = {"tags": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]} +# filter_dict = {"tags": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]} +filter_dict = {"model": ["gpt-4o-mini"]} def create_teachable_agent(reset_db=False, verbosity=0): diff --git a/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py b/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py index 3c566352b3e..52b14d42101 100644 --- a/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py +++ b/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py @@ -78,7 +78,7 @@ def test_retrievechat(): }, "embedding_function": sentence_transformer_ef, "get_or_create": True, # set to False if you don't want to reuse an existing collection - "overwrite": False, # set to True if you want to overwrite an existing collection + "overwrite": True, # set to True if you want to overwrite an existing collection }, code_execution_config=False, # set to False if you don't want to execute the code ) diff --git a/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py b/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py index 92ca5aa603a..3cc8b8780b8 100755 --- a/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py +++ b/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py @@ -69,6 +69,8 @@ def test_retrievechat(): "client": client, "docs_path": "./website/docs", "chunk_token_size": 2000, + "get_or_create": True, + "overwrite": True, }, ) diff --git a/test/agentchat/contrib/retrievechat/test_retrievechat.py b/test/agentchat/contrib/retrievechat/test_retrievechat.py index 0504fc82be4..d585be36cbf 100755 --- a/test/agentchat/contrib/retrievechat/test_retrievechat.py +++ b/test/agentchat/contrib/retrievechat/test_retrievechat.py @@ -54,17 +54,19 @@ def test_retrievechat(): ) sentence_transformer_ef = ef.SentenceTransformerEmbeddingFunction() + docs_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../../website/docs")) ragproxyagent = RetrieveUserProxyAgent( name="ragproxyagent", human_input_mode="NEVER", max_consecutive_auto_reply=2, retrieve_config={ - "docs_path": "./website/docs", + "docs_path": docs_path, "chunk_token_size": 2000, "model": config_list[0]["model"], "client": chromadb.PersistentClient(path="/tmp/chromadb"), "embedding_function": sentence_transformer_ef, "get_or_create": True, + "overwrite": True, }, ) diff --git a/test/agentchat/contrib/test_agent_optimizer.py b/test/agentchat/contrib/test_agent_optimizer.py index 9587c9d5975..d31bd9341af 100644 --- a/test/agentchat/contrib/test_agent_optimizer.py +++ b/test/agentchat/contrib/test_agent_optimizer.py @@ -89,7 +89,7 @@ def test_step(): max_consecutive_auto_reply=3, ) - optimizer = AgentOptimizer(max_actions_per_step=3, llm_config=llm_config) + optimizer = AgentOptimizer(max_actions_per_step=3, llm_config=llm_config, optimizer_model="gpt-4o-mini") user_proxy.initiate_chat(assistant, message=problem) optimizer.record_one_conversation(assistant.chat_messages_for_summary(user_proxy), is_satisfied=True) diff --git a/test/agentchat/contrib/test_gpt_assistant.py b/test/agentchat/contrib/test_gpt_assistant.py index 7132cb72053..15dcd12d16c 100755 --- a/test/agentchat/contrib/test_gpt_assistant.py +++ b/test/agentchat/contrib/test_gpt_assistant.py @@ -40,11 +40,12 @@ ], }, ) - aoai_config_list = autogen.config_list_from_json( - OAI_CONFIG_LIST, - file_location=KEY_LOC, - filter_dict={"api_type": ["azure"], "tags": ["assistant"]}, - ) + # TODO: fix azure settings or remove it. + # aoai_config_list = autogen.config_list_from_json( + # OAI_CONFIG_LIST, + # file_location=KEY_LOC, + # filter_dict={"api_type": ["azure"], "tags": ["assistant"]}, + # ) @pytest.mark.skipif( @@ -53,7 +54,8 @@ ) def test_config_list() -> None: assert len(openai_config_list) > 0 - assert len(aoai_config_list) > 0 + # TODO: fix azure settings or remove it. + # assert len(aoai_config_list) > 0 @pytest.mark.skipif( @@ -61,9 +63,8 @@ def test_config_list() -> None: reason=reason, ) def test_gpt_assistant_chat() -> None: - for gpt_config in [openai_config_list, aoai_config_list]: - _test_gpt_assistant_chat({"config_list": gpt_config}) - _test_gpt_assistant_chat(gpt_config[0]) + _test_gpt_assistant_chat({"config_list": openai_config_list}) + _test_gpt_assistant_chat(openai_config_list[0]) def _test_gpt_assistant_chat(gpt_config) -> None: @@ -135,8 +136,8 @@ def ask_ossinsight(question: str) -> str: reason=reason, ) def test_get_assistant_instructions() -> None: - for gpt_config in [openai_config_list, aoai_config_list]: - _test_get_assistant_instructions(gpt_config) + _test_get_assistant_instructions(openai_config_list) + # _test_get_assistant_instructions(aoai_config_list) def _test_get_assistant_instructions(gpt_config) -> None: @@ -164,8 +165,8 @@ def _test_get_assistant_instructions(gpt_config) -> None: reason=reason, ) def test_gpt_assistant_instructions_overwrite() -> None: - for gpt_config in [openai_config_list, aoai_config_list]: - _test_gpt_assistant_instructions_overwrite(gpt_config) + _test_gpt_assistant_instructions_overwrite(openai_config_list) + # _test_gpt_assistant_instructions_overwrite(aoai_config_list) def _test_gpt_assistant_instructions_overwrite(gpt_config) -> None: diff --git a/test/agentchat/contrib/test_web_surfer.py b/test/agentchat/contrib/test_web_surfer.py index fad336b6b76..97f7e39bb46 100644 --- a/test/agentchat/contrib/test_web_surfer.py +++ b/test/agentchat/contrib/test_web_surfer.py @@ -97,7 +97,7 @@ def test_web_surfer_oai() -> None: llm_config = {"config_list": config_list, "timeout": 180, "cache_seed": 42} # adding Azure name variations to the model list - model = ["gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-16k"] + model = ["gpt-4o-mini"] model += [m.replace(".", "") for m in model] summarizer_llm_config = { diff --git a/test/agentchat/test_tool_calls.py b/test/agentchat/test_tool_calls.py index e7d45c5918d..fa7602104fa 100755 --- a/test/agentchat/test_tool_calls.py +++ b/test/agentchat/test_tool_calls.py @@ -144,7 +144,7 @@ def test_update_tool(): config_list_gpt4 = autogen.config_list_from_json( OAI_CONFIG_LIST, filter_dict={ - "tags": ["gpt-4"], + "tags": ["gpt-4o-mini"], }, file_location=KEY_LOC, ) diff --git a/test/io/test_websockets.py b/test/io/test_websockets.py index 1458afcd88c..ee2085169e0 100644 --- a/test/io/test_websockets.py +++ b/test/io/test_websockets.py @@ -97,14 +97,8 @@ def on_connect(iostream: IOWebsockets, success_dict: Dict[str, bool] = success_d OAI_CONFIG_LIST, filter_dict={ "model": [ + "gpt-4o-mini", "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-4", - "gpt-4-0314", - "gpt4", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-v0314", ], }, file_location=KEY_LOC, diff --git a/test/oai/_test_completion.py b/test/oai/_test_completion.py index fe410255d2f..af479ca5651 100755 --- a/test/oai/_test_completion.py +++ b/test/oai/_test_completion.py @@ -143,13 +143,8 @@ def test_nocontext(): file_location=KEY_LOC, filter_dict={ "model": { + "gpt-4o-mini", "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-16k-0613", - "gpt-3.5-turbo-0301", - "chatgpt-35-turbo-0301", - "gpt-35-turbo-v0301", - "gpt", }, }, ), @@ -179,13 +174,8 @@ def test_humaneval(num_samples=1): env_or_file=OAI_CONFIG_LIST, filter_dict={ "model": { + "gpt-4o-mini", "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-16k-0613", - "gpt-3.5-turbo-0301", - "chatgpt-35-turbo-0301", - "gpt-35-turbo-v0301", - "gpt", }, }, file_location=KEY_LOC, diff --git a/test/oai/test_client.py b/test/oai/test_client.py index bd8b072e612..3387001cf4a 100755 --- a/test/oai/test_client.py +++ b/test/oai/test_client.py @@ -66,7 +66,8 @@ def get_usage(response): return {} -@pytest.mark.skipif(skip, reason="openai>=1 not installed") +# @pytest.mark.skipif(skip, reason="openai>=1 not installed") +@pytest.mark.skip(reason="This test is not working until Azure settings are updated") def test_aoai_chat_completion(): config_list = config_list_from_json( env_or_file=OAI_CONFIG_LIST, @@ -88,7 +89,8 @@ def test_aoai_chat_completion(): print(client.extract_text_or_completion_object(response)) -@pytest.mark.skipif(skip or not TOOL_ENABLED, reason="openai>=1.1.0 not installed") +# @pytest.mark.skipif(skip or not TOOL_ENABLED, reason="openai>=1.1.0 not installed") +@pytest.mark.skip(reason="This test is not working until Azure settings are updated") def test_oai_tool_calling_extraction(): config_list = config_list_from_json( env_or_file=OAI_CONFIG_LIST, diff --git a/test/oai/test_client_stream.py b/test/oai/test_client_stream.py index 59abd97151a..405bc32be27 100755 --- a/test/oai/test_client_stream.py +++ b/test/oai/test_client_stream.py @@ -33,6 +33,7 @@ @pytest.mark.skipif(skip, reason="openai>=1 not installed") +@pytest.mark.skip(reason="This test is not working until Azure settings are updated.") def test_aoai_chat_completion_stream() -> None: config_list = config_list_from_json( env_or_file=OAI_CONFIG_LIST, @@ -236,7 +237,7 @@ def test_chat_tools_stream() -> None: config_list = config_list_from_json( env_or_file=OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={"tags": ["multitool"]}, + filter_dict={"tags": ["tool"]}, ) tools = [ {