Disable Azure tests and fix OpenAI tests (#3587)
* Disable Azure tests

* fix calculator notebook

* use gpt-4o-mini for tests

* use gpt-4o

* use gpt-4o

* fix formatting

* Fix models used in contrib tests

* Fix retrieval test

* WIP

* Skip

* Fix format

* Fix formatting
ekzhu authored Oct 1, 2024
1 parent 43b0d59 commit db28718
Showing 22 changed files with 52 additions and 66 deletions.
14 changes: 8 additions & 6 deletions autogen/agentchat/contrib/vectordb/pgvectordb.py
@@ -4,16 +4,17 @@
from typing import Callable, List, Optional, Union

import numpy as np

# try:
import pgvector
from pgvector.psycopg import register_vector
from sentence_transformers import SentenceTransformer

from .base import Document, ItemID, QueryResults, VectorDB
from .utils import get_logger

try:
import pgvector
from pgvector.psycopg import register_vector
except ImportError:
raise ImportError("Please install pgvector: `pip install pgvector`")
# except ImportError:
# raise ImportError("Please install pgvector: `pip install pgvector`")

try:
import psycopg
@@ -416,6 +417,7 @@ def query(
results = []
for query_text in query_texts:
vector = self.embedding_function(query_text)
vector_string = "[" + ",".join([f"{x:.8f}" for x in vector]) + "]"

if distance_type.lower() == "cosine":
index_function = "<=>"
@@ -428,7 +430,7 @@
query = (
f"SELECT id, documents, embedding, metadatas "
f"FROM {self.name} "
f"{clause} embedding {index_function} '{str(vector)}' {distance_threshold} "
f"{clause} embedding {index_function} '{vector_string}' {distance_threshold} "
f"LIMIT {n_results}"
)
cursor.execute(query)
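
The new vector_string is what makes the SQL above valid: str() on a NumPy embedding array yields space-separated (and, for long vectors, elided) output that pgvector cannot parse, while the join produces the bracketed, comma-separated literal pgvector expects. A minimal standalone sketch of the serialization, using a hypothetical three-element embedding:

import numpy as np

# Hypothetical stand-in for self.embedding_function(query_text).
vector = np.array([0.123456789, -0.5, 0.25])

# str(vector) gives roughly "[ 0.12345679 -0.5  0.25 ]": no commas, and long
# arrays are abbreviated with "...", so it is not a valid pgvector literal.
# The fixed-precision join below is what the query now interpolates instead.
vector_string = "[" + ",".join([f"{x:.8f}" for x in vector]) + "]"
print(vector_string)  # [0.12345679,-0.50000000,0.25000000]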
4 changes: 1 addition & 3 deletions notebook/agentchat_MathChat.ipynb
@@ -57,9 +57,7 @@
" \"OAI_CONFIG_LIST\",\n",
" filter_dict={\n",
" \"model\": {\n",
" \"gpt-4-1106-preview\",\n",
" \"gpt-3.5-turbo\",\n",
" \"gpt-35-turbo\",\n",
" \"gpt-4o\",\n",
" }\n",
" },\n",
")"
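For readers unfamiliar with the helper used throughout these notebooks: config_list_from_json loads a JSON list of model configurations from the OAI_CONFIG_LIST environment variable or file and keeps only the entries matching filter_dict. A small sketch with placeholder entries (not from this repository):

import autogen

# OAI_CONFIG_LIST (an env var or a file) holds entries such as:
# [
#   {"model": "gpt-4o", "api_key": "YOUR_OPENAI_API_KEY"},
#   {"model": "gpt-3.5-turbo", "api_key": "YOUR_OPENAI_API_KEY"}
# ]
# With the filter below, only the gpt-4o entry is returned, which is why the
# notebook now lists just the single model it actually needs.
config_list = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict={"model": {"gpt-4o"}},
)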
4 changes: 2 additions & 2 deletions notebook/agentchat_auto_feedback_from_code_execution.ipynb
@@ -37,10 +37,10 @@
"\n",
"config_list = autogen.config_list_from_json(\n",
" \"OAI_CONFIG_LIST\",\n",
" filter_dict={\"tags\": [\"gpt-4\"]}, # comment out to get all\n",
" filter_dict={\"tags\": [\"gpt-4o\"]}, # comment out to get all\n",
")\n",
"# When using a single openai endpoint, you can use the following:\n",
"# config_list = [{\"model\": \"gpt-4\", \"api_key\": os.getenv(\"OPENAI_API_KEY\")}]"
"# config_list = [{\"model\": \"gpt-4o\", \"api_key\": os.getenv(\"OPENAI_API_KEY\")}]"
]
},
{
2 changes: 1 addition & 1 deletion notebook/agentchat_cost_token_tracking.ipynb
@@ -79,7 +79,7 @@
"config_list = autogen.config_list_from_json(\n",
" \"OAI_CONFIG_LIST\",\n",
" filter_dict={\n",
" \"model\": [\"gpt-3.5-turbo\", \"gpt-3.5-turbo-16k\"], # comment out to get all\n",
" \"model\": [\"gpt-3.5-turbo\"], # comment out to get all\n",
" },\n",
")"
]
2 changes: 1 addition & 1 deletion notebook/agentchat_function_call_currency_calculator.ipynb
@@ -65,7 +65,7 @@
"\n",
"config_list = autogen.config_list_from_json(\n",
" \"OAI_CONFIG_LIST\",\n",
" filter_dict={\"tags\": [\"3.5-tool\"]}, # comment out to get all\n",
" filter_dict={\"tags\": [\"tool\"]}, # comment out to get all\n",
")"
]
},
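The tag rename only takes effect if the entries in OAI_CONFIG_LIST carry the new tag; tags are free-form labels attached to each configuration. A hypothetical entry (placeholder key) that the updated filter_dict={"tags": ["tool"]} would match:

example_entry = {
    "model": "gpt-4o-mini",           # any tool-calling-capable model
    "api_key": "YOUR_OPENAI_API_KEY",
    "tags": ["tool", "gpt-4o-mini"],  # the "tool" tag is what the filter selects
}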
2 changes: 1 addition & 1 deletion notebook/agentchat_groupchat_finite_state_machine.ipynb
@@ -94,7 +94,7 @@
" \"cache_seed\": 44, # change the seed for different trials\n",
" \"config_list\": autogen.config_list_from_json(\n",
" \"OAI_CONFIG_LIST\",\n",
" filter_dict={\"tags\": [\"gpt-4\", \"gpt-4-32k\"]}, # comment out to get all\n",
" filter_dict={\"tags\": [\"gpt-4o\"]}, # comment out to get all\n",
" ),\n",
" \"temperature\": 0,\n",
"}"
2 changes: 1 addition & 1 deletion notebook/agentchat_groupchat_stateflow.ipynb
@@ -43,7 +43,7 @@
"config_list = autogen.config_list_from_json(\n",
" \"OAI_CONFIG_LIST\",\n",
" filter_dict={\n",
" \"tags\": [\"gpt-4\", \"gpt-4-32k\"],\n",
" \"tags\": [\"gpt-4o\"],\n",
" },\n",
")"
]
8 changes: 1 addition & 7 deletions test/agentchat/contrib/agent_eval/test_agent_eval.py
@@ -32,21 +32,15 @@ def remove_ground_truth(test_case: str):
filter_dict={
"api_type": ["openai"],
"model": [
"gpt-4-turbo",
"gpt-4-turbo-preview",
"gpt-4-0125-preview",
"gpt-4-1106-preview",
"gpt-4o-mini",
"gpt-3.5-turbo",
"gpt-3.5-turbo-0125",
"gpt-3.5-turbo-1106",
],
},
)

aoai_config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
filter_dict={"api_type": ["azure"]},
)

success_str = open("test/test_files/agenteval-in-out/samples/sample_math_response_successful.txt", "r").read()
Additional changed file (path not shown):
@@ -11,7 +11,8 @@
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402

# Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input.
filter_dict = {"model": ["gpt-4-0125-preview"]}
filter_dict = {"model": ["gpt-4o-mini"]}
# filter_dict = {"model": ["gpt-4-0125-preview"]}
# filter_dict = {"model": ["gpt-3.5-turbo-1106"]}
# filter_dict = {"model": ["gpt-4-0613"]}
# filter_dict = {"model": ["gpt-3.5-turbo"]}
Additional changed file (path not shown):
@@ -26,8 +26,6 @@
sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
from conftest import MOCK_OPEN_AI_API_KEY, skip_openai # noqa: E402

filter_dict = {"model": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}

RESOLUTIONS = ["256x256", "512x512", "1024x1024"]
QUALITIES = ["standard", "hd"]
PROMPTS = [
3 changes: 2 additions & 1 deletion test/agentchat/contrib/capabilities/test_teachable_agent.py
@@ -28,7 +28,8 @@
# filter_dict={"model": ["gpt-3.5-turbo-1106"]}
# filter_dict={"model": ["gpt-3.5-turbo-0613"]}
# filter_dict={"model": ["gpt-4"]}
filter_dict = {"tags": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
# filter_dict = {"tags": ["gpt-35-turbo-16k", "gpt-3.5-turbo-16k"]}
filter_dict = {"model": ["gpt-4o-mini"]}


def create_teachable_agent(reset_db=False, verbosity=0):
Additional changed file (path not shown):
@@ -78,7 +78,7 @@ def test_retrievechat():
},
"embedding_function": sentence_transformer_ef,
"get_or_create": True, # set to False if you don't want to reuse an existing collection
"overwrite": False, # set to True if you want to overwrite an existing collection
"overwrite": True, # set to True if you want to overwrite an existing collection
},
code_execution_config=False, # set to False if you don't want to execute the code
)
Additional changed file (path not shown):
@@ -69,6 +69,8 @@ def test_retrievechat():
"client": client,
"docs_path": "./website/docs",
"chunk_token_size": 2000,
"get_or_create": True,
"overwrite": True,
},
)

4 changes: 3 additions & 1 deletion test/agentchat/contrib/retrievechat/test_retrievechat.py
@@ -54,17 +54,19 @@ def test_retrievechat():
)

sentence_transformer_ef = ef.SentenceTransformerEmbeddingFunction()
docs_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../../website/docs"))
ragproxyagent = RetrieveUserProxyAgent(
name="ragproxyagent",
human_input_mode="NEVER",
max_consecutive_auto_reply=2,
retrieve_config={
"docs_path": "./website/docs",
"docs_path": docs_path,
"chunk_token_size": 2000,
"model": config_list[0]["model"],
"client": chromadb.PersistentClient(path="/tmp/chromadb"),
"embedding_function": sentence_transformer_ef,
"get_or_create": True,
"overwrite": True,
},
)

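Two things change in this test: docs_path is now resolved relative to the test file instead of the current working directory, and the collection flags force a clean rebuild. Roughly, get_or_create and overwrite decide whether the chromadb collection left over from a previous run is reused or recreated; an illustrative sketch of that behavior with raw chromadb (simplified, not the agent's actual code path):

import chromadb

client = chromadb.PersistentClient(path="/tmp/chromadb")

def prepare_collection(name: str, overwrite: bool, get_or_create: bool):
    if overwrite:
        # Drop any stale collection so the documents are re-indexed from scratch.
        try:
            client.delete_collection(name)
        except Exception:
            pass  # the collection may not exist yet
        return client.create_collection(name)
    if get_or_create:
        # Reuse an existing collection instead of raising if it already exists.
        return client.get_or_create_collection(name)
    return client.create_collection(name)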
2 changes: 1 addition & 1 deletion test/agentchat/contrib/test_agent_optimizer.py
@@ -89,7 +89,7 @@ def test_step():
max_consecutive_auto_reply=3,
)

optimizer = AgentOptimizer(max_actions_per_step=3, llm_config=llm_config)
optimizer = AgentOptimizer(max_actions_per_step=3, llm_config=llm_config, optimizer_model="gpt-4o-mini")
user_proxy.initiate_chat(assistant, message=problem)
optimizer.record_one_conversation(assistant.chat_messages_for_summary(user_proxy), is_satisfied=True)

27 changes: 14 additions & 13 deletions test/agentchat/contrib/test_gpt_assistant.py
@@ -40,11 +40,12 @@
],
},
)
aoai_config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
filter_dict={"api_type": ["azure"], "tags": ["assistant"]},
)
# TODO: fix azure settings or remove it.
# aoai_config_list = autogen.config_list_from_json(
# OAI_CONFIG_LIST,
# file_location=KEY_LOC,
# filter_dict={"api_type": ["azure"], "tags": ["assistant"]},
# )


@pytest.mark.skipif(
@@ -53,17 +54,17 @@
)
def test_config_list() -> None:
assert len(openai_config_list) > 0
assert len(aoai_config_list) > 0
# TODO: fix azure settings or remove it.
# assert len(aoai_config_list) > 0


@pytest.mark.skipif(
skip_openai,
reason=reason,
)
def test_gpt_assistant_chat() -> None:
for gpt_config in [openai_config_list, aoai_config_list]:
_test_gpt_assistant_chat({"config_list": gpt_config})
_test_gpt_assistant_chat(gpt_config[0])
_test_gpt_assistant_chat({"config_list": openai_config_list})
_test_gpt_assistant_chat(openai_config_list[0])


def _test_gpt_assistant_chat(gpt_config) -> None:
@@ -135,8 +136,8 @@ def ask_ossinsight(question: str) -> str:
reason=reason,
)
def test_get_assistant_instructions() -> None:
for gpt_config in [openai_config_list, aoai_config_list]:
_test_get_assistant_instructions(gpt_config)
_test_get_assistant_instructions(openai_config_list)
# _test_get_assistant_instructions(aoai_config_list)


def _test_get_assistant_instructions(gpt_config) -> None:
@@ -164,8 +165,8 @@ def _test_get_assistant_instructions(gpt_config) -> None:
reason=reason,
)
def test_gpt_assistant_instructions_overwrite() -> None:
for gpt_config in [openai_config_list, aoai_config_list]:
_test_gpt_assistant_instructions_overwrite(gpt_config)
_test_gpt_assistant_instructions_overwrite(openai_config_list)
# _test_gpt_assistant_instructions_overwrite(aoai_config_list)


def _test_gpt_assistant_instructions_overwrite(gpt_config) -> None:
2 changes: 1 addition & 1 deletion test/agentchat/contrib/test_web_surfer.py
@@ -97,7 +97,7 @@ def test_web_surfer_oai() -> None:
llm_config = {"config_list": config_list, "timeout": 180, "cache_seed": 42}

# adding Azure name variations to the model list
model = ["gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-16k"]
model = ["gpt-4o-mini"]
model += [m.replace(".", "") for m in model]

summarizer_llm_config = {
2 changes: 1 addition & 1 deletion test/agentchat/test_tool_calls.py
@@ -144,7 +144,7 @@ def test_update_tool():
config_list_gpt4 = autogen.config_list_from_json(
OAI_CONFIG_LIST,
filter_dict={
"tags": ["gpt-4"],
"tags": ["gpt-4o-mini"],
},
file_location=KEY_LOC,
)
8 changes: 1 addition & 7 deletions test/io/test_websockets.py
@@ -97,14 +97,8 @@ def on_connect(iostream: IOWebsockets, success_dict: Dict[str, bool] = success_d
OAI_CONFIG_LIST,
filter_dict={
"model": [
"gpt-4o-mini",
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-4",
"gpt-4-0314",
"gpt4",
"gpt-4-32k",
"gpt-4-32k-0314",
"gpt-4-32k-v0314",
],
},
file_location=KEY_LOC,
14 changes: 2 additions & 12 deletions test/oai/_test_completion.py
@@ -143,13 +143,8 @@ def test_nocontext():
file_location=KEY_LOC,
filter_dict={
"model": {
"gpt-4o-mini",
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-16k-0613",
"gpt-3.5-turbo-0301",
"chatgpt-35-turbo-0301",
"gpt-35-turbo-v0301",
"gpt",
},
},
),
@@ -179,13 +174,8 @@ def test_humaneval(num_samples=1):
env_or_file=OAI_CONFIG_LIST,
filter_dict={
"model": {
"gpt-4o-mini",
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-16k-0613",
"gpt-3.5-turbo-0301",
"chatgpt-35-turbo-0301",
"gpt-35-turbo-v0301",
"gpt",
},
},
file_location=KEY_LOC,
6 changes: 4 additions & 2 deletions test/oai/test_client.py
@@ -66,7 +66,8 @@ def get_usage(response):
return {}


@pytest.mark.skipif(skip, reason="openai>=1 not installed")
# @pytest.mark.skipif(skip, reason="openai>=1 not installed")
@pytest.mark.skip(reason="This test is not working until Azure settings are updated")
def test_aoai_chat_completion():
config_list = config_list_from_json(
env_or_file=OAI_CONFIG_LIST,
@@ -88,7 +89,8 @@ def test_aoai_chat_completion():
print(client.extract_text_or_completion_object(response))


@pytest.mark.skipif(skip or not TOOL_ENABLED, reason="openai>=1.1.0 not installed")
# @pytest.mark.skipif(skip or not TOOL_ENABLED, reason="openai>=1.1.0 not installed")
@pytest.mark.skip(reason="This test is not working until Azure settings are updated")
def test_oai_tool_calling_extraction():
config_list = config_list_from_json(
env_or_file=OAI_CONFIG_LIST,
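The decorator swap is the mechanism behind "Disable Azure tests": skipif only skips when its condition is true at collection time, while the bare skip marker disables the test unconditionally. A minimal illustration with generic test names (not from this file):

import pytest

openai_missing = False  # stand-in for the module-level `skip` flag

@pytest.mark.skipif(openai_missing, reason="openai>=1 not installed")
def test_runs_when_dependency_is_present():
    assert True

@pytest.mark.skip(reason="This test is not working until Azure settings are updated")
def test_always_skipped():
    assert True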
3 changes: 2 additions & 1 deletion test/oai/test_client_stream.py
@@ -33,6 +33,7 @@


@pytest.mark.skipif(skip, reason="openai>=1 not installed")
@pytest.mark.skip(reason="This test is not working until Azure settings are updated.")
def test_aoai_chat_completion_stream() -> None:
config_list = config_list_from_json(
env_or_file=OAI_CONFIG_LIST,
@@ -236,7 +237,7 @@ def test_chat_tools_stream() -> None:
config_list = config_list_from_json(
env_or_file=OAI_CONFIG_LIST,
file_location=KEY_LOC,
filter_dict={"tags": ["multitool"]},
filter_dict={"tags": ["tool"]},
)
tools = [
{
