From d6761d1316a2dd549632d8f979b69e2aa2339161 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Mon, 23 Oct 2023 11:03:48 -0700 Subject: [PATCH 01/35] api_base -> base_url (#383) --- OAI_CONFIG_LIST_sample | 4 +-- autogen/oai/completion.py | 6 ++--- autogen/oai/openai_utils.py | 26 +++++++++---------- notebook/agentchat_MathChat.ipynb | 4 +-- notebook/agentchat_RetrieveChat.ipynb | 4 +-- ...at_auto_feedback_from_code_execution.ipynb | 4 +-- notebook/agentchat_chess.ipynb | 6 ++--- notebook/agentchat_groupchat.ipynb | 4 +-- notebook/agentchat_groupchat_RAG.ipynb | 3 +-- notebook/agentchat_groupchat_research.ipynb | 4 +-- notebook/agentchat_groupchat_vis.ipynb | 4 +-- notebook/agentchat_human_feedback.ipynb | 8 +++--- notebook/agentchat_planning.ipynb | 4 +-- notebook/agentchat_stream.ipynb | 8 +++--- notebook/agentchat_teachability.ipynb | 4 +-- notebook/agentchat_teaching.ipynb | 4 +-- notebook/agentchat_two_users.ipynb | 4 +-- notebook/agentchat_web_info.ipynb | 4 +-- notebook/oai_chatgpt_gpt4.ipynb | 4 +-- notebook/oai_completion.ipynb | 8 +++--- notebook/oai_openai_utils.ipynb | 8 +++--- setup.py | 2 +- test/oai/test_utils.py | 2 +- website/blog/2023-07-14-Local-LLMs/index.mdx | 8 +++--- website/docs/FAQ.md | 4 +-- website/docs/Use-Cases/enhanced_inference.md | 6 ++--- 26 files changed, 73 insertions(+), 74 deletions(-) diff --git a/OAI_CONFIG_LIST_sample b/OAI_CONFIG_LIST_sample index 01608aeeef8..c3071921118 100644 --- a/OAI_CONFIG_LIST_sample +++ b/OAI_CONFIG_LIST_sample @@ -7,14 +7,14 @@ { "model": "gpt-4", "api_key": "", - "api_base": "", + "base_url": "", "api_type": "azure", "api_version": "2023-07-01-preview" }, { "model": "gpt-3.5-turbo", "api_key": "", - "api_base": "", + "base_url": "", "api_type": "azure", "api_version": "2023-07-01-preview" } diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py index a720ccc2446..a23352b982f 100644 --- a/autogen/oai/completion.py +++ b/autogen/oai/completion.py @@ -735,18 +735,18 @@ def create( "model": "gpt-4", "api_key": os.environ.get("AZURE_OPENAI_API_KEY"), "api_type": "azure", - "api_base": os.environ.get("AZURE_OPENAI_API_BASE"), + "base_url": os.environ.get("AZURE_OPENAI_API_BASE"), "api_version": "2023-03-15-preview", }, { "model": "gpt-3.5-turbo", "api_key": os.environ.get("OPENAI_API_KEY"), "api_type": "open_ai", - "api_base": "https://api.openai.com/v1", + "base_url": "https://api.openai.com/v1", }, { "model": "llama-7B", - "api_base": "http://127.0.0.1:8080", + "base_url": "http://127.0.0.1:8080", "api_type": "open_ai", } ], diff --git a/autogen/oai/openai_utils.py b/autogen/oai/openai_utils.py index cbae458c59c..61dd330169c 100644 --- a/autogen/oai/openai_utils.py +++ b/autogen/oai/openai_utils.py @@ -7,7 +7,7 @@ from dotenv import find_dotenv, load_dotenv -NON_CACHE_KEY = ["api_key", "api_base", "api_type", "api_version"] +NON_CACHE_KEY = ["api_key", "base_url", "api_type", "api_version"] def get_key(config): @@ -33,13 +33,13 @@ def get_key(config): def get_config_list( - api_keys: List, api_bases: Optional[List] = None, api_type: Optional[str] = None, api_version: Optional[str] = None + api_keys: List, base_urls: Optional[List] = None, api_type: Optional[str] = None, api_version: Optional[str] = None ) -> List[Dict]: """Get a list of configs for openai api calls. Args: api_keys (list): The api keys for openai api calls. - api_bases (list, optional): The api bases for openai api calls. + base_urls (list, optional): The api bases for openai api calls. api_type (str, optional): The api type for openai api calls. 
api_version (str, optional): The api version for openai api calls. """ @@ -48,8 +48,8 @@ def get_config_list( if not api_key.strip(): continue config = {"api_key": api_key} - if api_bases: - config["api_base"] = api_bases[i] + if base_urls: + config["base_url"] = base_urls[i] if api_type: config["api_type"] = api_type if api_version: @@ -109,7 +109,7 @@ def config_list_openai_aoai( # Assuming Azure OpenAI api keys in os.environ["AZURE_OPENAI_API_KEY"], in separated lines api_keys=os.environ.get("AZURE_OPENAI_API_KEY", "").split("\n"), # Assuming Azure OpenAI api bases in os.environ["AZURE_OPENAI_API_BASE"], in separated lines - api_bases=os.environ.get("AZURE_OPENAI_API_BASE", "").split("\n"), + base_urls=os.environ.get("AZURE_OPENAI_API_BASE", "").split("\n"), api_type="azure", api_version="2023-07-01-preview", # change if necessary ) @@ -121,7 +121,7 @@ def config_list_openai_aoai( # Assuming OpenAI API_KEY in os.environ["OPENAI_API_KEY"] api_keys=os.environ.get("OPENAI_API_KEY", "").split("\n"), # "api_type": "open_ai", - # "api_base": "https://api.openai.com/v1", + # "base_url": "https://api.openai.com/v1", ) if exclude != "openai" else [] @@ -248,7 +248,7 @@ def config_list_from_json( def get_config( - api_key: str, api_base: Optional[str] = None, api_type: Optional[str] = None, api_version: Optional[str] = None + api_key: str, base_url: Optional[str] = None, api_type: Optional[str] = None, api_version: Optional[str] = None ) -> Dict: """ Construct a configuration dictionary with the provided API configurations. @@ -261,12 +261,12 @@ def get_config( "api_key_env_var": "ANOTHER_API_KEY", "api_type": "aoai", "api_version": "v2", - "api_base": "https://api.someotherapi.com" + "base_url": "https://api.someotherapi.com" } } Args: api_key (str): The API key used for authenticating API requests. - api_base (str, optional): The base URL of the API. Defaults to None. + base_url (str, optional): The base URL of the API. Defaults to None. api_type (str, optional): The type or kind of API. Defaults to None. api_version (str, optional): The API version. Defaults to None. @@ -274,8 +274,8 @@ def get_config( Dict: A dictionary containing the API configurations. """ config = {"api_key": api_key} - if api_base: - config["api_base"] = api_base + if base_url: + config["base_url"] = base_url if api_type: config["api_type"] = api_type if api_version: @@ -302,7 +302,7 @@ def config_list_from_dotenv( If a string is provided as configuration, it is considered as an environment variable name storing the API key. If a dict is provided, it should contain at least 'api_key_env_var' key, - and optionally other API configurations like 'api_base', 'api_type', and 'api_version'. + and optionally other API configurations like 'base_url', 'api_type', and 'api_version'. Defaults to a basic map with 'gpt-4' and 'gpt-3.5-turbo' mapped to 'OPENAI_API_KEY'. filter_dict (dict, optional): A dictionary containing the models to be loaded. Containing a 'model' key mapped to a set of model names to be loaded. 
diff --git a/notebook/agentchat_MathChat.ipynb b/notebook/agentchat_MathChat.ipynb index 9dd90fb0635..9245252ae0c 100644 --- a/notebook/agentchat_MathChat.ipynb +++ b/notebook/agentchat_MathChat.ipynb @@ -91,14 +91,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb index bfdb43b9d50..a0a8ff5d8e4 100644 --- a/notebook/agentchat_RetrieveChat.ipynb +++ b/notebook/agentchat_RetrieveChat.ipynb @@ -117,14 +117,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", diff --git a/notebook/agentchat_auto_feedback_from_code_execution.ipynb b/notebook/agentchat_auto_feedback_from_code_execution.ipynb index 564c7e2c8db..f3c027855bb 100644 --- a/notebook/agentchat_auto_feedback_from_code_execution.ipynb +++ b/notebook/agentchat_auto_feedback_from_code_execution.ipynb @@ -91,14 +91,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", diff --git a/notebook/agentchat_chess.ipynb b/notebook/agentchat_chess.ipynb index a06265d21bf..68ab8063fe5 100644 --- a/notebook/agentchat_chess.ipynb +++ b/notebook/agentchat_chess.ipynb @@ -105,14 +105,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", @@ -1010,7 +1010,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.11.4" }, "orig_nbformat": 4 }, diff --git a/notebook/agentchat_groupchat.ipynb b/notebook/agentchat_groupchat.ipynb index ac3a74fca59..22e9067f384 100644 --- a/notebook/agentchat_groupchat.ipynb +++ b/notebook/agentchat_groupchat.ipynb @@ -93,14 +93,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", diff --git a/notebook/agentchat_groupchat_RAG.ipynb b/notebook/agentchat_groupchat_RAG.ipynb index fd12cbe8c9b..654469dcc3a 100644 --- a/notebook/agentchat_groupchat_RAG.ipynb +++ b/notebook/agentchat_groupchat_RAG.ipynb @@ -88,9 +88,8 @@ " \"api_key\": \"\",\n", " }, # OpenAI API endpoint for gpt-4\n", " {\n", - " \"engine\": 
\"gpt-35-turbo-0631\", \n", " \"model\": \"gpt-35-turbo-0631\", # 0631 or newer is needed to use functions\n", - " \"api_base\": \"\", \n", + " \"base_url\": \"\", \n", " \"api_type\": \"azure\", \n", " \"api_version\": \"2023-07-01-preview\", # 2023-07-01-preview or newer is needed to use functions\n", " \"api_key\": \"\"\n", diff --git a/notebook/agentchat_groupchat_research.ipynb b/notebook/agentchat_groupchat_research.ipynb index 9d9c2c3dc3f..d7b562e3d74 100644 --- a/notebook/agentchat_groupchat_research.ipynb +++ b/notebook/agentchat_groupchat_research.ipynb @@ -79,14 +79,14 @@ " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k-0314',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", diff --git a/notebook/agentchat_groupchat_vis.ipynb b/notebook/agentchat_groupchat_vis.ipynb index c780a588a6b..b4f492aa174 100644 --- a/notebook/agentchat_groupchat_vis.ipynb +++ b/notebook/agentchat_groupchat_vis.ipynb @@ -91,14 +91,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", diff --git a/notebook/agentchat_human_feedback.ipynb b/notebook/agentchat_human_feedback.ipynb index 0119fb48f47..ae5c0ea07b2 100644 --- a/notebook/agentchat_human_feedback.ipynb +++ b/notebook/agentchat_human_feedback.ipynb @@ -86,14 +86,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # Azure OpenAI API endpoint for gpt-4\n", " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # another Azure OpenAI API endpoint for gpt-4\n", @@ -104,14 +104,14 @@ " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # Azure OpenAI API endpoint for gpt-3.5-turbo\n", " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # another Azure OpenAI API endpoint for gpt-3.5-turbo\n", diff --git a/notebook/agentchat_planning.ipynb b/notebook/agentchat_planning.ipynb index b0fa24425b3..042fb7c6dfd 100644 --- a/notebook/agentchat_planning.ipynb +++ b/notebook/agentchat_planning.ipynb @@ -95,14 +95,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-07-01-preview',\n", " }, # Azure OpenAI API endpoint for gpt-4\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-07-01-preview',\n", " }, # Azure OpenAI API endpoint for gpt-4-32k\n", diff --git a/notebook/agentchat_stream.ipynb b/notebook/agentchat_stream.ipynb index cfce4de54f9..3851df9d7d5 100644 --- 
a/notebook/agentchat_stream.ipynb +++ b/notebook/agentchat_stream.ipynb @@ -86,14 +86,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # Azure OpenAI API endpoint for gpt-4\n", " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # another Azure OpenAI API endpoint for gpt-4\n", @@ -104,14 +104,14 @@ " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # Azure OpenAI API endpoint for gpt-3.5-turbo\n", " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # another Azure OpenAI API endpoint for gpt-3.5-turbo\n", diff --git a/notebook/agentchat_teachability.ipynb b/notebook/agentchat_teachability.ipynb index 54f73fbcb83..10f836e0871 100644 --- a/notebook/agentchat_teachability.ipynb +++ b/notebook/agentchat_teachability.ipynb @@ -95,14 +95,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", diff --git a/notebook/agentchat_teaching.ipynb b/notebook/agentchat_teaching.ipynb index 0910aefa3e1..f7301676e44 100644 --- a/notebook/agentchat_teaching.ipynb +++ b/notebook/agentchat_teaching.ipynb @@ -58,14 +58,14 @@ " {\n", " \"model\": \"gpt-4\",\n", " \"api_key\": \"\",\n", - " \"api_base\": \"\",\n", + " \"base_url\": \"\",\n", " \"api_type\": \"azure\",\n", " \"api_version\": \"2023-06-01-preview\"\n", " },\n", " {\n", " \"model\": \"gpt-4-32k\",\n", " \"api_key\": \"\",\n", - " \"api_base\": \"\",\n", + " \"base_url\": \"\",\n", " \"api_type\": \"azure\",\n", " \"api_version\": \"2023-06-01-preview\"\n", " }\n", diff --git a/notebook/agentchat_two_users.ipynb b/notebook/agentchat_two_users.ipynb index 22cb38db8ef..ff0d08d16cc 100644 --- a/notebook/agentchat_two_users.ipynb +++ b/notebook/agentchat_two_users.ipynb @@ -68,14 +68,14 @@ " {\n", " \"model\": \"gpt-4\",\n", " \"api_key\": \"\",\n", - " \"api_base\": \"\",\n", + " \"base_url\": \"\",\n", " \"api_type\": \"azure\",\n", " \"api_version\": \"2023-07-01-preview\"\n", " },\n", " {\n", " \"model\": \"gpt-4-32k\",\n", " \"api_key\": \"\",\n", - " \"api_base\": \"\",\n", + " \"base_url\": \"\",\n", " \"api_type\": \"azure\",\n", " \"api_version\": \"2023-07-01-preview\"\n", " }\n", diff --git a/notebook/agentchat_web_info.ipynb b/notebook/agentchat_web_info.ipynb index 6f202b19d2e..9229365a40d 100644 --- a/notebook/agentchat_web_info.ipynb +++ b/notebook/agentchat_web_info.ipynb @@ -102,14 +102,14 @@ " {\n", " 'model': 'gpt4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k-0314',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", diff --git a/notebook/oai_chatgpt_gpt4.ipynb 
b/notebook/oai_chatgpt_gpt4.ipynb index 9f273db8b5c..ba787af4cdb 100644 --- a/notebook/oai_chatgpt_gpt4.ipynb +++ b/notebook/oai_chatgpt_gpt4.ipynb @@ -124,13 +124,13 @@ " {'api_key': ''}, # only if OpenAI API key is found\n", " {\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # only if the at least one Azure OpenAI API key is found\n", " {\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # only if the second Azure OpenAI API key is found\n", diff --git a/notebook/oai_completion.ipynb b/notebook/oai_completion.ipynb index 1d93140a561..f25db693fb8 100644 --- a/notebook/oai_completion.ipynb +++ b/notebook/oai_completion.ipynb @@ -91,13 +91,13 @@ "# }, # OpenAI API endpoint for gpt-4\n", "# {\n", "# 'api_key': '',\n", - "# 'api_base': '',\n", + "# 'base_url': '',\n", "# 'api_type': 'azure',\n", "# 'api_version': '2023-03-15-preview',\n", "# }, # Azure OpenAI API endpoint for gpt-4\n", "# {\n", "# 'api_key': '',\n", - "# 'api_base': '',\n", + "# 'base_url': '',\n", "# 'api_type': 'azure',\n", "# 'api_version': '2023-03-15-preview',\n", "# }, # another Azure OpenAI API endpoint for gpt-4\n", @@ -125,14 +125,14 @@ "# {\n", "# 'model': 'gpt-3.5-turbo',\n", "# 'api_key': '',\n", - "# 'api_base': '',\n", + "# 'base_url': '',\n", "# 'api_type': 'azure',\n", "# 'api_version': '2023-06-01-preview',\n", "# }, # Azure OpenAI API endpoint for gpt-3.5-turbo\n", "# {\n", "# 'model': 'gpt-35-turbo-v0301',\n", "# 'api_key': '',\n", - "# 'api_base': '',\n", + "# 'base_url': '',\n", "# 'api_type': 'azure',\n", "# 'api_version': '2023-06-01-preview',\n", "# }, # another Azure OpenAI API endpoint for gpt-3.5-turbo with deployment name gpt-35-turbo-v0301\n", diff --git a/notebook/oai_openai_utils.ipynb b/notebook/oai_openai_utils.ipynb index 6179c21dfa6..94cdcbb736f 100644 --- a/notebook/oai_openai_utils.ipynb +++ b/notebook/oai_openai_utils.ipynb @@ -88,13 +88,13 @@ "outputs": [], "source": [ "api_keys = [\"YOUR_OPENAI_API_KEY\"]\n", - "api_bases = None # You can specify API base URLs if needed. eg: localhost:8000\n", + "base_urls = None # You can specify API base URLs if needed. 
eg: localhost:8000\n", "api_type = \"openai\" # Type of API, e.g., \"openai\" or \"aoai\".\n", "api_version = None # Specify API version if needed.\n", "\n", "config_list = autogen.get_config_list(\n", " api_keys,\n", - " api_bases=api_bases,\n", + " base_urls=base_urls,\n", " api_type=api_type,\n", " api_version=api_version\n", ")\n", @@ -454,7 +454,7 @@ "text/plain": [ "[{'api_key': 'sk-*********************', 'model': 'gpt-4'},\n", " {'api_key': '1234567890234567890',\n", - " 'api_base': 'https://api.someotherapi.com',\n", + " 'base_url': 'https://api.someotherapi.com',\n", " 'api_type': 'aoai',\n", " 'api_version': 'v2',\n", " 'model': 'gpt-3.5-turbo'}]" @@ -474,7 +474,7 @@ " \"api_key_env_var\": \"ANOTHER_API_KEY\",\n", " \"api_type\": \"aoai\",\n", " \"api_version\": \"v2\",\n", - " \"api_base\": \"https://api.someotherapi.com\"\n", + " \"base_url\": \"https://api.someotherapi.com\"\n", " }\n", " },\n", " filter_dict={\n", diff --git a/setup.py b/setup.py index b5f846984ae..d47e2dca362 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ __version__ = version["__version__"] install_requires = [ - "openai<1", + "openai>=1", "diskcache", "termcolor", "flaml", diff --git a/test/oai/test_utils.py b/test/oai/test_utils.py index 8813ff0d7ae..04a8596fb54 100644 --- a/test/oai/test_utils.py +++ b/test/oai/test_utils.py @@ -24,7 +24,7 @@ "api_key_env_var": "ANOTHER_API_KEY", "api_type": "aoai", "api_version": "v2", - "api_base": "https://api.someotherapi.com", + "base_url": "https://api.someotherapi.com", }, } diff --git a/website/blog/2023-07-14-Local-LLMs/index.mdx b/website/blog/2023-07-14-Local-LLMs/index.mdx index 136e05e453f..e7ff8066d5c 100644 --- a/website/blog/2023-07-14-Local-LLMs/index.mdx +++ b/website/blog/2023-07-14-Local-LLMs/index.mdx @@ -77,7 +77,7 @@ response = oai.Completion.create( config_list=[ { "model": "chatglm2-6b", - "api_base": "http://localhost:8000/v1", + "base_url": "http://localhost:8000/v1", "api_type": "open_ai", "api_key": "NULL", # just a placeholder } @@ -91,7 +91,7 @@ response = oai.ChatCompletion.create( config_list=[ { "model": "chatglm2-6b", - "api_base": "http://localhost:8000/v1", + "base_url": "http://localhost:8000/v1", "api_type": "open_ai", "api_key": "NULL", } @@ -125,13 +125,13 @@ response = oai.ChatCompletion.create( config_list=[ { "model": "chatglm2-6b", - "api_base": "http://localhost:8000/v1", + "base_url": "http://localhost:8000/v1", "api_type": "open_ai", "api_key": "NULL", }, { "model": "vicuna-7b-v1.3", - "api_base": "http://localhost:8000/v1", + "base_url": "http://localhost:8000/v1", "api_type": "open_ai", "api_key": "NULL", } diff --git a/website/docs/FAQ.md b/website/docs/FAQ.md index ccf214c1336..cf0ce6ace16 100644 --- a/website/docs/FAQ.md +++ b/website/docs/FAQ.md @@ -36,14 +36,14 @@ The `OAI_CONFIG_LIST` var or file content looks like the following: { "model": "gpt-4", "api_key": "", - "api_base": "", + "base_url": "", "api_type": "azure", "api_version": "2023-07-01-preview" }, { "model": "gpt-3.5-turbo", "api_key": "", - "api_base": "", + "base_url": "", "api_type": "azure", "api_version": "2023-07-01-preview" } diff --git a/website/docs/Use-Cases/enhanced_inference.md b/website/docs/Use-Cases/enhanced_inference.md index 8b5dd5c694e..a7b1a17a725 100644 --- a/website/docs/Use-Cases/enhanced_inference.md +++ b/website/docs/Use-Cases/enhanced_inference.md @@ -138,19 +138,19 @@ response = autogen.Completion.create( "model": "gpt-4", "api_key": os.environ.get("AZURE_OPENAI_API_KEY"), "api_type": "azure", - "api_base": 
os.environ.get("AZURE_OPENAI_API_BASE"), + "base_url": os.environ.get("AZURE_OPENAI_API_BASE"), "api_version": "2023-07-01-preview", }, { "model": "gpt-3.5-turbo", "api_key": os.environ.get("OPENAI_API_KEY"), "api_type": "open_ai", - "api_base": "https://api.openai.com/v1", + "base_url": "https://api.openai.com/v1", "api_version": None, }, { "model": "llama-7B", - "api_base": "http://127.0.0.1:8080", + "base_url": "http://127.0.0.1:8080", "api_type": "open_ai", "api_version": None, } From dfd569566c0c76800e5e28f6e3f74a9b2c00f498 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Mon, 23 Oct 2023 11:16:15 -0700 Subject: [PATCH 02/35] InvalidRequestError -> BadRequestError (#389) --- autogen/oai/completion.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py index a23352b982f..18cfcd97bbe 100644 --- a/autogen/oai/completion.py +++ b/autogen/oai/completion.py @@ -17,7 +17,7 @@ ServiceUnavailableError, RateLimitError, APIError, - InvalidRequestError, + BadRequestError, APIConnectionError, Timeout, AuthenticationError, @@ -261,7 +261,7 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca f"Failed to get response from openai api due to getting RateLimitError or Timeout for {max_retry_period} seconds." ) return response - except InvalidRequestError: + except BadRequestError: if "azure" in config.get("api_type", openai.api_type) and "model" in config: # azure api uses "engine" instead of "model" config["engine"] = config.pop("model").replace("gpt-3.5-turbo", "gpt-35-turbo") @@ -817,7 +817,7 @@ def yes_or_no_filter(context, config, response): response["pass_filter"] = pass_filter return response cost += response["cost"] - except (AuthenticationError, RateLimitError, Timeout, InvalidRequestError): + except (AuthenticationError, RateLimitError, Timeout, BadRequestError): logger.debug(f"failed with config {i}", exc_info=1) if i == last: raise From c41be9c4f45d3cc2488e98423cbe1477447770f6 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Mon, 23 Oct 2023 18:24:34 +0000 Subject: [PATCH 03/35] remove api_key_path; close #388 --- autogen/oai/completion.py | 1 - 1 file changed, 1 deletion(-) diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py index 18cfcd97bbe..ce9d50f8690 100644 --- a/autogen/oai/completion.py +++ b/autogen/oai/completion.py @@ -198,7 +198,6 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca Try cache first. If not found, call the openai api. If the api call fails, retry after retry_wait_time. 
""" config = config.copy() - openai.api_key_path = config.pop("api_key_path", openai.api_key_path) key = get_key(config) if use_cache: response = cls._cache.get(key, None) From 2f97b8bb5ef3d76221922e47b688a2c468554140 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Tue, 24 Oct 2023 10:17:25 -0700 Subject: [PATCH 04/35] close #402 (#403) --- autogen/oai/completion.py | 8 ++------ test/oai/test_completion.py | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py index ce9d50f8690..29ed334bd82 100644 --- a/autogen/oai/completion.py +++ b/autogen/oai/completion.py @@ -13,8 +13,7 @@ try: import openai - from openai.error import ( - ServiceUnavailableError, + from openai import ( RateLimitError, APIError, BadRequestError, @@ -221,10 +220,7 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca response = openai_completion.create(**config) else: response = openai_completion.create(request_timeout=request_timeout, **config) - except ( - ServiceUnavailableError, - APIConnectionError, - ): + except APIConnectionError: # transient error logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1) sleep(retry_wait_time) diff --git a/test/oai/test_completion.py b/test/oai/test_completion.py index f33533b7bf8..dd03864de8e 100644 --- a/test/oai/test_completion.py +++ b/test/oai/test_completion.py @@ -272,7 +272,7 @@ def test_humaneval(num_samples=1): ) response = autogen.ChatCompletion.create(context=test_data[0], config_list=config_list, **config) print(response) - from openai.error import RateLimitError + from openai import RateLimitError try: code, cost, selected = implement(tune_data[1], [{**config_list[-1], **config}]) From 1df493bff888ba87ad59e598008bf40f8f10b417 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Wed, 25 Oct 2023 09:41:28 -0700 Subject: [PATCH 05/35] openai client (#419) * openai client * client test --- autogen/oai/__init__.py | 2 + autogen/oai/_client.py | 234 ++++++++++++++++++++++++++++++++++++++++ setup.py | 2 +- test/test_client.py | 33 ++++++ 4 files changed, 270 insertions(+), 1 deletion(-) create mode 100644 autogen/oai/_client.py create mode 100644 test/test_client.py diff --git a/autogen/oai/__init__.py b/autogen/oai/__init__.py index a1b34b33683..e98ef45ead1 100644 --- a/autogen/oai/__init__.py +++ b/autogen/oai/__init__.py @@ -1,3 +1,4 @@ +from autogen.oai._client import OpenAIWrapper from autogen.oai.completion import Completion, ChatCompletion from autogen.oai.openai_utils import ( get_config_list, @@ -9,6 +10,7 @@ ) __all__ = [ + "OpenAIWrapper", "Completion", "ChatCompletion", "get_config_list", diff --git a/autogen/oai/_client.py b/autogen/oai/_client.py new file mode 100644 index 00000000000..55e31ea15b1 --- /dev/null +++ b/autogen/oai/_client.py @@ -0,0 +1,234 @@ +import sys +from typing import List, Optional, Dict, Callable +import logging +import inspect +from flaml.automl.logger import logger_formatter +from openai.types.chat import ChatCompletion +from openai.types.completion import Completion + +from autogen.oai.openai_utils import get_key + +try: + from openai import ( + RateLimitError, + APIError, + BadRequestError, + APIConnectionError, + Timeout, + AuthenticationError, + ) + from openai import OpenAI + import diskcache + + ERROR = None +except ImportError: + ERROR = ImportError("please install openai>=1 and diskcache to use the autogen.oai subpackage.") + OpenAI = object +logger = logging.getLogger(__name__) +if not logger.handlers: + # Add the console 
handler. + _ch = logging.StreamHandler(stream=sys.stdout) + _ch.setFormatter(logger_formatter) + logger.addHandler(_ch) + + +class OpenAIWrapper: + """A wrapper class for openai client.""" + + cache_path_root: str = ".cache" + extra_kwargs = {"seed", "filter_func", "allow_format_str_template", "context", "api_type", "api_version"} + openai_kwargs = set(inspect.getfullargspec(OpenAI.__init__).kwonlyargs) + + def __init__(self, *, config_list: List[Dict] = None, **base_config): + """ + Args: + config_list: a list of config dicts to override the base_config. + They can contain additional kwargs as allowed in the [create](/docs/reference/oai/_client/#create) method. E.g., + + ```python + config_list=[ + { + "model": "gpt-4", + "api_key": os.environ.get("AZURE_OPENAI_API_KEY"), + "api_type": "azure", + "base_url": os.environ.get("AZURE_OPENAI_API_BASE"), + "api_version": "2023-03-15-preview", + }, + { + "model": "gpt-3.5-turbo", + "api_key": os.environ.get("OPENAI_API_KEY"), + "api_type": "open_ai", + "base_url": "https://api.openai.com/v1", + }, + { + "model": "llama-7B", + "base_url": "http://127.0.0.1:8080", + "api_type": "open_ai", + } + ] + ``` + + base_config: base config. It can contain both keyword arguments for openai client + and additional kwargs. + """ + openai_config, extra_kwargs = self._separate_openai_config(base_config) + if type(config_list) is list and len(config_list) == 0: + logger.warning("openai client was provided with an empty config_list, which may not be intended.") + if config_list: + self._clients = [self._client(config, openai_config) for config in config_list] + self._config_list = [ + {**extra_kwargs, **{k: v for k, v in config.items() if k not in self.openai_kwargs}} + for config in config_list + ] + else: + self._clients = [OpenAI(**openai_config)] + self._config_list = [extra_kwargs] + + def _separate_openai_config(self, config): + """Separate the config into openai_config and extra_kwargs.""" + openai_config = {k: v for k, v in config.items() if k in self.openai_kwargs} + extra_kwargs = {k: v for k, v in config.items() if k not in self.openai_kwargs} + return openai_config, extra_kwargs + + def _separate_create_config(self, config): + """Separate the config into create_config and extra_kwargs.""" + create_config = {k: v for k, v in config.items() if k not in self.extra_kwargs} + extra_kwargs = {k: v for k, v in config.items() if k in self.extra_kwargs} + return create_config, extra_kwargs + + def _client(self, config, openai_config): + """Create a client with the given config to overrdie openai_config, + after removing extra kwargs. 
+ """ + config = {**openai_config, **{k: v for k, v in config.items() if k in self.openai_kwargs}} + client = OpenAI(**config) + return client + + @classmethod + def instantiate( + cls, + template: str | Callable | None, + context: Optional[Dict] = None, + allow_format_str_template: Optional[bool] = False, + ): + if not context or template is None: + return template + if isinstance(template, str): + return template.format(**context) if allow_format_str_template else template + return template(context) + + def _construct_create_params(self, create_config: Dict, extra_kwargs: Dict) -> Dict: + """Prime the create_config with additional_kwargs.""" + # Validate the config + prompt = create_config.get("prompt") + messages = create_config.get("messages") + if (prompt is None) == (messages is None): + raise ValueError("Either prompt or messages should be in create config but not both.") + context = extra_kwargs.get("context") + if context is None: + # No need to instantiate if no context is provided. + return create_config + # Instantiate the prompt or messages + allow_format_str_template = extra_kwargs.get("allow_format_str_template", False) + # Make a copy of the config + params = create_config.copy() + if prompt is not None: + # Instantiate the prompt + params["prompt"] = self.instantiate(prompt, context, allow_format_str_template) + elif context: + # Instantiate the messages + params["messages"] = [ + { + **m, + "content": self.instantiate(m["content"], context, allow_format_str_template), + } + if m.get("content") + else m + for m in messages + ] + return params + + def create(self, **config): + """Make a completion for a given config using openai's clients. + Besides the kwargs allowed in openai's client, we allow the following additional kwargs. + The config in each client will be overriden by the config. + + Args: + - context (Dict | None): The context to instantiate the prompt or messages. Default to None. + It needs to contain keys that are used by the prompt template or the filter function. + E.g., `prompt="Complete the following sentence: {prefix}, context={"prefix": "Today I feel"}`. + The actual prompt will be: + "Complete the following sentence: Today I feel". + More examples can be found at [templating](/docs/Use-Cases/enhanced_inference#templating). + - `seed` (int | None) for the cache. Default to 41. + An integer seed is useful when implementing "controlled randomness" for the completion. + None for no caching. + - filter_func (Callable | None): A function that takes in the context and the response + and returns a boolean to indicate whether the response is valid. E.g., + + ```python + def yes_or_no_filter(context, response): + return context.get("yes_or_no_choice", False) is False or any( + text in ["Yes.", "No."] for text in client.extract_text_or_function_call(response) + ) + ``` + + - allow_format_str_template (bool | None): Whether to allow format string template in the config. Default to false. 
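+
+        A minimal usage sketch (illustrative only, not part of the tested API surface;
+        it assumes an `OPENAI_API_KEY` environment variable and a placeholder model name):
+
+        ```python
+        client = OpenAIWrapper(
+            config_list=[{"model": "gpt-3.5-turbo", "api_key": os.environ["OPENAI_API_KEY"]}]
+        )
+        response = client.create(
+            messages=[{"role": "user", "content": "Complete the following sentence: {prefix}"}],
+            context={"prefix": "Today I feel"},
+            allow_format_str_template=True,
+        )
+        print(client.extract_text_or_function_call(response))
+        ```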
+ """ + if ERROR: + raise ERROR + last = len(self._clients) - 1 + for i, client in enumerate(self._clients): + # merge the input config with the i-th config in the config list + full_config = {**config, **self._config_list[i]} + # separate the config into create_config and extra_kwargs + create_config, extra_kwargs = self._separate_create_config(full_config) + # construct the create params + params = self._construct_create_params(create_config, extra_kwargs) + # get the seed, filter_func and context + seed = extra_kwargs.get("seed", 41) + filter_func = extra_kwargs.get("filter_func") + context = extra_kwargs.get("context") + with diskcache.Cache(f"{self.cache_path_root}/{seed}") as cache: + if seed is not None: + # Try to get the response from cache + key = get_key(params) + response = cache.get(key, None) + if response is not None: + # check the filter + pass_filter = filter_func is None or filter_func(context=context, response=response) + if pass_filter or i == last: + # Return the response if it passes the filter or it is the last client + response.config_id = i + response.pass_filter = pass_filter + # TODO: add response.cost + return response + completions = client.chat.completions if "messages" in params else client.completions + try: + response = completions.create(**params) + except APIConnectionError: + # This seems to be the only error raised by openai + logger.debug(f"config {i} failed", exc_info=1) + if i == last: + raise + else: + if seed is not None: + # Cache the response + cache.set(key, response) + return response + + def extract_text_or_function_call(cls, response: ChatCompletion | Completion) -> List[str]: + """Extract the text or function calls from a completion or chat response. + + Args: + response (ChatCompletion | Completion): The response from openai. + + Returns: + A list of text or function calls in the responses. 
+ """ + choices = response.choices + if isinstance(response, Completion): + return [choice.text for choice in choices] + return [ + choice.message if choice.message.function_call is not None else choice.message.content for choice in choices + ] diff --git a/setup.py b/setup.py index d47e2dca362..688a783f6e0 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ __version__ = version["__version__"] install_requires = [ - "openai>=1", + "openai==1.0.0b3", "diskcache", "termcolor", "flaml", diff --git a/test/test_client.py b/test/test_client.py new file mode 100644 index 00000000000..9a39df2801e --- /dev/null +++ b/test/test_client.py @@ -0,0 +1,33 @@ +import pytest +from autogen import OpenAIWrapper, config_list_from_json, config_list_openai_aoai + +try: + from openai import OpenAI +except ImportError: + skip = True + + +@pytest.mark.skipif(skip, reason="openai>=1 not installed") +def test_chat_completion(): + config_list = config_list_from_json( + env_or_file="OAI_CONFIG_LIST", + file_location="notebook", + ) + client = OpenAIWrapper(config_list=config_list) + response = client.create(messages=[{"role": "user", "content": "1+1="}]) + print(response) + print(client.extract_text_or_function_call(response)) + + +@pytest.mark.skipif(skip, reason="openai>=1 not installed") +def test_completion(): + config_list = config_list_openai_aoai("notebook") + client = OpenAIWrapper(config_list=config_list) + response = client.create(prompt="1+1=", model="gpt-3.5-turbo-instruct") + print(response) + print(client.extract_text_or_function_call(response)) + + +if __name__ == "__main__": + test_chat_completion() + test_completion() From d77b1c992dbfb10c43b3e29c3f0644e8413706c3 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Wed, 25 Oct 2023 17:28:08 +0000 Subject: [PATCH 06/35] _client -> client --- autogen/oai/__init__.py | 2 +- autogen/oai/{_client.py => client.py} | 2 +- test/test_client.py | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) rename autogen/oai/{_client.py => client.py} (99%) diff --git a/autogen/oai/__init__.py b/autogen/oai/__init__.py index e98ef45ead1..dbcd2f79607 100644 --- a/autogen/oai/__init__.py +++ b/autogen/oai/__init__.py @@ -1,4 +1,4 @@ -from autogen.oai._client import OpenAIWrapper +from autogen.oai.client import OpenAIWrapper from autogen.oai.completion import Completion, ChatCompletion from autogen.oai.openai_utils import ( get_config_list, diff --git a/autogen/oai/_client.py b/autogen/oai/client.py similarity index 99% rename from autogen/oai/_client.py rename to autogen/oai/client.py index 55e31ea15b1..2b043f0504c 100644 --- a/autogen/oai/_client.py +++ b/autogen/oai/client.py @@ -43,7 +43,7 @@ def __init__(self, *, config_list: List[Dict] = None, **base_config): """ Args: config_list: a list of config dicts to override the base_config. - They can contain additional kwargs as allowed in the [create](/docs/reference/oai/_client/#create) method. E.g., + They can contain additional kwargs as allowed in the [create](/docs/reference/oai/_client#create) method. 
E.g., ```python config_list=[ diff --git a/test/test_client.py b/test/test_client.py index 9a39df2801e..bcec8cb8f08 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -5,6 +5,8 @@ from openai import OpenAI except ImportError: skip = True +else: + skip = False @pytest.mark.skipif(skip, reason="openai>=1 not installed") From 6a8eaf3229281e0cf1d7e3f6742ddf61fcde2abb Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Wed, 25 Oct 2023 17:42:19 +0000 Subject: [PATCH 07/35] _client -> client --- autogen/oai/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 2b043f0504c..8b7c2ba4259 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -43,7 +43,7 @@ def __init__(self, *, config_list: List[Dict] = None, **base_config): """ Args: config_list: a list of config dicts to override the base_config. - They can contain additional kwargs as allowed in the [create](/docs/reference/oai/_client#create) method. E.g., + They can contain additional kwargs as allowed in the [create](/docs/reference/oai/client#create) method. E.g., ```python config_list=[ From c3f58f3deaf34c7c4dfdf81e4a9f1159366818e5 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Wed, 25 Oct 2023 18:12:25 +0000 Subject: [PATCH 08/35] extra kwargs --- autogen/oai/client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 8b7c2ba4259..1f189cbe5b0 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -174,6 +174,8 @@ def yes_or_no_filter(context, response): ``` - allow_format_str_template (bool | None): Whether to allow format string template in the config. Default to false. + - api_type (str | None): The api type. Default to None. E.g., "azure" or "azure_ad". + - api_version (str | None): The api version. Default to None. E.g., "2023-08-01-preview". 
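+
+        As an illustrative sketch (all values below are placeholders), these two kwargs
+        can ride along in a config entry next to the regular client kwargs:
+
+        ```python
+        client = OpenAIWrapper(
+            config_list=[{
+                "model": "gpt-4",
+                "api_key": os.environ.get("AZURE_OPENAI_API_KEY"),
+                "base_url": os.environ.get("AZURE_OPENAI_API_BASE"),
+                "api_type": "azure",
+                "api_version": "2023-08-01-preview",
+            }]
+        )
+        ```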
""" if ERROR: raise ERROR From 75a6f7d82de2b08ebd3a02c2c30c612092e959c6 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Wed, 25 Oct 2023 19:44:15 -0700 Subject: [PATCH 09/35] Completion -> client (#426) * Completion -> client * Completion -> client * Completion -> client * Completion -> client * support aoai * fix test error * remove commented code * support aoai --- .github/workflows/openai.yml | 4 +- README.md | 2 +- autogen/agentchat/assistant_agent.py | 2 +- autogen/agentchat/contrib/teachable_agent.py | 11 ++-- .../agentchat/contrib/text_analyzer_agent.py | 8 +-- autogen/agentchat/conversable_agent.py | 38 +++++++----- autogen/agentchat/user_proxy_agent.py | 2 +- autogen/code_utils.py | 55 +++-------------- autogen/math_utils.py | 2 +- autogen/oai/client.py | 61 ++++++++++++++----- autogen/oai/completion.py | 4 +- notebook/agentchat_MathChat.ipynb | 4 +- notebook/agentchat_RetrieveChat.ipynb | 4 +- notebook/agentchat_function_call.ipynb | 2 +- notebook/agentchat_groupchat_RAG.ipynb | 6 +- notebook/agentchat_groupchat_research.ipynb | 2 +- notebook/agentchat_human_feedback.ipynb | 23 ++----- notebook/agentchat_langchain.ipynb | 4 +- notebook/agentchat_lmm_llava.ipynb | 10 ++- notebook/agentchat_planning.ipynb | 2 +- notebook/agentchat_qdrant_RetrieveChat.ipynb | 4 +- notebook/agentchat_stream.ipynb | 2 +- notebook/agentchat_teachability.ipynb | 4 +- notebook/agentchat_teaching.ipynb | 2 +- notebook/agentchat_two_users.ipynb | 2 +- notebook/agentchat_web_info.ipynb | 2 +- notebook/oai_chatgpt_gpt4.ipynb | 2 +- notebook/oai_openai_utils.ipynb | 4 +- test/agentchat/chat_with_teachable_agent.py | 2 +- test/agentchat/test_assistant_agent.py | 33 +++++----- test/agentchat/test_async.py | 2 +- test/agentchat/test_conversable_agent.py | 16 ++--- test/{ => agentchat}/test_function_call.py | 12 ++-- test/agentchat/test_math_user_proxy_agent.py | 18 +++--- .../test_py10.py} | 10 +-- test/agentchat/test_qdrant_retrievechat.py | 4 +- test/agentchat/test_retrievechat.py | 4 +- test/agentchat/test_teachable_agent.py | 20 +++--- ...test_completion.py => _test_completion.py} | 3 +- test/oai/test_utils.py | 6 +- test/test_client.py | 21 ++++++- test/test_code.py | 2 +- test/test_notebook.py | 4 +- website/blog/2023-07-14-Local-LLMs/index.mdx | 2 +- .../blog/2023-10-18-RetrieveChat/index.mdx | 2 +- website/docs/FAQ.md | 9 ++- website/docs/Getting-Started.md | 2 +- website/docs/Use-Cases/enhanced_inference.md | 58 ++++++++++-------- 48 files changed, 251 insertions(+), 247 deletions(-) rename test/{ => agentchat}/test_function_call.py (95%) rename test/{test_with_openai.py => agentchat/test_py10.py} (92%) rename test/oai/{test_completion.py => _test_completion.py} (99%) diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml index 0a678fd9ed1..13e6c4124f2 100644 --- a/.github/workflows/openai.yml +++ b/.github/workflows/openai.yml @@ -54,6 +54,7 @@ jobs: run: | pip install -e .[retrievechat] - name: Install packages for Teachable when needed + if: matrix.python-version == '3.11' run: | pip install -e .[teachable] - name: Coverage @@ -76,7 +77,8 @@ jobs: OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} run: | pip install nbconvert nbformat ipykernel - coverage run -a -m pytest test/test_with_openai.py + coverage run -a -m pytest test/agentchat/test_py10.py + coverage run -a -m pytest test/agentchat/test_teachable_agent.py coverage run -a -m pytest test/test_notebook.py coverage xml cat "$(pwd)/test/executed_openai_notebook_output.txt" diff --git a/README.md b/README.md index 
430a431d96b..d95c7430f9b 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,7 @@ Please find more [code examples](https://microsoft.github.io/autogen/docs/Exampl Autogen also helps maximize the utility out of the expensive LLMs such as ChatGPT and GPT-4. It offers enhanced LLM inference with powerful functionalities like tuning, caching, error handling, and templating. For example, you can optimize generations by LLM with your own tuning data, success metrics, and budgets. ```python -# perform tuning +# perform tuning for openai<1 config, analysis = autogen.Completion.tune( data=tune_data, metric="success", diff --git a/autogen/agentchat/assistant_agent.py b/autogen/agentchat/assistant_agent.py index cac322774ba..4a0200fb672 100644 --- a/autogen/agentchat/assistant_agent.py +++ b/autogen/agentchat/assistant_agent.py @@ -43,7 +43,7 @@ def __init__( system_message (str): system message for the ChatCompletion inference. Please override this attribute if you want to reprogram the agent. llm_config (dict): llm inference configuration. - Please refer to [Completion.create](/docs/reference/oai/completion#create) + Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create) for available options. is_termination_msg (function): a function that takes a message in the form of a dictionary and returns a boolean value indicating if this received message is a termination message. diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py index 8db5b699ea2..a3334d6836a 100644 --- a/autogen/agentchat/contrib/teachable_agent.py +++ b/autogen/agentchat/contrib/teachable_agent.py @@ -40,7 +40,7 @@ def __init__( system_message (str): system message for the ChatCompletion inference. human_input_mode (str): This agent should NEVER prompt the human for input. llm_config (dict or False): llm inference configuration. - Please refer to [Completion.create](/docs/reference/oai/completion#create) + Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create) for available options. To disable llm-based auto reply, set to False. analyzer_llm_config (dict or False): llm inference configuration passed to TextAnalyzerAgent. @@ -125,11 +125,8 @@ def _generate_teachable_assistant_reply( messages = messages.copy() messages[-1]["content"] = new_user_text - # Generate a response. - msgs = self._oai_system_message + messages - response = oai.ChatCompletion.create(messages=msgs, **self.llm_config) - response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] - return True, response_text + # Generate a response by reusing existing generate_oai_reply + return self.generate_oai_reply(messages, sender, config) def learn_from_user_feedback(self): """Reviews the user comments from the last chat, and decides what teachings to store as memos.""" @@ -265,6 +262,8 @@ def analyze(self, text_to_analyze, analysis_instructions): self.send(recipient=self.analyzer, message=analysis_instructions, request_reply=True) # Request the reply. return self.last_message(self.analyzer)["content"] else: + # TODO: This is not an encouraged usage pattern. It breaks the conversation-centric design. + # consider using the arg "silent" # Use the analyzer's method directly, to leave analyzer message out of the printed chat. 
return self.analyzer.analyze_text(text_to_analyze, analysis_instructions) diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py index 8cf88eba6ae..2df839a1172 100644 --- a/autogen/agentchat/contrib/text_analyzer_agent.py +++ b/autogen/agentchat/contrib/text_analyzer_agent.py @@ -26,7 +26,7 @@ def __init__( system_message (str): system message for the ChatCompletion inference. human_input_mode (str): This agent should NEVER prompt the human for input. llm_config (dict or False): llm inference configuration. - Please refer to [Completion.create](/docs/reference/oai/completion#create) + Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create) for available options. To disable llm-based auto reply, set to False. teach_config (dict or None): Additional parameters used by TeachableAgent. @@ -74,9 +74,5 @@ def analyze_text(self, text_to_analyze, analysis_instructions): msg_text = "\n".join( [analysis_instructions, text_to_analyze, analysis_instructions] ) # Repeat the instructions. - messages = self._oai_system_message + [{"role": "user", "content": msg_text}] - # Generate and return the analysis string. - response = oai.ChatCompletion.create(context=None, messages=messages, **self.llm_config) - output_text = oai.ChatCompletion.extract_text_or_function_call(response)[0] - return output_text + return self.generate_oai_reply([{"role": "user", "content": msg_text}], None, None)[1] diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py index 3a0e1959881..d777b86d30b 100644 --- a/autogen/agentchat/conversable_agent.py +++ b/autogen/agentchat/conversable_agent.py @@ -4,7 +4,7 @@ import json import logging from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union -from autogen import oai +from autogen import OpenAIWrapper from .agent import Agent from autogen.code_utils import ( DEFAULT_MODEL, @@ -93,7 +93,7 @@ def __init__( - timeout (Optional, int): The maximum execution time in seconds. - last_n_messages (Experimental, Optional, int): The number of messages to look back for code execution. Default to 1. llm_config (dict or False): llm inference configuration. - Please refer to [Completion.create](/docs/reference/oai/completion#create) + Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create) for available options. To disable llm-based auto reply, set to False. default_auto_reply (str or dict or None): default auto reply when no code execution or llm-based reply is generated. @@ -107,10 +107,12 @@ def __init__( ) if llm_config is False: self.llm_config = False + self.client = None else: self.llm_config = self.DEFAULT_CONFIG.copy() if isinstance(llm_config, dict): self.llm_config.update(llm_config) + self.client = OpenAIWrapper(**self.llm_config) self._code_execution_config = {} if code_execution_config is None else code_execution_config self.human_input_mode = human_input_mode @@ -254,8 +256,10 @@ def _message_to_dict(message: Union[Dict, str]): """ if isinstance(message, str): return {"content": message} - else: + elif isinstance(message, dict): return message + else: + return dict(message) def _append_oai_message(self, message: Union[Dict, str], role, conversation_id: Agent) -> bool: """Append a message to the ChatCompletion conversation. 
@@ -285,6 +289,7 @@ def _append_oai_message(self, message: Union[Dict, str], role, conversation_id: oai_message["role"] = "function" if message.get("role") == "function" else role if "function_call" in oai_message: oai_message["role"] = "assistant" # only messages with role 'assistant' can have a function call. + oai_message["function_call"] = dict(oai_message["function_call"]) self._oai_messages[conversation_id].append(oai_message) return True @@ -306,7 +311,7 @@ def send( - role (str): the role of the message, any role that is not "function" will be modified to "assistant". - context (dict): the context of the message, which will be passed to - [Completion.create](../oai/Completion#create). + [OpenAIWrapper.create](../oai/client#create). For example, one agent can send a message A as: ```python { @@ -355,7 +360,7 @@ async def a_send( - role (str): the role of the message, any role that is not "function" will be modified to "assistant". - context (dict): the context of the message, which will be passed to - [Completion.create](../oai/Completion#create). + [OpenAIWrapper.create](../oai/client#create). For example, one agent can send a message A as: ```python { @@ -398,18 +403,21 @@ def _print_received_message(self, message: Union[Dict, str], sender: Agent): content = message.get("content") if content is not None: if "context" in message: - content = oai.ChatCompletion.instantiate( + content = OpenAIWrapper.instantiate( content, message["context"], self.llm_config and self.llm_config.get("allow_format_str_template", False), ) print(content, flush=True) if "function_call" in message: - func_print = f"***** Suggested function Call: {message['function_call'].get('name', '(No function name found)')} *****" + function_call = dict(message["function_call"]) + func_print = ( + f"***** Suggested function Call: {function_call.get('name', '(No function name found)')} *****" + ) print(colored(func_print, "green"), flush=True) print( "Arguments: \n", - message["function_call"].get("arguments", "(No arguments found)"), + function_call.get("arguments", "(No arguments found)"), flush=True, sep="", ) @@ -447,7 +455,7 @@ def receive( This field is only needed to distinguish between "function" or "assistant"/"user". 4. "name": In most cases, this field is not needed. When the role is "function", this field is needed to indicate the function name. 5. "context" (dict): the context of the message, which will be passed to - [Completion.create](../oai/Completion#create). + [OpenAIWrapper.create](../oai/client#create). sender: sender of an Agent instance. request_reply (bool or None): whether a reply is requested from the sender. If None, the value is determined by `self.reply_at_receive[sender]`. @@ -483,7 +491,7 @@ async def a_receive( This field is only needed to distinguish between "function" or "assistant"/"user". 4. "name": In most cases, this field is not needed. When the role is "function", this field is needed to indicate the function name. 5. "context" (dict): the context of the message, which will be passed to - [Completion.create](../oai/Completion#create). + [OpenAIWrapper.create](../oai/client#create). sender: sender of an Agent instance. request_reply (bool or None): whether a reply is requested from the sender. If None, the value is determined by `self.reply_at_receive[sender]`. 
@@ -596,17 +604,17 @@ def generate_oai_reply( config: Optional[Any] = None, ) -> Tuple[bool, Union[str, Dict, None]]: """Generate a reply using autogen.oai.""" - llm_config = self.llm_config if config is None else config - if llm_config is False: + client = self.client if config is None else config + if client is None: return False, None if messages is None: messages = self._oai_messages[sender] # TODO: #1143 handle token limit exceeded error - response = oai.ChatCompletion.create( - context=messages[-1].pop("context", None), messages=self._oai_system_message + messages, **llm_config + response = client.create( + context=messages[-1].pop("context", None), messages=self._oai_system_message + messages ) - return True, oai.ChatCompletion.extract_text_or_function_call(response)[0] + return True, client.extract_text_or_function_call(response)[0] def generate_code_execution_reply( self, diff --git a/autogen/agentchat/user_proxy_agent.py b/autogen/agentchat/user_proxy_agent.py index ae5f908d8fe..d72c2bdceba 100644 --- a/autogen/agentchat/user_proxy_agent.py +++ b/autogen/agentchat/user_proxy_agent.py @@ -63,7 +63,7 @@ def __init__( - last_n_messages (Experimental, Optional, int): The number of messages to look back for code execution. Default to 1. default_auto_reply (str or dict or None): the default auto reply message when no code execution or llm based reply is generated. llm_config (dict or False): llm inference configuration. - Please refer to [Completion.create](/docs/reference/oai/completion#create) + Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create) for available options. Default to false, which disables llm-based auto reply. system_message (str): system message for ChatCompletion inference. diff --git a/autogen/code_utils.py b/autogen/code_utils.py index caaf0907285..8c62dc17c6b 100644 --- a/autogen/code_utils.py +++ b/autogen/code_utils.py @@ -84,49 +84,8 @@ def extract_code( return extracted -# _FIND_CODE_SYS_MSG = [ -# { -# "role": "system", -# "content": """In the following conversation, an assistant suggests code and a user is expected to run it. -# Read the conversation, and then find all the right code blocks for the user to run next in the right order. -# Only return the code blocks that are expected to run. -# Don't include code blocks which have been executed unless the user is requested to run the same block again. -# When the user needs to run multiple blocks in sequence, make sure to output all the blocks to run in a right order. -# If the line beginning with "# filename" is put before a code block, move it into the code block as the first line. -# Make sure to add the right "python" or "sh" identifier if the language identifier is missing for a code block. -# Don't make other changes to the code blocks. -# Don't reply anything else if at least one code block is expected to run. -# If no code block is expeted to run, check whether the task has been successfully finished at full satisfaction. -# If not, reply with the reason why the task is not finished.""", -# }, -# ] -# _FIND_CODE_CONFIG = { -# "model": FAST_MODEL, -# } - - -# def find_code(messages: List[Dict], sys_msg=None, **config) -> Tuple[List[Tuple[str, str]], str]: -# """Find code from a list of messages. - -# Args: -# messages (str): The list of messages to find code from. -# sys_msg (Optional, str): The system message to prepend to the messages. -# config (Optional, dict): The configuration for the API call. - -# Returns: -# list: A list of tuples, each containing the language and the code. 
-# str: The generated text by llm. -# """ -# params = {**_FIND_CODE_CONFIG, **config} -# if sys_msg is None or not sys_msg[0]["content"]: -# sys_msg = _FIND_CODE_SYS_MSG -# response = oai.ChatCompletion.create(messages=sys_msg + messages, **params) -# content = oai.Completion.extract_text(response)[0] -# return extract_code(content), content - - def generate_code(pattern: str = CODE_BLOCK_PATTERN, **config) -> Tuple[str, float]: - """Generate code. + """(Deprecated) Generate code. Args: pattern (Optional, str): The regular expression pattern for finding the code block. @@ -151,7 +110,7 @@ def generate_code(pattern: str = CODE_BLOCK_PATTERN, **config) -> Tuple[str, flo def improve_function(file_name, func_name, objective, **config): - """(work in progress) Improve the function to achieve the objective.""" + """(Deprecated) Improve the function to achieve the objective.""" params = {**_IMPROVE_FUNCTION_CONFIG, **config} # read the entire file into a str with open(file_name, "r") as f: @@ -172,7 +131,7 @@ def improve_function(file_name, func_name, objective, **config): def improve_code(files, objective, suggest_only=True, **config): - """Improve the code to achieve a given objective. + """(Deprecated) Improve the code to achieve a given objective. Args: files (list): A list of file names containing the source code. @@ -422,7 +381,7 @@ def execute_code( def generate_assertions(definition: str, **config) -> Tuple[str, float]: - """Generate assertions for a function. + """(Deprecated) Generate assertions for a function. Args: definition (str): The function definition, including the signature and docstr. @@ -459,7 +418,7 @@ def eval_function_completions( timeout: Optional[float] = 3, use_docker: Optional[bool] = True, ) -> Dict: - """Select a response from a list of responses for the function completion task (using generated assertions), and/or evaluate if the task is successful using a gold test. + """(Deprecated) Select a response from a list of responses for the function completion task (using generated assertions), and/or evaluate if the task is successful using a gold test. Args: responses (list): The list of responses. @@ -549,7 +508,7 @@ def __init__(self, assertions): self.metrics = self.responses = None def pass_assertions(self, context, response, **_): - """Check if the response passes the assertions.""" + """(Deprecated) Check if the response passes the assertions.""" responses = oai.Completion.extract_text(response) metrics = eval_function_completions(responses, context["definition"], assertions=self._assertions) self._assertions = metrics["assertions"] @@ -564,7 +523,7 @@ def implement( configs: Optional[List[Dict]] = None, assertions: Optional[Union[str, Callable[[str], Tuple[str, float]]]] = generate_assertions, ) -> Tuple[str, float]: - """Implement a function from a definition. + """(Deprecated) Implement a function from a definition. Args: definition (str): The function definition, including the signature and docstr. diff --git a/autogen/math_utils.py b/autogen/math_utils.py index 7f35470fa6b..eb512473389 100644 --- a/autogen/math_utils.py +++ b/autogen/math_utils.py @@ -9,7 +9,7 @@ def solve_problem(problem: str, **config) -> str: - """(Experimental) Solve the math problem. + """(Deprecated) Solve the math problem. Args: problem (str): The problem statement. 
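
The deprecations above and the `generate_oai_reply` change earlier in this patch all point to the same replacement path: construct an `OpenAIWrapper` once and call `create` on it. A minimal sketch of the new calling convention follows; the config values and messages are placeholders, not part of the patch:

```python
from autogen import OpenAIWrapper

# Hypothetical config entry; real entries come from OAI_CONFIG_LIST or the environment.
config_list = [{"model": "gpt-4", "api_key": "sk-..."}]

client = OpenAIWrapper(config_list=config_list)

# Mirrors the new generate_oai_reply: prepend the system message, create, then extract.
system_message = [{"role": "system", "content": "You are a helpful assistant."}]
messages = [{"role": "user", "content": "1+1="}]
response = client.create(messages=system_message + messages)
print(client.extract_text_or_function_call(response)[0])
```
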
diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 1f189cbe5b0..8dda5ac3272 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -1,3 +1,4 @@ +import os import sys from typing import List, Optional, Dict, Callable import logging @@ -9,15 +10,7 @@ from autogen.oai.openai_utils import get_key try: - from openai import ( - RateLimitError, - APIError, - BadRequestError, - APIConnectionError, - Timeout, - AuthenticationError, - ) - from openai import OpenAI + from openai import OpenAI, APIError import diskcache ERROR = None @@ -36,7 +29,7 @@ class OpenAIWrapper: """A wrapper class for openai client.""" cache_path_root: str = ".cache" - extra_kwargs = {"seed", "filter_func", "allow_format_str_template", "context", "api_type", "api_version"} + extra_kwargs = {"seed", "filter_func", "allow_format_str_template", "context", "api_version"} openai_kwargs = set(inspect.getfullargspec(OpenAI.__init__).kwonlyargs) def __init__(self, *, config_list: List[Dict] = None, **base_config): @@ -81,13 +74,44 @@ def __init__(self, *, config_list: List[Dict] = None, **base_config): for config in config_list ] else: - self._clients = [OpenAI(**openai_config)] + self._clients = [self._client(extra_kwargs, openai_config)] self._config_list = [extra_kwargs] + def _process_for_azure(self, config: Dict, extra_kwargs: Dict, segment: str = "default"): + # deal with api_version + query_segment = f"{segment}_query" + headers_segment = f"{segment}_headers" + api_version = extra_kwargs.get("api_version") + if api_version is not None and query_segment not in config: + config[query_segment] = {"api-version": api_version} + if segment == "default": + # remove the api_version from extra_kwargs + extra_kwargs.pop("api_version") + if segment == "extra": + return config + # deal with api_type + api_type = extra_kwargs.get("api_type") + if api_type is not None and api_type.startswith("azure") and headers_segment not in config: + api_key = config.get("api_key", os.environ.get("AZURE_OPENAI_API_KEY")) + config[headers_segment] = {"api-key": api_key} + # remove the api_type from extra_kwargs + extra_kwargs.pop("api_type") + # deal with model + model = extra_kwargs.get("model") + if model is None: + return + base_url = config.get("base_url") + if base_url is None: + raise ValueError("to use azure openai api, base_url must be specified.") + suffix = f"openai/deployments/{model}" + if not base_url.endswith(suffix): + config["base_url"] += suffix + def _separate_openai_config(self, config): """Separate the config into openai_config and extra_kwargs.""" openai_config = {k: v for k, v in config.items() if k in self.openai_kwargs} extra_kwargs = {k: v for k, v in config.items() if k not in self.openai_kwargs} + self._process_for_azure(openai_config, extra_kwargs) return openai_config, extra_kwargs def _separate_create_config(self, config): @@ -100,8 +124,9 @@ def _client(self, config, openai_config): """Create a client with the given config to overrdie openai_config, after removing extra kwargs. """ - config = {**openai_config, **{k: v for k, v in config.items() if k in self.openai_kwargs}} - client = OpenAI(**config) + openai_config = {**openai_config, **{k: v for k, v in config.items() if k in self.openai_kwargs}} + self._process_for_azure(openai_config, config) + client = OpenAI(**openai_config) return client @classmethod @@ -174,7 +199,6 @@ def yes_or_no_filter(context, response): ``` - allow_format_str_template (bool | None): Whether to allow format string template in the config. Default to false. 
- - api_type (str | None): The api type. Default to None. E.g., "azure" or "azure_ad". - api_version (str | None): The api version. Default to None. E.g., "2023-08-01-preview". """ if ERROR: @@ -185,6 +209,8 @@ def yes_or_no_filter(context, response): full_config = {**config, **self._config_list[i]} # separate the config into create_config and extra_kwargs create_config, extra_kwargs = self._separate_create_config(full_config) + # process for azure + self._process_for_azure(create_config, extra_kwargs, "extra") # construct the create params params = self._construct_create_params(create_config, extra_kwargs) # get the seed, filter_func and context @@ -208,8 +234,7 @@ def yes_or_no_filter(context, response): completions = client.chat.completions if "messages" in params else client.completions try: response = completions.create(**params) - except APIConnectionError: - # This seems to be the only error raised by openai + except APIError: logger.debug(f"config {i} failed", exc_info=1) if i == last: raise @@ -219,6 +244,7 @@ def yes_or_no_filter(context, response): cache.set(key, response) return response + @classmethod def extract_text_or_function_call(cls, response: ChatCompletion | Completion) -> List[str]: """Extract the text or function calls from a completion or chat response. @@ -234,3 +260,6 @@ def extract_text_or_function_call(cls, response: ChatCompletion | Completion) -> return [ choice.message if choice.message.function_call is not None else choice.message.content for choice in choices ] + + +# TODO: logging diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py index 29ed334bd82..f4cf171e22d 100644 --- a/autogen/oai/completion.py +++ b/autogen/oai/completion.py @@ -37,7 +37,7 @@ class Completion(openai_Completion): - """A class for OpenAI completion API. + """(Deprecated) A class for OpenAI completion API. It also supports: ChatCompletion, Azure OpenAI API. """ @@ -1172,7 +1172,7 @@ def stop_logging(cls): class ChatCompletion(Completion): - """A class for OpenAI API ChatCompletion. Share the same API as Completion.""" + """(Deprecated) A class for OpenAI API ChatCompletion. Share the same API as Completion.""" default_search_space = Completion.default_search_space.copy() default_search_space["model"] = tune.choice(["gpt-3.5-turbo", "gpt-4"]) diff --git a/notebook/agentchat_MathChat.ipynb b/notebook/agentchat_MathChat.ipynb index 9245252ae0c..4159784bccf 100644 --- a/notebook/agentchat_MathChat.ipynb +++ b/notebook/agentchat_MathChat.ipynb @@ -128,14 +128,12 @@ "source": [ "from autogen.agentchat.contrib.math_user_proxy_agent import MathUserProxyAgent\n", "\n", - "autogen.ChatCompletion.start_logging()\n", - "\n", "# 1. create an AssistantAgent instance named \"assistant\"\n", "assistant = autogen.AssistantAgent(\n", " name=\"assistant\", \n", " system_message=\"You are a helpful assistant.\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": config_list,\n", " }\n", diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb index ce3d25d7a30..e0aa629cb75 100644 --- a/notebook/agentchat_RetrieveChat.ipynb +++ b/notebook/agentchat_RetrieveChat.ipynb @@ -179,14 +179,12 @@ "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "import chromadb\n", "\n", - "autogen.ChatCompletion.start_logging()\n", - "\n", "# 1. 
create an RetrieveAssistantAgent instance named \"assistant\"\n", "assistant = RetrieveAssistantAgent(\n", " name=\"assistant\", \n", " system_message=\"You are a helpful assistant.\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": config_list,\n", " },\n", diff --git a/notebook/agentchat_function_call.ipynb b/notebook/agentchat_function_call.ipynb index 3d73066f69a..836a3b0af9c 100644 --- a/notebook/agentchat_function_call.ipynb +++ b/notebook/agentchat_function_call.ipynb @@ -253,7 +253,7 @@ " },\n", " ],\n", " \"config_list\": config_list,\n", - " \"request_timeout\": 120,\n", + " \"timeout\": 120,\n", "}\n", "chatbot = autogen.AssistantAgent(\n", " name=\"chatbot\",\n", diff --git a/notebook/agentchat_groupchat_RAG.ipynb b/notebook/agentchat_groupchat_RAG.ipynb index 654469dcc3a..8d3848f01e9 100644 --- a/notebook/agentchat_groupchat_RAG.ipynb +++ b/notebook/agentchat_groupchat_RAG.ipynb @@ -121,13 +121,13 @@ "import chromadb\n", "\n", "llm_config = {\n", - " \"request_timeout\": 60,\n", + " \"timeout\": 60,\n", " \"seed\": 42,\n", " \"config_list\": config_list,\n", " \"temperature\": 0,\n", "}\n", "\n", - "autogen.ChatCompletion.start_logging()\n", + "# autogen.ChatCompletion.start_logging()\n", "termination_msg = lambda x: isinstance(x, dict) and \"TERMINATE\" == str(x.get(\"content\", \"\"))[-9:].upper()\n", "\n", "boss = autogen.UserProxyAgent(\n", @@ -250,7 +250,7 @@ " },\n", " ],\n", " \"config_list\": config_list,\n", - " \"request_timeout\": 60,\n", + " \"timeout\": 60,\n", " \"seed\": 42,\n", " }\n", "\n", diff --git a/notebook/agentchat_groupchat_research.ipynb b/notebook/agentchat_groupchat_research.ipynb index d7b562e3d74..79cda682a32 100644 --- a/notebook/agentchat_groupchat_research.ipynb +++ b/notebook/agentchat_groupchat_research.ipynb @@ -116,7 +116,7 @@ " \"seed\": 42, # change the seed for different trials\n", " \"temperature\": 0,\n", " \"config_list\": config_list_gpt4,\n", - " \"request_timeout\": 120,\n", + " \"timeout\": 120,\n", "}\n", "user_proxy = autogen.UserProxyAgent(\n", " name=\"Admin\",\n", diff --git a/notebook/agentchat_human_feedback.ipynb b/notebook/agentchat_human_feedback.ipynb index ae5c0ea07b2..0d7fa3c9778 100644 --- a/notebook/agentchat_human_feedback.ipynb +++ b/notebook/agentchat_human_feedback.ipynb @@ -152,10 +152,7 @@ " name=\"user_proxy\",\n", " human_input_mode=\"ALWAYS\",\n", " is_termination_msg=lambda x: x.get(\"content\", \"\").rstrip().endswith(\"TERMINATE\"),\n", - ")\n", - "\n", - "# the purpose of the following line is to log the conversation history\n", - "autogen.ChatCompletion.start_logging()\n" + ")\n" ] }, { @@ -372,24 +369,16 @@ "\n", "The human user can provide feedback at each step. When the human user didn't provide feedback, the code was executed. The executed results and error messages are returned to the assistant, and the assistant is able to modify the code based on the feedback. In the end, the task is complete and a \"TERMINATE\" signal is sent from the assistant. The user skipped feedback in the end and the conversation is finished.\n", "\n", - "After the conversation is finished, we can save the conversation log between the two agents. The log can be accessed from `autogen.ChatCompletion.logged_history`." + "After the conversation is finished, we can save the conversations between the two agents. The conversation can be accessed from `user_proxy.chat_messages`." 
] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'[{\"content\": \"You are a helpful AI assistant.\\\\nSolve tasks using your coding and language skills.\\\\nIn the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute.\\\\n 1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself.\\\\n 2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly.\\\\nSolve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill.\\\\nWhen using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can\\'t modify your code. So do not suggest incomplete code which requires users to modify. Don\\'t use a code block if it\\'s not intended to be executed by the user.\\\\nIf you want the user to save the code in a file before executing it, put # filename: inside the code block as the first line. Don\\'t include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use \\'print\\' function for the output when relevant. Check the execution result returned by the user.\\\\nIf the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can\\'t be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.\\\\nWhen you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible.\\\\nReply \\\\\"TERMINATE\\\\\" in the end when everything is done.\\\\n \", \"role\": \"system\"}, {\"content\": \"\\\\nFind $a + b + c$, given that $x+y \\\\\\\\neq -1$ and \\\\n\\\\\\\\begin{align}\\\\n\\\\tax + by + c & = x + 7,\\\\\\\\\\\\n\\\\ta + bx + cy & = 2x + 6y,\\\\\\\\\\\\n\\\\tay + b + cx & = 4x + y.\\\\n\\\\\\\\end{align}.\\\\n\", \"role\": \"user\"}, {\"content\": \"To solve the system of linear equations, I will use NumPy library in Python to calculate the coefficients and constants and then use the linalg.solve() function to find the values of a, b, and c. 
Finally, I will add them up to find $a + b + c$.\\\\n\\\\nStep 1: Define the coefficients and constants\\\\n\\\\nStep 2: Solve the system of equations using NumPy\\\\n\\\\nStep 3: Calculate the sum of a, b, and c\", \"role\": \"assistant\"}, {\"content\": \"use sympy\", \"role\": \"user\"}, {\"content\": \"Sure, I will use Sympy to solve the system of linear equations.\\\\n\\\\nStep 1: Import Sympy library and define the symbols\\\\n``` python\\\\nfrom sympy import symbols, Eq, solve\\\\n\\\\na, b, c, x, y = symbols(\\'a b c x y\\')\\\\n```\\\\n\\\\nStep 2: Define the equations\\\\n``` python\\\\neq1 = Eq(a * x + b * y + c, x + 7)\\\\neq2 = Eq(a + b * x + c * y, 2 * x + 6 * y)\\\\neq3 = Eq(a * y + b + c * x, 4 * x + y)\\\\n```\\\\n\\\\nStep 3: Solve the system of equations\\\\n``` python\\\\nsolutions = solve((eq1, eq2, eq3), (a, b, c))\\\\n```\\\\n\\\\nStep 4: Calculate the sum of a, b, and c\\\\n``` python\\\\na_value, b_value, c_value = solutions[a], solutions[b], solutions[c]\\\\nsum_abc = a_value + b_value + c_value\\\\n```\\\\n\\\\n\", \"role\": \"assistant\"}, {\"content\": \"exitcode: 1 (execution failed)\\\\nCode output: \\\\n File \\\\\"\\\\\", line 2\\\\n Step 2: Define the equations\\\\n ^\\\\nSyntaxError: invalid syntax\\\\n\", \"role\": \"user\"}, {\"content\": \"Apologies for the confusion. I should have provided the code in a single block without the step-by-step comments. Here is the code to solve the system of linear equations using Sympy:\\\\n\\\\n```python\\\\nfrom sympy import symbols, Eq, solve\\\\n\\\\na, b, c, x, y = symbols(\\'a b c x y\\')\\\\n\\\\neq1 = Eq(a * x + b * y + c, x + 7)\\\\neq2 = Eq(a + b * x + c * y, 2 * x + 6 * y)\\\\neq3 = Eq(a * y + b + c * x, 4 * x + y)\\\\n\\\\nsolutions = solve((eq1, eq2, eq3), (a, b, c))\\\\n\\\\na_value, b_value, c_value = solutions[a], solutions[b], solutions[c]\\\\nsum_abc = a_value + b_value + c_value\\\\nprint(sum_abc)\\\\n```\\\\n\\\\nPlease run the provided code to find the sum of a, b, and c.\", \"role\": \"assistant\"}, {\"content\": \"exitcode: 0 (execution succeeded)\\\\nCode output: \\\\n(2*x**2 - 3*x + y)/(x**2 - x*y - x + y**2 - y + 1) + (x**2 - 3*x*y + 2*x + y**2 - y)/(x**2 - x*y - x + y**2 - y + 1) + (4*x**2 - 4*x*y - 6*x + 6*y**2 - 7*y + 7)/(x**2 - x*y - x + y**2 - y + 1)\\\\n\", \"role\": \"user\"}, {\"content\": \"I apologize for the confusion. The code provided the sum of a, b, and c in a symbolic representation. To get an answer, we need to put some constraints on x and y for which x + y is not equal to -1.\\\\n\\\\nWe can choose any values for x and y that don\\'t satisfy x + y = -1, and then substitute them into the equation to find the corresponding sum of a, b, and c. 
Let\\'s use x = 1 and y = 0, which satisfy the given condition.\\\\n\\\\nHere is the updated code with the chosen values for x and y:\\\\n\\\\n```python\\\\nfrom sympy import symbols, Eq, solve\\\\n\\\\na, b, c, x, y = symbols(\\'a b c x y\\')\\\\n\\\\neq1 = Eq(a * x + b * y + c, x + 7)\\\\neq2 = Eq(a + b * x + c * y, 2 * x + 6 * y)\\\\neq3 = Eq(a * y + b + c * x, 4 * x + y)\\\\n\\\\nsolutions = solve((eq1, eq2, eq3), (a, b, c))\\\\n\\\\nchosen_x = 1\\\\nchosen_y = 0\\\\n\\\\na_value, b_value, c_value = solutions[a].subs(x, chosen_x).subs(y, chosen_y), solutions[b].subs(x, chosen_x).subs(y, chosen_y), solutions[c].subs(x, chosen_x).subs(y, chosen_y)\\\\nsum_abc = a_value + b_value + c_value\\\\nprint(sum_abc)\\\\n```\\\\n\\\\nPlease run the provided code again to find the sum of a, b, and c.\", \"role\": \"assistant\"}, {\"content\": \"exitcode: 0 (execution succeeded)\\\\nCode output: \\\\n7\\\\n\", \"role\": \"user\"}, {\"content\": \"Great! Now we have the sum of a, b, and c. The answer is:\\\\n\\\\n$$a + b + c = 7$$\\\\n\\\\nTERMINATE\", \"role\": \"assistant\"}]': {'created_at': [0, 1, 2, 3, 4], 'cost': [0.022019999999999998, 0.03305999999999999, 0.04019999999999999, 0.058589999999999996, 0.050969999999999994]}}\n" - ] - } - ], + "outputs": [], "source": [ - "print(autogen.ChatCompletion.logged_history)" + "print(user_proxy.chat_messages[assistant])" ] }, { @@ -400,7 +389,7 @@ "source": [ "import json\n", "\n", - "json.dump(autogen.ChatCompletion.logged_history, open(\"conversations.json\", \"w\"), indent=2)" + "json.dump(user_proxy.chat_messages[assistant], open(\"conversations.json\", \"w\"), indent=2)" ] } ], diff --git a/notebook/agentchat_langchain.ipynb b/notebook/agentchat_langchain.ipynb index f1eac5f4935..314df30530d 100644 --- a/notebook/agentchat_langchain.ipynb +++ b/notebook/agentchat_langchain.ipynb @@ -328,7 +328,7 @@ " generate_llm_config(read_file_tool),\n", " ],\n", " \"config_list\": config_list, # Assuming you have this defined elsewhere\n", - " \"request_timeout\": 120,\n", + " \"timeout\": 120,\n", "}\n", "\n", "user_proxy = autogen.UserProxyAgent(\n", @@ -681,7 +681,7 @@ "llm_config = {\n", " \"functions\": tools,\n", " \"config_list\": config_list, # Assuming you have this defined elsewhere\n", - " \"request_timeout\": 120,\n", + " \"timeout\": 120,\n", "}\n", "\n", "user_proxy = autogen.UserProxyAgent(\n", diff --git a/notebook/agentchat_lmm_llava.ipynb b/notebook/agentchat_lmm_llava.ipynb index a3a51d3abfb..d42fb53a0f5 100644 --- a/notebook/agentchat_lmm_llava.ipynb +++ b/notebook/agentchat_lmm_llava.ipynb @@ -93,16 +93,14 @@ "source": [ "import requests\n", "import json\n", - "import os\n", "from llava.conversation import default_conversation as conv\n", "from llava.conversation import Conversation\n", "\n", - "from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union\n", + "from typing import Dict, List, Optional, Tuple, Union\n", "\n", "import autogen\n", - "from autogen import AssistantAgent, Agent, UserProxyAgent, ConversableAgent\n", - "from termcolor import colored\n", - "import random" + "from autogen import AssistantAgent, Agent, ConversableAgent, OpenAIWrapper\n", + "from termcolor import colored" ] }, { @@ -650,7 +648,7 @@ " content = message.get(\"content\")\n", " if content is not None:\n", " if \"context\" in message:\n", - " content = oai.ChatCompletion.instantiate(\n", + " content = OpenAIWrapper.instantiate(\n", " content,\n", " message[\"context\"],\n", " self.llm_config and self.llm_config.get(\"allow_format_str_template\", 
False),\n", diff --git a/notebook/agentchat_planning.ipynb b/notebook/agentchat_planning.ipynb index 042fb7c6dfd..0d5679ce6a8 100644 --- a/notebook/agentchat_planning.ipynb +++ b/notebook/agentchat_planning.ipynb @@ -161,7 +161,7 @@ " name=\"assistant\",\n", " llm_config={\n", " \"temperature\": 0,\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"model\": \"gpt-4-0613\",\n", " \"config_list\": autogen.config_list_openai_aoai(exclude=\"aoai\"),\n", diff --git a/notebook/agentchat_qdrant_RetrieveChat.ipynb b/notebook/agentchat_qdrant_RetrieveChat.ipynb index 42a5cf82f4c..3a97007c5d9 100644 --- a/notebook/agentchat_qdrant_RetrieveChat.ipynb +++ b/notebook/agentchat_qdrant_RetrieveChat.ipynb @@ -159,14 +159,12 @@ "from autogen.agentchat.contrib.qdrant_retrieve_user_proxy_agent import QdrantRetrieveUserProxyAgent\n", "from qdrant_client import QdrantClient\n", "\n", - "autogen.ChatCompletion.start_logging()\n", - "\n", "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n", "assistant = RetrieveAssistantAgent(\n", " name=\"assistant\", \n", " system_message=\"You are a helpful assistant.\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": config_list,\n", " },\n", diff --git a/notebook/agentchat_stream.ipynb b/notebook/agentchat_stream.ipynb index 3851df9d7d5..35666f03ecd 100644 --- a/notebook/agentchat_stream.ipynb +++ b/notebook/agentchat_stream.ipynb @@ -228,7 +228,7 @@ "assistant = autogen.AssistantAgent(\n", " name=\"assistant\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 41,\n", " \"config_list\": config_list,\n", " \"temperature\": 0,\n", diff --git a/notebook/agentchat_teachability.ipynb b/notebook/agentchat_teachability.ipynb index 10f836e0871..66439fb91ea 100644 --- a/notebook/agentchat_teachability.ipynb +++ b/notebook/agentchat_teachability.ipynb @@ -142,7 +142,7 @@ "from autogen import UserProxyAgent\n", "\n", "llm_config = {\n", - " \"request_timeout\": 60,\n", + " \"timeout\": 60,\n", " \"config_list\": config_list,\n", " \"use_cache\": True, # Use False to explore LLM non-determinism.\n", "}\n", @@ -160,8 +160,6 @@ " def colored(x, *args, **kwargs):\n", " return x\n", " \n", - "autogen.ChatCompletion.start_logging()\n", - "\n", "teachable_agent = TeachableAgent(\n", " name=\"teachableagent\",\n", " llm_config=llm_config,\n", diff --git a/notebook/agentchat_teaching.ipynb b/notebook/agentchat_teaching.ipynb index f7301676e44..dcee7cd5b02 100644 --- a/notebook/agentchat_teaching.ipynb +++ b/notebook/agentchat_teaching.ipynb @@ -84,7 +84,7 @@ "import autogen\n", "\n", "llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 44, # change the seed for different trials\n", " \"config_list\": autogen.config_list_from_json(\n", " \"OAI_CONFIG_LIST\",\n", diff --git a/notebook/agentchat_two_users.ipynb b/notebook/agentchat_two_users.ipynb index ff0d08d16cc..355693ac047 100644 --- a/notebook/agentchat_two_users.ipynb +++ b/notebook/agentchat_two_users.ipynb @@ -160,7 +160,7 @@ " name=\"assistant_for_student\",\n", " system_message=\"You are a helpful assistant. 
Reply TERMINATE when the task is done.\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " # Excluding azure openai endpoints from the config list.\n", " # Change to `exclude=\"openai\"` to exclude openai endpoints, or remove the `exclude` argument to include both.\n", diff --git a/notebook/agentchat_web_info.ipynb b/notebook/agentchat_web_info.ipynb index 9229365a40d..221462edc02 100644 --- a/notebook/agentchat_web_info.ipynb +++ b/notebook/agentchat_web_info.ipynb @@ -78,7 +78,7 @@ ")\n", "\n", "llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": config_list,\n", " \"temperature\": 0,\n", diff --git a/notebook/oai_chatgpt_gpt4.ipynb b/notebook/oai_chatgpt_gpt4.ipynb index ba787af4cdb..921f1279063 100644 --- a/notebook/oai_chatgpt_gpt4.ipynb +++ b/notebook/oai_chatgpt_gpt4.ipynb @@ -51,7 +51,7 @@ }, "outputs": [], "source": [ - "# %pip install \"pyautogen[blendsearch]\" datasets" + "# %pip install \"pyautogen[blendsearch]<0.2\" datasets" ] }, { diff --git a/notebook/oai_openai_utils.ipynb b/notebook/oai_openai_utils.ipynb index 94cdcbb736f..1c87b164120 100644 --- a/notebook/oai_openai_utils.ipynb +++ b/notebook/oai_openai_utils.ipynb @@ -226,7 +226,7 @@ "assistant_one = autogen.AssistantAgent(\n", " name=\"3.5-assistant\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": cheap_config_list,\n", " \"temperature\": 0,\n", @@ -237,7 +237,7 @@ "assistant_two = autogen.AssistantAgent(\n", " name=\"4-assistant\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": costly_config_list,\n", " \"temperature\": 0,\n", diff --git a/test/agentchat/chat_with_teachable_agent.py b/test/agentchat/chat_with_teachable_agent.py index 211ebe59097..f11635a8c68 100644 --- a/test/agentchat/chat_with_teachable_agent.py +++ b/test/agentchat/chat_with_teachable_agent.py @@ -26,7 +26,7 @@ def create_teachable_agent(reset_db=False): config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) teachable_agent = TeachableAgent( name="teachableagent", - llm_config={"config_list": config_list, "request_timeout": 120, "use_cache": use_cache}, + llm_config={"config_list": config_list, "timeout": 120, "use_cache": use_cache}, teach_config={ "verbosity": verbosity, "reset_db": reset_db, diff --git a/test/agentchat/test_assistant_agent.py b/test/agentchat/test_assistant_agent.py index 1239bde453b..61510a44f9c 100644 --- a/test/agentchat/test_assistant_agent.py +++ b/test/agentchat/test_assistant_agent.py @@ -20,7 +20,7 @@ def test_ai_user_proxy_agent(): return conversations = {} - autogen.ChatCompletion.start_logging(conversations) + # autogen.ChatCompletion.start_logging(conversations) config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, @@ -30,7 +30,7 @@ def test_ai_user_proxy_agent(): "assistant", system_message="You are a helpful assistant.", llm_config={ - "request_timeout": 600, + "timeout": 600, "seed": 42, "config_list": config_list, }, @@ -97,7 +97,10 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5): "timeout": 60, }, llm_config=llm_config, - system_message="""Reply TERMINATE to end the conversation.""", + system_message="""Is code provided but not enclosed in ``` blocks? +If so, remind that code blocks need to be enclosed in ``` blocks. 
+Reply TERMINATE to end the conversation if the task is finished. Don't say appreciation. +If "Thank you" or "You\'re welcome" are said in the conversation, then say TERMINATE and that is your last message.""", ) user.initiate_chat(assistant, message="TERMINATE") # should terminate without sending any message @@ -119,9 +122,9 @@ def test_create_execute_script(human_input_mode="NEVER", max_consecutive_auto_re config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, file_location=KEY_LOC) conversations = {} - autogen.ChatCompletion.start_logging(conversations) + # autogen.ChatCompletion.start_logging(conversations) llm_config = { - "request_timeout": 600, + "timeout": 600, "seed": 42, "config_list": config_list, } @@ -148,12 +151,12 @@ def test_create_execute_script(human_input_mode="NEVER", max_consecutive_auto_re ```""", ) print(conversations) - autogen.ChatCompletion.print_usage_summary() - autogen.ChatCompletion.start_logging(compact=False) + # autogen.ChatCompletion.print_usage_summary() + # autogen.ChatCompletion.start_logging(compact=False) user.send("""Execute temp.py""", assistant) - print(autogen.ChatCompletion.logged_history) - autogen.ChatCompletion.print_usage_summary() - autogen.ChatCompletion.stop_logging() + # print(autogen.ChatCompletion.logged_history) + # autogen.ChatCompletion.print_usage_summary() + # autogen.ChatCompletion.stop_logging() def test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=10): @@ -184,7 +187,7 @@ def __init__(self, *args, **kwargs): def generate_init_message(self, question) -> str: return self._prompt.format(question=question) - autogen.ChatCompletion.start_logging() + # autogen.ChatCompletion.start_logging() assistant = AssistantAgent("assistant", llm_config={"temperature": 0, "config_list": config_list}) user = TSPUserProxyAgent( "user", @@ -193,14 +196,14 @@ def generate_init_message(self, question) -> str: max_consecutive_auto_reply=max_consecutive_auto_reply, ) user.initiate_chat(assistant, question=hard_questions[2]) - print(autogen.ChatCompletion.logged_history) - autogen.ChatCompletion.stop_logging() + # print(autogen.ChatCompletion.logged_history) + # autogen.ChatCompletion.stop_logging() if __name__ == "__main__": - test_gpt35() + # test_gpt35() # test_create_execute_script(human_input_mode="TERMINATE") # when GPT-4, i.e., the DEFAULT_MODEL, is used, conversation in the following test # should terminate in 2-3 rounds of interactions (because is_termination_msg should be true after 2-3 rounds) # although the max_consecutive_auto_reply is set to 10. 
- # test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=10) + test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=10) diff --git a/test/agentchat/test_async.py b/test/agentchat/test_async.py index 9a806e6af40..f8f89b7495b 100644 --- a/test/agentchat/test_async.py +++ b/test/agentchat/test_async.py @@ -68,7 +68,7 @@ async def add_stock_price_data(): assistant = autogen.AssistantAgent( name="assistant", llm_config={ - "request_timeout": 600, + "timeout": 600, "seed": 41, "config_list": config_list, "temperature": 0, diff --git a/test/agentchat/test_conversable_agent.py b/test/agentchat/test_conversable_agent.py index 905e34bddd9..2d26027a34a 100644 --- a/test/agentchat/test_conversable_agent.py +++ b/test/agentchat/test_conversable_agent.py @@ -15,7 +15,7 @@ def conversable_agent(): def test_trigger(): agent = ConversableAgent("a0", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") - agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, human_input_mode="NEVER") + agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") agent.register_reply(agent1, lambda recipient, messages, sender, config: (True, "hello")) agent1.initiate_chat(agent, message="hi") assert agent1.last_message(agent)["content"] == "hello" @@ -53,7 +53,7 @@ def test_trigger(): def test_context(): agent = ConversableAgent("a0", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") - agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, human_input_mode="NEVER") + agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") agent1.send( { "content": "hello {name}", @@ -131,7 +131,7 @@ def test_generate_code_execution_reply(): def test_max_consecutive_auto_reply(): agent = ConversableAgent("a0", max_consecutive_auto_reply=2, llm_config=False, human_input_mode="NEVER") - agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, human_input_mode="NEVER") + agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") assert agent.max_consecutive_auto_reply() == agent.max_consecutive_auto_reply(agent1) == 2 agent.update_max_consecutive_auto_reply(1) assert agent.max_consecutive_auto_reply() == agent.max_consecutive_auto_reply(agent1) == 1 @@ -159,8 +159,8 @@ def test_max_consecutive_auto_reply(): def test_conversable_agent(): - dummy_agent_1 = ConversableAgent(name="dummy_agent_1", human_input_mode="ALWAYS") - dummy_agent_2 = ConversableAgent(name="dummy_agent_2", human_input_mode="TERMINATE") + dummy_agent_1 = ConversableAgent(name="dummy_agent_1", llm_config=False, human_input_mode="ALWAYS") + dummy_agent_2 = ConversableAgent(name="dummy_agent_2", llm_config=False, human_input_mode="TERMINATE") # monkeypatch.setattr(sys, "stdin", StringIO("exit")) dummy_agent_1.receive("hello", dummy_agent_2) # receive a str @@ -212,7 +212,9 @@ def add_num(num_to_be_added): given_num = 10 return num_to_be_added + given_num - dummy_agent_2 = ConversableAgent(name="user_proxy", human_input_mode="TERMINATE", function_map={"add_num": add_num}) + dummy_agent_2 = ConversableAgent( + name="user_proxy", llm_config=False, human_input_mode="TERMINATE", function_map={"add_num": add_num} + ) messsages = [{"function_call": {"name": "add_num", "arguments": '{ "num_to_be_added": 5 }'}, "role": "assistant"}] # when sender is None, messages is provided @@ -221,7 +223,7 @@ def add_num(num_to_be_added): ), "generate_reply not working when sender is 
None" # when sender is provided, messages is None - dummy_agent_1 = ConversableAgent(name="dummy_agent_1", human_input_mode="ALWAYS") + dummy_agent_1 = ConversableAgent(name="dummy_agent_1", llm_config=False, human_input_mode="ALWAYS") dummy_agent_2._oai_messages[dummy_agent_1] = messsages assert ( dummy_agent_2.generate_reply(messages=None, sender=dummy_agent_1)["content"] == "15" diff --git a/test/test_function_call.py b/test/agentchat/test_function_call.py similarity index 95% rename from test/test_function_call.py rename to test/agentchat/test_function_call.py index 9b026ca3c1d..c373e6ee860 100644 --- a/test/test_function_call.py +++ b/test/agentchat/test_function_call.py @@ -1,16 +1,16 @@ try: - import openai + from openai import OpenAI except ImportError: - openai = None + OpenAI = None import pytest import json import autogen from autogen.math_utils import eval_math_responses -from test_code import KEY_LOC +from test_assistant_agent import KEY_LOC -@pytest.mark.skipif(openai is None, reason="openai not installed") -def test_eval_math_responses(): +@pytest.mark.skipif(OpenAI is None, reason="openai>=1 not installed") +def _test_eval_math_responses(): config_list = autogen.config_list_from_models( KEY_LOC, exclude="aoai", model_list=["gpt-4-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k"] ) @@ -130,4 +130,4 @@ def get_number(): if __name__ == "__main__": test_json_extraction() test_execute_function() - test_eval_math_responses() + # _test_eval_math_responses() diff --git a/test/agentchat/test_math_user_proxy_agent.py b/test/agentchat/test_math_user_proxy_agent.py index 7874c2168e4..2975e8d0f16 100644 --- a/test/agentchat/test_math_user_proxy_agent.py +++ b/test/agentchat/test_math_user_proxy_agent.py @@ -8,21 +8,23 @@ ) from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST +try: + from openai import OpenAI +except ImportError: + skip = True +else: + skip = False + @pytest.mark.skipif( - sys.platform in ["darwin", "win32"], + skip or sys.platform in ["darwin", "win32"], reason="do not run on MacOS or windows", ) def test_math_user_proxy_agent(): - try: - import openai - except ImportError: - return - from autogen.agentchat.assistant_agent import AssistantAgent conversations = {} - autogen.ChatCompletion.start_logging(conversations) + # autogen.ChatCompletion.start_logging(conversations) config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, @@ -35,7 +37,7 @@ def test_math_user_proxy_agent(): "assistant", system_message="You are a helpful assistant.", llm_config={ - "request_timeout": 600, + "timeout": 600, "seed": 42, "config_list": config_list, }, diff --git a/test/test_with_openai.py b/test/agentchat/test_py10.py similarity index 92% rename from test/test_with_openai.py rename to test/agentchat/test_py10.py index ed63324be6a..fefb620ad1c 100644 --- a/test/test_with_openai.py +++ b/test/agentchat/test_py10.py @@ -1,13 +1,14 @@ import autogen import pytest import sys +from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST try: - import openai - - skip = False + from openai import OpenAI except ImportError: skip = True +else: + skip = False @pytest.mark.skipif( @@ -21,10 +22,11 @@ def get_random_number(): return random.randint(0, 100) config_list_gpt4 = autogen.config_list_from_json( - "OAI_CONFIG_LIST", + OAI_CONFIG_LIST, filter_dict={ "model": ["gpt-4", "gpt-4-0314", "gpt4", "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-v0314"], }, + file_location=KEY_LOC, ) llm_config = { "config_list": config_list_gpt4, diff --git a/test/agentchat/test_qdrant_retrievechat.py 
b/test/agentchat/test_qdrant_retrievechat.py index 9600b507e04..3202b04a0df 100644 --- a/test/agentchat/test_qdrant_retrievechat.py +++ b/test/agentchat/test_qdrant_retrievechat.py @@ -30,7 +30,7 @@ def test_retrievechat(): return conversations = {} - ChatCompletion.start_logging(conversations) + # ChatCompletion.start_logging(conversations) config_list = config_list_from_json( OAI_CONFIG_LIST, @@ -44,7 +44,7 @@ def test_retrievechat(): name="assistant", system_message="You are a helpful assistant.", llm_config={ - "request_timeout": 600, + "timeout": 600, "seed": 42, "config_list": config_list, }, diff --git a/test/agentchat/test_retrievechat.py b/test/agentchat/test_retrievechat.py index 99e395de505..c9285783367 100644 --- a/test/agentchat/test_retrievechat.py +++ b/test/agentchat/test_retrievechat.py @@ -30,7 +30,7 @@ def test_retrievechat(): return conversations = {} - autogen.ChatCompletion.start_logging(conversations) + # autogen.ChatCompletion.start_logging(conversations) config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, @@ -44,7 +44,7 @@ def test_retrievechat(): name="assistant", system_message="You are a helpful assistant.", llm_config={ - "request_timeout": 600, + "timeout": 600, "seed": 42, "config_list": config_list, }, diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 7a3367dbd72..698ab77ed5d 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -1,14 +1,15 @@ try: - import openai - - skip = False + from openai import OpenAI except ImportError: skip = True +else: + skip = False + import pytest import sys from autogen import ConversableAgent, config_list_from_json from autogen.agentchat.contrib.teachable_agent import TeachableAgent - +from test_assistant_agent import OAI_CONFIG_LIST, KEY_LOC try: from termcolor import colored @@ -24,7 +25,8 @@ def colored(x, *args, **kwargs): assert_on_error = False # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable. recall_threshold = 1.5 # Higher numbers allow more (but less relevant) memos to be recalled. -use_cache = False # If True, cached LLM calls will be skipped and responses pulled from cache. False exposes LLM non-determinism. +seed = None +# If int, cached LLM calls will be skipped and responses pulled from cache. None exposes LLM non-determinism. # Specify the model to use by uncommenting one of the following lines. 
# filter_dict={"model": ["gpt-4-0613"]} @@ -38,10 +40,10 @@ def create_teachable_agent(reset_db=False, verbosity=0): # Load LLM inference endpoints from an env variable or a file # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints # and OAI_CONFIG_LIST_sample - config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) + config_list = config_list_from_json(env_or_file=OAI_CONFIG_LIST, filter_dict=filter_dict, file_location=KEY_LOC) teachable_agent = TeachableAgent( name="teachableagent", - llm_config={"config_list": config_list, "request_timeout": 120, "use_cache": use_cache}, + llm_config={"config_list": config_list, "timeout": 120, "seed": seed}, teach_config={ "verbosity": verbosity, "reset_db": reset_db, @@ -137,8 +139,8 @@ def use_task_advice_pair_phrasing(): @pytest.mark.skipif( - skip or not sys.version.startswith("3.9"), - reason="do not run if openai is not installed or py!=3.9", + skip or not sys.version.startswith("3.11"), + reason="do not run if openai is not installed or py!=3.11", ) def test_all(): """Runs this file's unit tests.""" diff --git a/test/oai/test_completion.py b/test/oai/_test_completion.py similarity index 99% rename from test/oai/test_completion.py rename to test/oai/_test_completion.py index dd03864de8e..d120fd4485b 100644 --- a/test/oai/test_completion.py +++ b/test/oai/_test_completion.py @@ -13,9 +13,8 @@ generate_code, ) from autogen.math_utils import eval_math_responses, solve_problem +from test.oai.test_utils import KEY_LOC, OAI_CONFIG_LIST -KEY_LOC = "notebook" -OAI_CONFIG_LIST = "OAI_CONFIG_LIST" here = os.path.abspath(os.path.dirname(__file__)) diff --git a/test/oai/test_utils.py b/test/oai/test_utils.py index 04a8596fb54..579fc6f9d8a 100644 --- a/test/oai/test_utils.py +++ b/test/oai/test_utils.py @@ -5,10 +5,12 @@ import logging import tempfile from unittest import mock -from test_completion import KEY_LOC, OAI_CONFIG_LIST +import autogen # noqa: E402 + +KEY_LOC = "notebook" +OAI_CONFIG_LIST = "OAI_CONFIG_LIST" sys.path.append("../../autogen") -import autogen # noqa: E402 # Example environment variables ENV_VARS = { diff --git a/test/test_client.py b/test/test_client.py index bcec8cb8f08..99fefa34d03 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -1,5 +1,6 @@ import pytest from autogen import OpenAIWrapper, config_list_from_json, config_list_openai_aoai +from test_code import OAI_CONFIG_LIST, KEY_LOC try: from openai import OpenAI @@ -9,11 +10,24 @@ skip = False +@pytest.mark.skipif(skip, reason="openai>=1 not installed") +def test_aoai_chat_completion(): + config_list = config_list_from_json( + env_or_file=OAI_CONFIG_LIST, + file_location=KEY_LOC, + filter_dict={"api_type": ["azure"]}, + ) + client = OpenAIWrapper(config_list=config_list) + response = client.create(messages=[{"role": "user", "content": "2+2="}]) + print(response) + print(client.extract_text_or_function_call(response)) + + @pytest.mark.skipif(skip, reason="openai>=1 not installed") def test_chat_completion(): config_list = config_list_from_json( - env_or_file="OAI_CONFIG_LIST", - file_location="notebook", + env_or_file=OAI_CONFIG_LIST, + file_location=KEY_LOC, ) client = OpenAIWrapper(config_list=config_list) response = client.create(messages=[{"role": "user", "content": "1+1="}]) @@ -23,7 +37,7 @@ def test_chat_completion(): @pytest.mark.skipif(skip, reason="openai>=1 not installed") def test_completion(): - config_list = config_list_openai_aoai("notebook") + config_list = config_list_openai_aoai(KEY_LOC) client 
= OpenAIWrapper(config_list=config_list) response = client.create(prompt="1+1=", model="gpt-3.5-turbo-instruct") print(response) @@ -31,5 +45,6 @@ def test_completion(): if __name__ == "__main__": + test_aoai_chat_completion() test_chat_completion() test_completion() diff --git a/test/test_code.py b/test/test_code.py index 65c407cc316..32766d4d70c 100644 --- a/test/test_code.py +++ b/test/test_code.py @@ -284,7 +284,7 @@ def test_execute_code_no_docker(): assert image is None -def test_improve(): +def _test_improve(): try: import openai except ImportError: diff --git a/test/test_notebook.py b/test/test_notebook.py index 68d3052750f..a796204cd9c 100644 --- a/test/test_notebook.py +++ b/test/test_notebook.py @@ -56,7 +56,7 @@ def test_agentchat_auto_feedback_from_code(save=False): skip or not sys.version.startswith("3.10"), reason="do not run if openai is not installed or py!=3.10", ) -def test_oai_completion(save=False): +def _test_oai_completion(save=False): run_notebook("oai_completion.ipynb", save=save) @@ -80,7 +80,7 @@ def test_agentchat_MathChat(save=False): skip or not sys.version.startswith("3.11"), reason="do not run if openai is not installed or py!=3.11", ) -def test_oai_chatgpt_gpt4(save=False): +def _test_oai_chatgpt_gpt4(save=False): run_notebook("oai_chatgpt_gpt4.ipynb", save=save) diff --git a/website/blog/2023-07-14-Local-LLMs/index.mdx b/website/blog/2023-07-14-Local-LLMs/index.mdx index e7ff8066d5c..8c06ae1e199 100644 --- a/website/blog/2023-07-14-Local-LLMs/index.mdx +++ b/website/blog/2023-07-14-Local-LLMs/index.mdx @@ -64,7 +64,7 @@ class CompletionResponseStreamChoice(BaseModel): ``` -## Interact with model using `oai.Completion` +## Interact with model using `oai.Completion` (requires openai<1) Now the models can be directly accessed through openai-python library as well as `autogen.oai.Completion` and `autogen.oai.ChatCompletion`. diff --git a/website/blog/2023-10-18-RetrieveChat/index.mdx b/website/blog/2023-10-18-RetrieveChat/index.mdx index 71d2ad3f46c..75fde3916fb 100644 --- a/website/blog/2023-10-18-RetrieveChat/index.mdx +++ b/website/blog/2023-10-18-RetrieveChat/index.mdx @@ -292,7 +292,7 @@ llm_config = { }, ], "config_list": config_list, - "request_timeout": 60, + "timeout": 60, "seed": 42, } diff --git a/website/docs/FAQ.md b/website/docs/FAQ.md index cf0ce6ace16..92cd8804669 100644 --- a/website/docs/FAQ.md +++ b/website/docs/FAQ.md @@ -95,7 +95,7 @@ assistant = autogen.AssistantAgent( ) ``` -The `llm_config` is used in the [`create`](/docs/reference/oai/completion#create) function for LLM inference. +The `llm_config` is used in the [`create`](/docs/reference/oai/client#create) function for LLM inference. When `llm_config` is not provided, the agent will rely on other openai settings such as `openai.api_key` or the environment variable `OPENAI_API_KEY`, which can also work when you'd like to use a single endpoint. You can also explicitly specify that by: ```python @@ -108,11 +108,10 @@ Yes. Please check https://microsoft.github.io/autogen/blog/2023/07/14/Local-LLMs ## Handle Rate Limit Error and Timeout Error -You can set `retry_wait_time` and `max_retry_period` to handle rate limit error. And you can set `request_timeout` to handle timeout error. They can all be specified in `llm_config` for an agent, which will be used in the [`create`](/docs/reference/oai/completion#create) function for LLM inference. +You can set `max_retries` to handle rate limit error. And you can set `timeout` to handle timeout error. 
They can all be specified in `llm_config` for an agent, which will be used in the OpenAI client for LLM inference. They can be set differently for different clients if they are set in the `config_list`.
 
-- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
-- `max_retry_period` (int): the total timeout (in seconds) allowed for retrying failed requests.
-- `request_timeout` (int): the timeout (in seconds) sent with a single request.
+- `max_retries` (int): the total number of times allowed for retrying failed requests for a single client.
+- `timeout` (int): the timeout (in seconds) for a single client.
 
 Please refer to the [documentation](/docs/Use-Cases/enhanced_inference#runtime-error) for more info.
 
diff --git a/website/docs/Getting-Started.md b/website/docs/Getting-Started.md
index c0e53aea5cd..fb16de2242a 100644
--- a/website/docs/Getting-Started.md
+++ b/website/docs/Getting-Started.md
@@ -46,7 +46,7 @@ The figure below shows an example conversation flow with AutoGen.
 
 #### Enhanced LLM Inferences
 Autogen also helps maximize the utility out of the expensive LLMs such as ChatGPT and GPT-4. It offers enhanced LLM inference with powerful functionalities like tuning, caching, error handling, templating. For example, you can optimize generations by LLM with your own tuning data, success metrics and budgets.
 ```python
-# perform tuning
+# perform tuning for openai<1
 config, analysis = autogen.Completion.tune(
     data=tune_data,
     metric="success",
diff --git a/website/docs/Use-Cases/enhanced_inference.md b/website/docs/Use-Cases/enhanced_inference.md
index a7b1a17a725..e22a9ff16f0 100644
--- a/website/docs/Use-Cases/enhanced_inference.md
+++ b/website/docs/Use-Cases/enhanced_inference.md
@@ -1,6 +1,7 @@
 # Enhanced Inference
 
-`autogen.Completion` is a drop-in replacement of `openai.Completion` and `openai.ChatCompletion` for enhanced LLM inference.
+`autogen.OpenAIWrapper` provides enhanced LLM inference for `openai>=1`.
+`autogen.Completion` is a drop-in replacement of `openai.Completion` and `openai.ChatCompletion` for enhanced LLM inference using `openai<1`.
 There are a number of benefits of using `autogen` to perform inference: performance tuning, API unification, caching, error handling, multi-config inference, result filtering, templating and so on.
 
 ## Tune Inference Parameters
 
@@ -108,12 +109,14 @@ The tuned config can be used to perform inference.
 
 ## API unification
 
-`autogen.Completion.create` is compatible with both `openai.Completion.create` and `openai.ChatCompletion.create`, and both OpenAI API and Azure OpenAI API. So models such as "text-davinci-003", "gpt-3.5-turbo" and "gpt-4" can share a common API.
-When chat models are used and `prompt` is given as the input to `autogen.Completion.create`, the prompt will be automatically converted into `messages` to fit the chat completion API requirement. One advantage is that one can experiment with both chat and non-chat models for the same prompt in a unified API.
+
+
+`autogen.OpenAIWrapper.create()` can be used to create completions for both chat and non-chat models, and both OpenAI API and Azure OpenAI API.
 
 For local LLMs, one can spin up an endpoint using a package like [FastChat](https://github.com/lm-sys/FastChat), and then use the same API to send a request. See [here](/blog/2023/07/14/Local-LLMs) for examples on how to make inference with local LLMs.
 
-When only working with the chat-based models, `autogen.ChatCompletion` can be used. It also does automatic conversion from prompt to messages, if prompt is provided instead of messages.
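
To make the unified API concrete, here is a sketch (not part of the patch) of one `OpenAIWrapper` serving both a chat model via `messages` and a completion model via `prompt`, mirroring the calls exercised in `test/test_client.py`; it assumes `config_list` holds only credentials, so the `model` argument passed at call time is not overridden:

```python
from autogen import OpenAIWrapper

client = OpenAIWrapper(config_list=config_list)  # config_list loaded elsewhere

# Chat model: pass messages.
chat_response = client.create(
    messages=[{"role": "user", "content": "2+2="}], model="gpt-3.5-turbo"
)

# Completion (non-chat) model: pass a prompt.
text_response = client.create(prompt="2+2=", model="gpt-3.5-turbo-instruct")
```
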
 
 ## Caching
 
@@ -123,54 +126,57 @@ API call results are cached locally and reused when the same request is issued.
 
 ### Runtime error
 
-It is easy to hit error when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Request timeout, max retry period and retry wait time can be configured via `request_timeout`, `max_retry_period` and `retry_wait_time`.
+
+One can pass a list of configurations of different models/endpoints to mitigate the rate limits. For example,
 
 ```python
-response = autogen.Completion.create(
+client = OpenAIWrapper(
     config_list=[
         {
             "model": "gpt-4",
             "api_key": os.environ.get("AZURE_OPENAI_API_KEY"),
             "api_type": "azure",
             "base_url": os.environ.get("AZURE_OPENAI_API_BASE"),
-            "api_version": "2023-07-01-preview",
+            "api_version": "2023-08-01-preview",
         },
         {
             "model": "gpt-3.5-turbo",
             "api_key": os.environ.get("OPENAI_API_KEY"),
-            "api_type": "open_ai",
             "base_url": "https://api.openai.com/v1",
-            "api_version": None,
         },
         {
-            "model": "llama-7B",
+            "model": "llama2-chat-7B",
             "base_url": "http://127.0.0.1:8080",
-            "api_type": "open_ai",
-            "api_version": None,
         }
     ],
-    prompt="Hi",
 )
 ```
 
-It will try querying Azure OpenAI gpt-4, OpenAI gpt-3.5-turbo, and a locally hosted llama-7B one by one, ignoring AuthenticationError, RateLimitError and Timeout,
+It will try querying Azure OpenAI gpt-4, OpenAI gpt-3.5-turbo, and a locally hosted llama2-chat-7B one by one,
 until a valid result is returned. This can speed up the development process where the rate limit is a bottleneck. An error will be raised if the last choice fails. So make sure the last choice in the list has the best availability.
 
-For convenience, we provide a number of utility functions to load config lists, such as [`config_list_from_json`](/docs/reference/oai/openai_utils#config_list_from_json): The config list like the list of dicts above can be saved in an environment variable or a file in json format and loaded with this function.
+For convenience, we provide a number of utility functions to load config lists.
+- `get_config_list`: Generates configurations for API calls, primarily from provided API keys.
+- `config_list_openai_aoai`: Constructs a list of configurations using both Azure OpenAI and OpenAI endpoints, sourcing API keys from environment variables or local files.
+- `config_list_from_json`: Loads configurations from a JSON structure, either from an environment variable or a local JSON file, with the flexibility of filtering configurations based on given criteria.
+- `config_list_from_models`: Creates configurations based on a provided list of models, useful when targeting specific models without manually specifying each configuration.
+- `config_list_from_dotenv`: Constructs a configuration list from a `.env` file, offering a consolidated way to manage multiple API configurations and keys from a single file.
+
+We suggest that you take a look at this [notebook](https://github.com/microsoft/autogen/blob/main/notebook/oai_openai_utils.ipynb) for full code examples of the different methods to configure your model endpoints.
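
As a quick illustration of the utility functions just listed, the loading pattern used throughout the test changes in this patch looks like the following sketch (the file name, location, and filter values are illustrative):

```python
import autogen

# Load endpoint configs from a JSON file (or an env var of the same name),
# keeping only the Azure OpenAI entries.
config_list = autogen.config_list_from_json(
    env_or_file="OAI_CONFIG_LIST",
    file_location="notebook",
    filter_dict={"api_type": ["azure"]},
)
client = autogen.OpenAIWrapper(config_list=config_list)
```
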
 
 ### Logic error
 
 Another type of error is that the returned response does not satisfy a requirement. For example, if the response is required to be a valid json string, one would like to filter the responses that are not. This can be achieved by providing a list of configurations and a filter function. For example,
 
 ```python
-def valid_json_filter(context, config, response):
-    for text in autogen.Completion.extract_text(response):
+def valid_json_filter(response, **_):
+    for text in OpenAIWrapper.extract_text_or_function_call(response):
         try:
             json.loads(text)
             return True
@@ -178,14 +184,14 @@ def valid_json_filter(context, config, response):
             pass
     return False
 
-response = autogen.Completion.create(
-    config_list=[{"model": "text-ada-001"}, {"model": "gpt-3.5-turbo"}, {"model": "text-davinci-003"}],
+client = OpenAIWrapper(config_list=[{"model": "text-ada-001"}, {"model": "gpt-3.5-turbo-instruct"}, {"model": "text-davinci-003"}])
+response = client.create(
     prompt="How to construct a json request to Bing API to search for 'latest AI news'? Return the JSON request.",
     filter_func=valid_json_filter,
 )
 ```
 
-The example above will try to use text-ada-001, gpt-3.5-turbo, and text-davinci-003 iteratively, until a valid json string is returned or the last config is used. One can also repeat the same model in the list for multiple times to try one model multiple times for increasing the robustness of the final response.
+The example above will try to use text-ada-001, gpt-3.5-turbo-instruct, and text-davinci-003 iteratively, until a valid json string is returned or the last config is used. One can also repeat the same model multiple times in the list to increase the robustness of the final response.
 
 *Advanced use case: Check this [blogpost](/blog/2023/05/18/GPT-adaptive-humaneval) to find how to improve GPT-4's coding performance from 68% to 90% while reducing the inference cost.*
 
@@ -194,7 +200,7 @@
 
 If the provided prompt or message is a template, it will be automatically materialized with a given context. For example,
 
 ```python
-response = autogen.Completion.create(
+response = client.create(
     context={"problem": "How many positive integers, not exceeding 100, are multiples of 2 or 3 but not 4?"},
     prompt="{problem} Solve the problem carefully.",
     allow_format_str_template=True,
     **config
 )
 ```
 
@@ -228,11 +234,11 @@ context = {
     "external_info_0": "Problem 1: ...",
 }
 
-response = autogen.ChatCompletion.create(context, messages=messages, **config)
+response = client.create(context=context, messages=messages, **config)
 messages.append(
     {
         "role": "assistant",
-        "content": autogen.ChatCompletion.extract_text(response)[0]
+        "content": client.extract_text_or_function_call(response)[0]
     }
 )
 messages.append(
@@ -247,10 +253,10 @@
 context.append(
     {
         "external_info_1": "Theorem 1: ...",
     }
 )
-response = autogen.ChatCompletion.create(context, messages=messages, **config)
+response = client.create(context=context, messages=messages, **config)
 ```
 
-## Logging (Experimental)
+## Logging (for openai<1)
 
 When debugging or diagnosing an LLM-based system, it is often convenient to log the API calls and analyze them. `autogen.Completion` and `autogen.ChatCompletion` offer an easy way to collect the API call histories.
For example, to log the chat histories, simply run: ```python From 9b25c91744cf4b69016203d7077673a7d2d7546f Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Thu, 26 Oct 2023 02:50:35 +0000 Subject: [PATCH 10/35] annotations --- autogen/oai/client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 8dda5ac3272..8bc97b30307 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import os import sys from typing import List, Optional, Dict, Callable From 8c1626cc94a4474ddc7e6f0e8c746ef19d4166d3 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Thu, 26 Oct 2023 02:51:24 +0000 Subject: [PATCH 11/35] import --- autogen/oai/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 8bc97b30307..35705f2b0fc 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -6,13 +6,13 @@ import logging import inspect from flaml.automl.logger import logger_formatter -from openai.types.chat import ChatCompletion -from openai.types.completion import Completion from autogen.oai.openai_utils import get_key try: from openai import OpenAI, APIError + from openai.types.chat import ChatCompletion + from openai.types.completion import Completion import diskcache ERROR = None From 8d42528c8955f65aa25306cced5afd4d77a664bd Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Thu, 26 Oct 2023 03:04:15 +0000 Subject: [PATCH 12/35] reduce test --- .github/workflows/build.yml | 4 ++-- .github/workflows/openai.yml | 16 ++++++++-------- setup.py | 14 +++++++------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5e5fd186bea..8e15e9d5d6a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -40,7 +40,7 @@ jobs: python -m pip install --upgrade pip wheel pip install -e . python -c "import autogen" - pip install -e.[mathchat,retrievechat,test] datasets pytest + pip install -e. 
pytest pip uninstall -y openai - name: Test with pytest if: matrix.python-version != '3.10' @@ -49,7 +49,7 @@ jobs: - name: Coverage if: matrix.python-version == '3.10' run: | - pip install coverage + pip install -e.[mathchat,test] coverage coverage run -a -m pytest test coverage xml - name: Upload coverage to Codecov diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml index 13e6c4124f2..210e188beb9 100644 --- a/.github/workflows/openai.yml +++ b/.github/workflows/openai.yml @@ -49,14 +49,14 @@ jobs: if: matrix.python-version != '3.11' run: | pip install -e .[mathchat] - - name: Install packages for RetrieveChat when needed - if: matrix.python-version == '3.9' - run: | - pip install -e .[retrievechat] - - name: Install packages for Teachable when needed - if: matrix.python-version == '3.11' - run: | - pip install -e .[teachable] + # - name: Install packages for RetrieveChat when needed + # if: matrix.python-version == '3.9' + # run: | + # pip install -e .[retrievechat] + # - name: Install packages for Teachable when needed + # if: matrix.python-version == '3.11' + # run: | + # pip install -e .[teachable] - name: Coverage if: matrix.python-version == '3.9' env: diff --git a/setup.py b/setup.py index 688a783f6e0..262093366c2 100644 --- a/setup.py +++ b/setup.py @@ -39,20 +39,20 @@ install_requires=install_requires, extras_require={ "test": [ - "chromadb", - "lancedb", + # "chromadb", + # "lancedb", "coverage>=5.3", - "datasets", + # "datasets", "ipykernel", "nbconvert", "nbformat", "pre-commit", - "pydantic==1.10.9", + # "pydantic==1.10.9", "pytest-asyncio", "pytest>=6.1.1", - "sympy", - "tiktoken", - "wolframalpha", + # "sympy", + # "tiktoken", + # "wolframalpha", ], "blendsearch": ["flaml[blendsearch]"], "mathchat": ["sympy", "pydantic==1.10.9", "wolframalpha"], From b8302a7583f79ffb45256ebc896367dbf99f8cc4 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Thu, 26 Oct 2023 03:11:04 +0000 Subject: [PATCH 13/35] skip test --- test/agentchat/test_teachable_agent.py | 4 +-- test/test_retrieve_utils.py | 34 ++++++++++++++------------ 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 698ab77ed5d..5a5be0379a1 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -1,5 +1,6 @@ try: from openai import OpenAI + from autogen.agentchat.contrib.teachable_agent import TeachableAgent except ImportError: skip = True else: @@ -8,7 +9,6 @@ import pytest import sys from autogen import ConversableAgent, config_list_from_json -from autogen.agentchat.contrib.teachable_agent import TeachableAgent from test_assistant_agent import OAI_CONFIG_LIST, KEY_LOC try: @@ -140,7 +140,7 @@ def use_task_advice_pair_phrasing(): @pytest.mark.skipif( skip or not sys.version.startswith("3.11"), - reason="do not run if openai is not installed or py!=3.11", + reason="do not run if dependency is not installed or py!=3.11", ) def test_all(): """Runs this file's unit tests.""" diff --git a/test/test_retrieve_utils.py b/test/test_retrieve_utils.py index 81fb1a0969a..c50b20fa34f 100644 --- a/test/test_retrieve_utils.py +++ b/test/test_retrieve_utils.py @@ -1,21 +1,24 @@ """ Unit test for retrieve_utils.py """ - -from autogen.retrieve_utils import ( - split_text_to_chunks, - extract_text_from_pdf, - split_files_to_chunks, - get_files_from_dir, - get_file_from_url, - is_url, - create_vector_db_from_dir, - query_vector_db, - num_tokens_from_text, - num_tokens_from_messages, - 
TEXT_FORMATS, -) - +try: + from autogen.retrieve_utils import ( + split_text_to_chunks, + extract_text_from_pdf, + split_files_to_chunks, + get_files_from_dir, + get_file_from_url, + is_url, + create_vector_db_from_dir, + query_vector_db, + num_tokens_from_text, + num_tokens_from_messages, + TEXT_FORMATS, + ) +except ImportError: + skip = True +else: + skip = False import os import sys import pytest @@ -30,6 +33,7 @@ integration, testing, and deployment.""" +@pytest.mark.skipif(skip, reason="dependency not installed") class TestRetrieveUtils: def test_num_tokens_from_text_custom_token_count_function(self): def custom_token_count_function(text): From b09e6bb43fb3d757453d9c9abfa58bbc9fa16c21 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Thu, 26 Oct 2023 03:13:58 +0000 Subject: [PATCH 14/35] skip test --- test/test_retrieve_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/test_retrieve_utils.py b/test/test_retrieve_utils.py index c50b20fa34f..dd94eed9d43 100644 --- a/test/test_retrieve_utils.py +++ b/test/test_retrieve_utils.py @@ -2,6 +2,8 @@ Unit test for retrieve_utils.py """ try: + import chromadb + import tiktoken from autogen.retrieve_utils import ( split_text_to_chunks, extract_text_from_pdf, @@ -20,10 +22,7 @@ else: skip = False import os -import sys import pytest -import chromadb -import tiktoken test_dir = os.path.join(os.path.dirname(__file__), "test_files") From f29fbc533b705cc69b65aa0c88145c960495d303 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Thu, 26 Oct 2023 03:19:14 +0000 Subject: [PATCH 15/35] skip test --- .github/workflows/build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8e15e9d5d6a..2d6b3cfcb44 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -49,7 +49,8 @@ jobs: - name: Coverage if: matrix.python-version == '3.10' run: | - pip install -e.[mathchat,test] coverage + pip install -e.[mathchat,test] + pip uninstall -y openai coverage run -a -m pytest test coverage xml - name: Upload coverage to Codecov From 4318e0e661f41fa2f30abc1275a4efc5c394f6e9 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Thu, 26 Oct 2023 03:39:53 +0000 Subject: [PATCH 16/35] debug test --- .github/workflows/openai.yml | 4 ++-- website/docs/Contribute.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml index 210e188beb9..8bac9767040 100644 --- a/.github/workflows/openai.yml +++ b/.github/workflows/openai.yml @@ -12,8 +12,8 @@ on: - 'notebook/agentchat_auto_feedback_from_code_execution.ipynb' - 'notebook/agentchat_function_call.ipynb' - 'notebook/agentchat_MathChat.ipynb' - - 'notebook/oai_completion.ipynb' - - 'notebook/oai_chatgpt_gpt4.ipynb' + # - 'notebook/oai_completion.ipynb' + # - 'notebook/oai_chatgpt_gpt4.ipynb' - '.github/workflows/openai.yml' jobs: diff --git a/website/docs/Contribute.md b/website/docs/Contribute.md index 8f264c9bd73..7d41b8c906e 100644 --- a/website/docs/Contribute.md +++ b/website/docs/Contribute.md @@ -120,7 +120,7 @@ Tests are automatically run via GitHub actions. There are two workflows: 1. [build.yml](https://github.com/microsoft/autogen/blob/main/.github/workflows/build.yml) 1. [openai.yml](https://github.com/microsoft/autogen/blob/main/.github/workflows/openai.yml) -The first workflow is required to pass for all PRs. The second workflow is required for changes that affect the openai tests. The second workflow requires approval to run. 
When writing tests that require openai, please use [`pytest.mark.skipif`](https://github.com/microsoft/autogen/blob/a456b512d5a933ce9707ce51c465ea35a9dd180c/test/test_with_openai.py#L13) to make them run in one python version only when openai is installed. If additional dependency for this test is required, install the dependency in the corresponding python version in [openai.yml](https://github.com/microsoft/autogen/blob/main/.github/workflows/openai.yml).
+The first workflow is required to pass for all PRs. The second workflow is required for changes that affect the openai tests. The second workflow requires approval to run. When writing tests that require openai, please use [`pytest.mark.skipif`](https://github.com/microsoft/autogen/blob/main/test/test_client.py#L13) to make them run in only one python version, and only when openai is installed. If an additional dependency is required for a test, install it for the corresponding python version in [openai.yml](https://github.com/microsoft/autogen/blob/main/.github/workflows/openai.yml).

### Coverage

From 153f1823a1814f6f03b54354d6f16db916e15d59 Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Thu, 26 Oct 2023 03:50:53 +0000
Subject: [PATCH 17/35] rename test

---
 .github/workflows/openai.yml                                    | 2 +-
 .../agentchat/{test_py10.py => test_function_call_groupchat.py} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename test/agentchat/{test_py10.py => test_function_call_groupchat.py} (100%)

diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml
index 8bac9767040..5d17fa464ff 100644
--- a/.github/workflows/openai.yml
+++ b/.github/workflows/openai.yml
@@ -77,7 +77,7 @@ jobs:
          OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
        run: |
          pip install nbconvert nbformat ipykernel
-          coverage run -a -m pytest test/agentchat/test_py10.py
+          coverage run -a -m pytest test/agentchat/test_function_call_groupchat.py
          coverage run -a -m pytest test/agentchat/test_teachable_agent.py
          coverage run -a -m pytest test/test_notebook.py
          coverage xml
diff --git a/test/agentchat/test_py10.py b/test/agentchat/test_function_call_groupchat.py
similarity index 100%
rename from test/agentchat/test_py10.py
rename to test/agentchat/test_function_call_groupchat.py

From 645d60e7370adeaf6237381da134fded99c42132 Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Thu, 26 Oct 2023 03:53:52 +0000
Subject: [PATCH 18/35] update workflow

---
 .github/workflows/openai.yml | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml
index 5d17fa464ff..71668b97f9b 100644
--- a/.github/workflows/openai.yml
+++ b/.github/workflows/openai.yml
@@ -12,8 +12,6 @@ on:
       - 'notebook/agentchat_auto_feedback_from_code_execution.ipynb'
       - 'notebook/agentchat_function_call.ipynb'
       - 'notebook/agentchat_MathChat.ipynb'
-      # - 'notebook/oai_completion.ipynb'
-      # - 'notebook/oai_chatgpt_gpt4.ipynb'
      - '.github/workflows/openai.yml'

 jobs:
@@ -49,14 +47,6 @@ jobs:
         if: matrix.python-version != '3.11'
         run: |
           pip install -e .[mathchat]
-      # - name: Install packages for RetrieveChat when needed
-      #   if: matrix.python-version == '3.9'
-      #   run: |
-      #     pip install -e .[retrievechat]
-      # - name: Install packages for Teachable when needed
-      #   if: matrix.python-version == '3.11'
-      #   run: |
-      #     pip install -e .[teachable]
       - name: Coverage
         if: matrix.python-version == '3.9'
         env:

From 62eabc8e303b55bcd80acba420e14c88a85e5e2b Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Thu, 26 Oct 2023 03:58:06 +0000
Subject: [PATCH 19/35] update workflow

---
.github/workflows/openai.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml index 71668b97f9b..1b0c69d9b36 100644 --- a/.github/workflows/openai.yml +++ b/.github/workflows/openai.yml @@ -4,7 +4,7 @@ name: OpenAI on: - pull_request_target: + pull_request: branches: ['main'] paths: - 'autogen/**' From f8956334e6345d9a0c871cc249dba9fcbb7da941 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Thu, 26 Oct 2023 04:06:58 +0000 Subject: [PATCH 20/35] env --- .github/workflows/openai.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml index 1b0c69d9b36..00a443ecd56 100644 --- a/.github/workflows/openai.yml +++ b/.github/workflows/openai.yml @@ -21,7 +21,7 @@ jobs: os: [ubuntu-latest] python-version: ["3.9", "3.10", "3.11"] runs-on: ${{ matrix.os }} - environment: openai + environment: openai1 steps: # checkout to pr branch - name: Checkout From a72c89df591c61ab137b8af5c8a7daead9a08124 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Thu, 26 Oct 2023 04:12:41 +0000 Subject: [PATCH 21/35] py version --- test/test_notebook.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_notebook.py b/test/test_notebook.py index a796204cd9c..e6106bacf69 100644 --- a/test/test_notebook.py +++ b/test/test_notebook.py @@ -45,8 +45,8 @@ def run_notebook(input_nb, output_nb="executed_openai_notebook.ipynb", save=Fals @pytest.mark.skipif( - skip or not sys.version.startswith("3.10"), - reason="do not run if openai is not installed or py!=3.10", + skip or not sys.version.startswith("3.11"), + reason="do not run if openai is not installed or py!=3.11", ) def test_agentchat_auto_feedback_from_code(save=False): run_notebook("agentchat_auto_feedback_from_code_execution.ipynb", save=save) From 9073eb7a370101b2b493ed4a647b63534cacc864 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Thu, 26 Oct 2023 13:19:10 +0000 Subject: [PATCH 22/35] doc improvement --- website/docs/Use-Cases/enhanced_inference.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/docs/Use-Cases/enhanced_inference.md b/website/docs/Use-Cases/enhanced_inference.md index e22a9ff16f0..21ce42c3ca1 100644 --- a/website/docs/Use-Cases/enhanced_inference.md +++ b/website/docs/Use-Cases/enhanced_inference.md @@ -4,7 +4,7 @@ `autogen.Completion` is a drop-in replacement of `openai.Completion` and `openai.ChatCompletion` for enhanced LLM inference using `openai<1`. There are a number of benefits of using `autogen` to perform inference: performance tuning, API unification, caching, error handling, multi-config inference, result filtering, templating and so on. -## Tune Inference Parameters +## Tune Inference Parameters (for openai<1) *Links to notebook examples:* * [Optimize for Code Generation](https://github.com/microsoft/autogen/blob/main/notebook/oai_completion.ipynb) @@ -120,7 +120,7 @@ For local LLMs, one can spin up an endpoint using a package like [FastChat](http ## Caching -API call results are cached locally and reused when the same request is issued. This is useful when repeating or continuing experiments for reproducibility and cost saving. It still allows controlled randomness by setting the "seed", using `set_cache` or specifying in `create()`. +API call results are cached locally and reused when the same request is issued. This is useful when repeating or continuing experiments for reproducibility and cost saving. 
It still allows controlled randomness by setting the "seed" specified in `OpenAIWrapper.create()` or the constructor of `OpenAIWrapper`.

 ## Error handling

@@ -191,7 +191,7 @@ response = client.create(
 )
 ```

-The example above will try to use text-ada-001, gpt-3.5-turbo-instruct, and text-davinci-003 iteratively, until a valid json string is returned or the last config is used. One can also repeat the same model in the list for multiple times to try one model multiple times for increasing the robustness of the final response.
+The example above will try to use text-ada-001, gpt-3.5-turbo-instruct, and text-davinci-003 iteratively, until a valid json string is returned or the last config is used. One can also repeat the same model multiple times in the list (with different seeds) to give a single model several attempts, increasing the robustness of the final response.

 *Advanced use case: Check this [blogpost](/blog/2023/05/18/GPT-adaptive-humaneval) to find how to improve GPT-4's coding performance from 68% to 90% while reducing the inference cost.*

From b0ad39b816ac6d412ca6655b3bac41031e00075e Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Thu, 26 Oct 2023 13:48:16 +0000
Subject: [PATCH 23/35] docstr update

---
 autogen/agentchat/contrib/teachable_agent.py     | 4 ++--
 autogen/agentchat/contrib/text_analyzer_agent.py | 2 +-
 autogen/agentchat/groupchat.py                   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py
index a3334d6836a..29d7f197ffc 100644
--- a/autogen/agentchat/contrib/teachable_agent.py
+++ b/autogen/agentchat/contrib/teachable_agent.py
@@ -18,7 +18,7 @@ def colored(x, *args, **kwargs):

 class TeachableAgent(ConversableAgent):
-    """Teachable Agent, a subclass of ConversableAgent using a vector database to remember user teachings.
+    """(Experimental) Teachable Agent, a subclass of ConversableAgent using a vector database to remember user teachings.
     In this class, the term 'user' refers to any caller (human or not) sending messages to this agent.
     Not yet tested in the group-chat setting."""

@@ -269,7 +269,7 @@ def analyze(self, text_to_analyze, analysis_instructions):

 class MemoStore:
-    """
+    """(Experimental)
     Provides memory storage and retrieval for a TeachableAgent, using a vector database.
     Each DB entry (called a memo) is a pair of strings: an input text and an output text.
     The input text might be a question, or a task to perform.
diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py
index 2df839a1172..cfc5bc174b3 100644
--- a/autogen/agentchat/contrib/text_analyzer_agent.py
+++ b/autogen/agentchat/contrib/text_analyzer_agent.py
@@ -10,7 +10,7 @@

 class TextAnalyzerAgent(ConversableAgent):
-    """Text Analysis agent, a subclass of ConversableAgent designed to analyze text as instructed."""
+    """(Experimental) Text Analysis agent, a subclass of ConversableAgent designed to analyze text as instructed."""

     def __init__(
         self,
diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py
index 9ed2ff77464..944288a74bc 100644
--- a/autogen/agentchat/groupchat.py
+++ b/autogen/agentchat/groupchat.py
@@ -10,7 +10,7 @@

 @dataclass
 class GroupChat:
-    """A group chat class that contains the following data fields:
+    """(In preview) A group chat class that contains the following data fields:
    - agents: a list of participating agents.
    - messages: a list of messages in the group chat.
    - max_round: the maximum number of rounds.
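To make the advice above about repeating the same model with different seeds concrete, here is a minimal sketch, assuming an `OPENAI_API_KEY` in the environment (the model name and seed values are illustrative, not prescribed):

```python
from autogen import OpenAIWrapper

def nonempty_filter(response, **_):
    # Accept the first non-empty response; otherwise fall through to the next config.
    return bool(OpenAIWrapper.extract_text_or_function_call(response)[0])

# Each entry repeats the same model with a different seed, so a rejected
# response is retried with fresh randomness instead of hitting the cache.
client = OpenAIWrapper(
    config_list=[
        {"model": "gpt-3.5-turbo", "seed": 41},
        {"model": "gpt-3.5-turbo", "seed": 42},
        {"model": "gpt-3.5-turbo", "seed": 43},
    ]
)
response = client.create(
    messages=[{"role": "user", "content": "2+2="}],
    filter_func=nonempty_filter,
)
```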
From 33de6a3c98677de6b2b76b17a36e6889729f095e Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Thu, 26 Oct 2023 15:25:28 +0000 Subject: [PATCH 24/35] openai<1 --- autogen/code_utils.py | 14 +++++++------- autogen/math_utils.py | 2 +- autogen/oai/completion.py | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/autogen/code_utils.py b/autogen/code_utils.py index 8c62dc17c6b..9965609f1b8 100644 --- a/autogen/code_utils.py +++ b/autogen/code_utils.py @@ -85,7 +85,7 @@ def extract_code( def generate_code(pattern: str = CODE_BLOCK_PATTERN, **config) -> Tuple[str, float]: - """(Deprecated) Generate code. + """(openai<1) Generate code. Args: pattern (Optional, str): The regular expression pattern for finding the code block. @@ -110,7 +110,7 @@ def generate_code(pattern: str = CODE_BLOCK_PATTERN, **config) -> Tuple[str, flo def improve_function(file_name, func_name, objective, **config): - """(Deprecated) Improve the function to achieve the objective.""" + """(openai<1) Improve the function to achieve the objective.""" params = {**_IMPROVE_FUNCTION_CONFIG, **config} # read the entire file into a str with open(file_name, "r") as f: @@ -131,7 +131,7 @@ def improve_function(file_name, func_name, objective, **config): def improve_code(files, objective, suggest_only=True, **config): - """(Deprecated) Improve the code to achieve a given objective. + """(openai<1) Improve the code to achieve a given objective. Args: files (list): A list of file names containing the source code. @@ -381,7 +381,7 @@ def execute_code( def generate_assertions(definition: str, **config) -> Tuple[str, float]: - """(Deprecated) Generate assertions for a function. + """(openai<1) Generate assertions for a function. Args: definition (str): The function definition, including the signature and docstr. @@ -418,7 +418,7 @@ def eval_function_completions( timeout: Optional[float] = 3, use_docker: Optional[bool] = True, ) -> Dict: - """(Deprecated) Select a response from a list of responses for the function completion task (using generated assertions), and/or evaluate if the task is successful using a gold test. + """(openai<1) Select a response from a list of responses for the function completion task (using generated assertions), and/or evaluate if the task is successful using a gold test. Args: responses (list): The list of responses. @@ -508,7 +508,7 @@ def __init__(self, assertions): self.metrics = self.responses = None def pass_assertions(self, context, response, **_): - """(Deprecated) Check if the response passes the assertions.""" + """(openai<1) Check if the response passes the assertions.""" responses = oai.Completion.extract_text(response) metrics = eval_function_completions(responses, context["definition"], assertions=self._assertions) self._assertions = metrics["assertions"] @@ -523,7 +523,7 @@ def implement( configs: Optional[List[Dict]] = None, assertions: Optional[Union[str, Callable[[str], Tuple[str, float]]]] = generate_assertions, ) -> Tuple[str, float]: - """(Deprecated) Implement a function from a definition. + """(openai<1) Implement a function from a definition. Args: definition (str): The function definition, including the signature and docstr. diff --git a/autogen/math_utils.py b/autogen/math_utils.py index eb512473389..00fcae57ad2 100644 --- a/autogen/math_utils.py +++ b/autogen/math_utils.py @@ -9,7 +9,7 @@ def solve_problem(problem: str, **config) -> str: - """(Deprecated) Solve the math problem. + """(openai<1) Solve the math problem. Args: problem (str): The problem statement. 
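The `(openai<1)` helpers relabeled in `code_utils.py` above lean on regex-based extraction of fenced code blocks; a simplified, self-contained sketch of the idea (this pattern is an approximation, not the library's exact `CODE_BLOCK_PATTERN`):

```python
import re

# Approximation of a fenced-code-block pattern: optional language tag, then body.
pattern = r"```(\w*)\n(.*?)\n```"
reply = "Here you go:\n```python\nprint('hi')\n```"
for lang, code in re.findall(pattern, reply, flags=re.DOTALL):
    print(lang or "unknown", "->", code)
```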
diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py index f4cf171e22d..1435400501d 100644 --- a/autogen/oai/completion.py +++ b/autogen/oai/completion.py @@ -37,7 +37,7 @@ class Completion(openai_Completion): - """(Deprecated) A class for OpenAI completion API. + """(openai<1) A class for OpenAI completion API. It also supports: ChatCompletion, Azure OpenAI API. """ @@ -1172,7 +1172,7 @@ def stop_logging(cls): class ChatCompletion(Completion): - """(Deprecated) A class for OpenAI API ChatCompletion. Share the same API as Completion.""" + """(openai<1) A class for OpenAI API ChatCompletion. Share the same API as Completion.""" default_search_space = Completion.default_search_space.copy() default_search_space["model"] = tune.choice(["gpt-3.5-turbo", "gpt-4"]) From 3b567c9dd80e7518c1c97334c890c3cca8143443 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Sat, 28 Oct 2023 02:35:31 +0000 Subject: [PATCH 25/35] add tiktoken to dependency --- autogen/token_count_utils.py | 4 ++-- setup.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/autogen/token_count_utils.py b/autogen/token_count_utils.py index fd9d61a10a1..9e254932faf 100644 --- a/autogen/token_count_utils.py +++ b/autogen/token_count_utils.py @@ -1,7 +1,7 @@ -import tiktoken -from typing import List, Union, Dict, Tuple +from typing import List, Union, Dict import logging import json +import tiktoken logger = logging.getLogger(__name__) diff --git a/setup.py b/setup.py index 2717b67afa9..d276a4912f3 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ "termcolor", "flaml", "python-dotenv", + "tiktoken", ] From 3cb39304c39b7b6c6068e0df91e2f5e1992d6d19 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Sat, 28 Oct 2023 12:22:38 +0000 Subject: [PATCH 26/35] filter_func --- autogen/oai/completion.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py index 1435400501d..5f990e54d7c 100644 --- a/autogen/oai/completion.py +++ b/autogen/oai/completion.py @@ -703,7 +703,7 @@ def create( context: Optional[Dict] = None, use_cache: Optional[bool] = True, config_list: Optional[List[Dict]] = None, - filter_func: Optional[Callable[[Dict, Dict, Dict], bool]] = None, + filter_func: Optional[Callable[[Dict, Dict], bool]] = None, raise_on_ratelimit_or_timeout: Optional[bool] = True, allow_format_str_template: Optional[bool] = False, **config, @@ -749,7 +749,7 @@ def create( ) ``` - filter_func (Callable, Optional): A function that takes in the context, the config and the response and returns a boolean to indicate whether the response is valid. E.g., + filter_func (Callable, Optional): A function that takes in the context and the response and returns a boolean to indicate whether the response is valid. 
E.g., ```python def yes_or_no_filter(context, config, response): @@ -803,9 +803,7 @@ def yes_or_no_filter(context, config, response): ) if response == -1: return response - pass_filter = filter_func is None or filter_func( - context=context, base_config=config, response=response - ) + pass_filter = filter_func is None or filter_func(context=context, response=response) if pass_filter or i == last: response["cost"] = cost + response["cost"] response["config_id"] = i From cc9f0e0a4ef0bb68dfe28fc9e9e346714764a743 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Sun, 29 Oct 2023 03:30:05 +0000 Subject: [PATCH 27/35] async test --- test/agentchat/test_async.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/agentchat/test_async.py b/test/agentchat/test_async.py index f8f89b7495b..ba332b25379 100644 --- a/test/agentchat/test_async.py +++ b/test/agentchat/test_async.py @@ -1,3 +1,4 @@ +import pytest import asyncio import autogen from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST @@ -44,6 +45,7 @@ def get_market_news(ind, ind_upper): return feeds_summary +@pytest.mark.asyncio async def test_stream(): try: import openai From 82f3712b8c583771af8122d2cfd5b74f1280b870 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Sun, 29 Oct 2023 14:29:32 +0000 Subject: [PATCH 28/35] dependency --- .github/workflows/openai.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml index 3d50b39ff7c..c56028dac00 100644 --- a/.github/workflows/openai.yml +++ b/.github/workflows/openai.yml @@ -42,6 +42,10 @@ jobs: if: matrix.python-version == '3.9' run: | pip install docker + - name: Install dependencies for test when needed + if: matrix.python-version == '3.10' # test_agentchat_function_call + run: | + pip install -e.[mathchat] - name: Coverage if: matrix.python-version == '3.9' env: From 420cbf0407ba3045550fed6c76b4dad022306833 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Sun, 29 Oct 2023 19:58:26 -0700 Subject: [PATCH 29/35] migration guide (#477) * migration guide * change in kwargs * simplify header * update optigude description --- README.md | 22 ++++++----- autogen/oai/client.py | 2 +- autogen/oai/completion.py | 20 +++++++++- notebook/agentchat_qdrant_RetrieveChat.ipynb | 4 +- notebook/oai_openai_utils.ipynb | 2 +- .../blog/2023-10-26-TeachableAgent/index.mdx | 2 +- .../{AutoGen-AgentChat.md => AgentChat.md} | 4 +- .../{AutoGen-Inference.md => Inference.md} | 2 +- website/docs/Getting-Started.md | 14 +++---- website/docs/Installation.md | 32 ++++++++++++---- website/docs/Use-Cases/agent_chat.md | 2 +- website/docs/Use-Cases/enhanced_inference.md | 37 +++++++++++++++++-- 12 files changed, 106 insertions(+), 37 deletions(-) rename website/docs/Examples/{AutoGen-AgentChat.md => AgentChat.md} (95%) rename website/docs/Examples/{AutoGen-Inference.md => Inference.md} (96%) diff --git a/README.md b/README.md index d95c7430f9b..d7747585287 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,11 @@ AutoGen is a framework that enables the development of LLM applications using mu ![AutoGen Overview](https://github.com/microsoft/autogen/blob/main/website/static/img/autogen_agentchat.png) -- AutoGen enables building next-gen LLM applications based on **multi-agent conversations** with minimal effort. It simplifies the orchestration, automation, and optimization of a complex LLM workflow. It maximizes the performance of LLM models and overcomes their weaknesses. -- It supports **diverse conversation patterns** for complex workflows. 
With customizable and conversable agents, developers can use AutoGen to build a wide range of conversation patterns concerning conversation autonomy, +- AutoGen enables building next-gen LLM applications based on [multi-agent conversations](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat) with minimal effort. It simplifies the orchestration, automation, and optimization of a complex LLM workflow. It maximizes the performance of LLM models and overcomes their weaknesses. +- It supports [diverse conversation patterns](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat#supporting-diverse-conversation-patterns) for complex workflows. With customizable and conversable agents, developers can use AutoGen to build a wide range of conversation patterns concerning conversation autonomy, the number of agents, and agent conversation topology. -- It provides a collection of working systems with different complexities. These systems span a **wide range of applications** from various domains and complexities. This demonstrates how AutoGen can easily support diverse conversation patterns. -- AutoGen provides **enhanced LLM inference**. It offers easy performance tuning, plus utilities like API unification and caching, and advanced usage patterns, such as error handling, multi-config inference, context programming, etc. +- It provides a collection of working systems with different complexities. These systems span a [wide range of applications](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat#diverse-applications-implemented-with-autogen) from various domains and complexities. This demonstrates how AutoGen can easily support diverse conversation patterns. +- AutoGen provides [enhanced LLM inference](https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#api-unification). It offers utilities like API unification and caching, and advanced usage patterns, such as error handling, multi-config inference, context programming, etc. AutoGen is powered by collaborative [research studies](https://microsoft.github.io/autogen/docs/Research) from Microsoft, Penn State University, and the University of Washington. @@ -42,14 +42,14 @@ The easiest way to start playing is [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/microsoft/autogen?quickstart=1) - 2. Copy OAI_CONFIG_LIST_sample to /notebook folder, name to OAI_CONFIG_LIST, and set the correct config. + 2. Copy OAI_CONFIG_LIST_sample to ./notebook folder, name to OAI_CONFIG_LIST, and set the correct config. 3. Start playing with the notebooks! ## Installation -AutoGen requires **Python version >= 3.8**. It can be installed from pip: +AutoGen requires **Python version >= 3.8, < 3.12**. It can be installed from pip: ```bash pip install pyautogen @@ -72,7 +72,7 @@ For LLM inference configurations, check the [FAQs](https://microsoft.github.io/a ## Multi-Agent Conversation Framework -Autogen enables the next-gen LLM applications with a generic multi-agent conversation framework. It offers customizable and conversable agents that integrate LLMs, tools, and humans. +Autogen enables the next-gen LLM applications with a generic [multi-agent conversation](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat) framework. It offers customizable and conversable agents that integrate LLMs, tools, and humans. 
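A minimal sketch of this pattern (it mirrors the quickstart shown elsewhere in these docs, and assumes an `OAI_CONFIG_LIST` file or environment variable holding endpoint configs):

```python
from autogen import AssistantAgent, UserProxyAgent, config_list_from_json

config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")
assistant = AssistantAgent("assistant", llm_config={"config_list": config_list})
# The user proxy relays the task and can execute code blocks the assistant writes.
user_proxy = UserProxyAgent("user_proxy", code_execution_config={"work_dir": "coding"})
user_proxy.initiate_chat(assistant, message="Plot a chart of NVDA and TESLA stock price change YTD.")
```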
By automating chat among multiple capable agents, one can easily make them collectively perform tasks autonomously or with human feedback, including tasks that require using tools via code.

Features of this use case include:
@@ -106,11 +106,13 @@ After the repo is cloned.
The figure below shows an example conversation flow with AutoGen.
![Agent Chat Example](https://github.com/microsoft/autogen/blob/main/website/static/img/chat_example.png)

-Please find more [code examples](https://microsoft.github.io/autogen/docs/Examples/AutoGen-AgentChat) for this feature.
+Please find more [code examples](https://microsoft.github.io/autogen/docs/Examples/AgentChat) for this feature.

## Enhanced LLM Inferences

-Autogen also helps maximize the utility out of the expensive LLMs such as ChatGPT and GPT-4. It offers enhanced LLM inference with powerful functionalities like tuning, caching, error handling, and templating. For example, you can optimize generations by LLM with your own tuning data, success metrics, and budgets.
+Autogen also helps maximize the utility of expensive LLMs such as ChatGPT and GPT-4. It offers [enhanced LLM inference](https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#api-unification) with powerful functionalities like caching, error handling, multi-config inference, and templating.
+
+

## Documentation
diff --git a/autogen/oai/client.py b/autogen/oai/client.py
index 35705f2b0fc..b6035162104 100644
--- a/autogen/oai/client.py
+++ b/autogen/oai/client.py
@@ -17,7 +17,7 @@

     ERROR = None
 except ImportError:
-    ERROR = ImportError("please install openai>=1 and diskcache to use the autogen.oai subpackage.")
+    ERROR = ImportError("Please install openai>=1 and diskcache to use autogen.OpenAIWrapper.")
     OpenAI = object
 logger = logging.getLogger(__name__)
 if not logger.handlers:
diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py
index 5f990e54d7c..88d53bca4c0 100644
--- a/autogen/oai/completion.py
+++ b/autogen/oai/completion.py
@@ -26,7 +26,10 @@

     ERROR = None
 except ImportError:
-    ERROR = ImportError("please install openai and diskcache to use the autogen.oai subpackage.")
+    ERROR = ImportError(
+        "(Deprecated) The autogen.Completion class requires openai<1 and diskcache. "
+        "Please switch to autogen.OpenAIWrapper for openai>=1."
+    )
     openai_Completion = object
 logger = logging.getLogger(__name__)
 if not logger.handlers:
@@ -567,6 +570,10 @@ def eval_func(responses, **data):
             dict: The optimized hyperparameter setting.
             tune.ExperimentAnalysis: The tuning results.
         """
+        logger.warning(
+            "tuning via Completion.tune is deprecated in pyautogen v0.2 and openai>=1. "
+            "flaml.tune supports tuning more generically."
+        )
         if ERROR:
             raise ERROR
         space = cls.default_search_space.copy()
@@ -775,6 +782,11 @@ def yes_or_no_filter(context, config, response):
             - `config_id`: the index of the config in the config_list that is used to generate the response.
             - `pass_filter`: whether the response passes the filter function. None if no filter is provided.
         """
+        logger.warning(
+            "Completion.create is deprecated in pyautogen v0.2 and openai>=1. "
+            "The new openai requires initiating a client for inference. "
+            "Please refer to https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#api-unification"
+        )
         if ERROR:
             raise ERROR

@@ -1159,6 +1171,12 @@ def start_logging(
             while the compact history dict has a linear size.
             reset_counter (bool): whether to reset the counter of the number of API calls.
         """
+        logger.warning(
+            "logging via Completion.start_logging is deprecated in pyautogen v0.2. "
+            "logging via OpenAIWrapper will be added back in a future release."
+        )
+        if ERROR:
+            raise ERROR
         cls._history_dict = {} if history_dict is None else history_dict
         cls._history_compact = compact
         cls._count_create = 0 if reset_counter or cls._count_create is None else cls._count_create
diff --git a/notebook/agentchat_qdrant_RetrieveChat.ipynb b/notebook/agentchat_qdrant_RetrieveChat.ipynb
index 3a97007c5d9..b05848c1c5d 100644
--- a/notebook/agentchat_qdrant_RetrieveChat.ipynb
+++ b/notebook/agentchat_qdrant_RetrieveChat.ipynb
@@ -95,14 +95,14 @@
     "    {\n",
     "        'model': 'gpt-4',\n",
     "        'api_key': '',\n",
-    "        'api_base': '',\n",
+    "        'base_url': '',\n",
     "        'api_type': 'azure',\n",
     "        'api_version': '2023-06-01-preview',\n",
     "    },\n",
     "    {\n",
     "        'model': 'gpt-3.5-turbo',\n",
     "        'api_key': '',\n",
-    "        'api_base': '',\n",
+    "        'base_url': '',\n",
     "        'api_type': 'azure',\n",
     "        'api_version': '2023-06-01-preview',\n",
     "    },\n",
diff --git a/notebook/oai_openai_utils.ipynb b/notebook/oai_openai_utils.ipynb
index 82dc865ef8a..24973e3d908 100644
--- a/notebook/oai_openai_utils.ipynb
+++ b/notebook/oai_openai_utils.ipynb
@@ -38,7 +38,7 @@
 "assistant = AssistantAgent(\n",
 "    name=\"assistant\",\n",
 "    llm_config={\n",
-"        \"request_timeout\": 600,\n",
+"        \"timeout\": 600,\n",
 "        \"seed\": 42,\n",
 "        \"config_list\": config_list,\n",
 "        \"temperature\": 0,\n",
diff --git a/website/blog/2023-10-26-TeachableAgent/index.mdx b/website/blog/2023-10-26-TeachableAgent/index.mdx
index 434e0946b54..51c2e56a38b 100644
--- a/website/blog/2023-10-26-TeachableAgent/index.mdx
+++ b/website/blog/2023-10-26-TeachableAgent/index.mdx
@@ -51,7 +51,7 @@ from autogen.agentchat.contrib.teachable_agent import TeachableAgent
 # and OAI_CONFIG_LIST_sample
 filter_dict = {"model": ["gpt-4"]}  # GPT-3.5 is less reliable than GPT-4 at learning from user feedback.
 config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict)
-llm_config={"config_list": config_list, "request_timeout": 120}
+llm_config={"config_list": config_list, "timeout": 120}
 ```

 4. Create the agents
diff --git a/website/docs/Examples/AutoGen-AgentChat.md b/website/docs/Examples/AgentChat.md
similarity index 95%
rename from website/docs/Examples/AutoGen-AgentChat.md
rename to website/docs/Examples/AgentChat.md
index a9a813ae6c1..961c44d5836 100644
--- a/website/docs/Examples/AutoGen-AgentChat.md
+++ b/website/docs/Examples/AgentChat.md
@@ -1,4 +1,4 @@
-# AutoGen - Automated Multi Agent Chat
+# Automated Multi Agent Chat

 AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation via multi-agent conversation. Please find documentation about this feature [here](/docs/Use-Cases/agent_chat).

@@ -25,7 +25,7 @@ Links to notebook examples:
   - Automated Chess Game Playing & Chitchatting by GPT-4 Agents - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_chess.ipynb)
   - Automated Continual Learning from New Data - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_stream.ipynb)
-  - [OptiGuide](https://github.com/microsoft/optiguide) - Large Language Models for Supply Chain Optimization.
+  - [OptiGuide](https://github.com/microsoft/optiguide) - Coding, Tool Using, Safeguarding & Question Answering for Supply Chain Optimization

 4.
**Tool Use** diff --git a/website/docs/Examples/AutoGen-Inference.md b/website/docs/Examples/Inference.md similarity index 96% rename from website/docs/Examples/AutoGen-Inference.md rename to website/docs/Examples/Inference.md index d68504a1c7c..ad608985ec4 100644 --- a/website/docs/Examples/AutoGen-Inference.md +++ b/website/docs/Examples/Inference.md @@ -1,4 +1,4 @@ -# AutoGen - Tune GPT Models +# Tune GPT Models AutoGen also offers a cost-effective hyperparameter optimization technique [EcoOptiGen](https://arxiv.org/abs/2303.04673) for tuning Large Language Models. The research study finds that tuning hyperparameters can significantly improve the utility of them. Please find documentation about this feature [here](/docs/Use-Cases/enhanced_inference). diff --git a/website/docs/Getting-Started.md b/website/docs/Getting-Started.md index fb16de2242a..63fc52f9455 100644 --- a/website/docs/Getting-Started.md +++ b/website/docs/Getting-Started.md @@ -8,11 +8,11 @@ AutoGen is a framework that enables development of LLM applications using multip ### Main Features -* AutoGen enables building next-gen LLM applications based on **multi-agent conversations** with minimal effort. It simplifies the orchestration, automation and optimization of a complex LLM workflow. It maximizes the performance of LLM models and overcome their weaknesses. -* It supports **diverse conversation patterns** for complex workflows. With customizable and conversable agents, developers can use AutoGen to build a wide range of conversation patterns concerning conversation autonomy, +- AutoGen enables building next-gen LLM applications based on [multi-agent conversations](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat) with minimal effort. It simplifies the orchestration, automation, and optimization of a complex LLM workflow. It maximizes the performance of LLM models and overcomes their weaknesses. +- It supports [diverse conversation patterns](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat#supporting-diverse-conversation-patterns) for complex workflows. With customizable and conversable agents, developers can use AutoGen to build a wide range of conversation patterns concerning conversation autonomy, the number of agents, and agent conversation topology. -* It provides a collection of working systems with different complexities. These systems span a **wide range of applications** from various domains and complexities. They demonstrate how AutoGen can easily support different conversation patterns. -* AutoGen provides **enhanced LLM inference**. It offers easy performance tuning, plus utilities like API unification & caching, and advanced usage patterns, such as error handling, multi-config inference, context programming etc. +- It provides a collection of working systems with different complexities. These systems span a [wide range of applications](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat#diverse-applications-implemented-with-autogen) from various domains and complexities. This demonstrates how AutoGen can easily support diverse conversation patterns. +- AutoGen provides [enhanced LLM inference](https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#api-unification). It offers utilities like API unification and caching, and advanced usage patterns, such as error handling, multi-config inference, context programming, etc. AutoGen is powered by collaborative [research studies](/docs/Research) from Microsoft, Penn State University, and University of Washington. 
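The features above all consume a list of endpoint configurations; a brief sketch of loading and filtering one (the model names in `filter_dict` are illustrative assumptions):

```python
import autogen

# Load configs from the OAI_CONFIG_LIST env var or file, keeping only gpt-4 entries.
config_list = autogen.config_list_from_json(
    env_or_file="OAI_CONFIG_LIST",
    filter_dict={"model": ["gpt-4", "gpt-4-32k"]},
)
```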
@@ -40,7 +40,7 @@ user_proxy.initiate_chat(assistant, message="Plot a chart of NVDA and TESLA stoc
 The figure below shows an example conversation flow with AutoGen.
 ![Agent Chat Example](/img/chat_example.png)

-* [Code examples](/docs/Examples/AutoGen-AgentChat).
+* [Code examples](/docs/Examples/AgentChat).
 * [Documentation](/docs/Use-Cases/agent_chat).

 #### Enhanced LLM Inferences
@@ -60,13 +60,13 @@ config, analysis = autogen.Completion.tune(
 response = autogen.Completion.create(context=test_instance, **config)
 ```

-* [Code examples](/docs/Examples/AutoGen-Inference).
+* [Code examples](/docs/Examples/Inference).
 * [Documentation](/docs/Use-Cases/enhanced_inference).

 ### Where to Go Next ?

 * Understand the use cases for [multi-agent conversation](/docs/Use-Cases/agent_chat) and [enhanced LLM inference](/docs/Use-Cases/enhanced_inference).
-* Find [code examples](/docs/Examples/AutoGen-AgentChat).
+* Find [code examples](/docs/Examples/AgentChat).
 * Read [SDK](/docs/reference/agentchat/conversable_agent/).
 * Learn about [research](/docs/Research) around AutoGen.
 * [Roadmap](https://github.com/orgs/microsoft/projects/989/views/3)
diff --git a/website/docs/Installation.md b/website/docs/Installation.md
index 2cacceda2c0..b9d892edb54 100644
--- a/website/docs/Installation.md
+++ b/website/docs/Installation.md
@@ -35,7 +35,7 @@ Now, you're ready to install AutoGen in the virtual environment you've just crea

 ## Python

-AutoGen requires **Python version >= 3.8**. It can be installed from pip:
+AutoGen requires **Python version >= 3.8, < 3.12**. It can be installed from pip:

 ```bash
 pip install pyautogen
@@ -49,6 +49,24 @@ or conda:
 conda install pyautogen -c conda-forge
 ```
 -->
+### Migration guide to v0.2

+openai v1 is a total rewrite of the library with many breaking changes. For example, inference now requires instantiating a client instead of using a global class method.
+Therefore, some changes are required for users of `pyautogen<0.2`.

+- `api_base` -> `base_url`, `request_timeout` -> `timeout` in `llm_config` and `config_list`. `max_retry_period` and `retry_wait_time` are deprecated. `max_retries` can be set for each client.
+- MathChat, RetrieveChat, and TeachableAgent are unsupported until they are tested in a future release.
+- `autogen.Completion` and `autogen.ChatCompletion` are deprecated. The essential functionalities are moved to `autogen.OpenAIWrapper`:
+```python
+from autogen import OpenAIWrapper
+client = OpenAIWrapper(config_list=config_list)
+response = client.create(messages=[{"role": "user", "content": "2+2="}])
+print(client.extract_text_or_function_call(response))
+```
+- Inference parameter tuning and inference logging features are currently unavailable in `OpenAIWrapper`. Logging will be added in a future release.
+Inference parameter tuning can be done via [`flaml.tune`](https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function).
+- `use_cache` is removed as a kwarg in `OpenAIWrapper.create()`; caching is now decided automatically by `seed` (int | None).

 ### Optional Dependencies
 * docker

@@ -61,9 +79,9 @@ pip install docker

 * blendsearch

-AutoGen offers a cost-effective hyperparameter optimization technique [EcoOptiGen](https://arxiv.org/abs/2303.04673) for tuning Large Language Models. Please install with the [blendsearch] option to use it.
+`pyautogen<0.2` offers a cost-effective hyperparameter optimization technique [EcoOptiGen](https://arxiv.org/abs/2303.04673) for tuning Large Language Models.
Please install with the [blendsearch] option to use it.
 ```bash
-pip install "pyautogen[blendsearch]"
+pip install "pyautogen[blendsearch]<0.2"
 ```

 Example notebooks:

@@ -72,9 +90,9 @@ Example notebooks:

 * retrievechat

-AutoGen supports retrieval-augmented generation tasks such as question answering and code generation with RAG agents. Please install with the [retrievechat] option to use it.
+`pyautogen<0.2` supports retrieval-augmented generation tasks such as question answering and code generation with RAG agents. Please install with the [retrievechat] option to use it.
 ```bash
-pip install "pyautogen[retrievechat]"
+pip install "pyautogen[retrievechat]<0.2"
 ```

 Example notebooks:

@@ -83,9 +101,9 @@ Example notebooks:

 * mathchat

-AutoGen offers an experimental agent for math problem solving. Please install with the [mathchat] option to use it.
+`pyautogen<0.2` offers an experimental agent for math problem solving. Please install with the [mathchat] option to use it.
 ```bash
-pip install "pyautogen[mathchat]"
+pip install "pyautogen[mathchat]<0.2"
 ```

 Example notebooks:
diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md
index 9062c1b3690..d834b7f3248 100644
--- a/website/docs/Use-Cases/agent_chat.md
+++ b/website/docs/Use-Cases/agent_chat.md
@@ -99,7 +99,7 @@ The figure below shows six examples of applications built using AutoGen.
   - Automated Chess Game Playing & Chitchatting by GPT-4 Agents - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_chess.ipynb)
   - Automated Continual Learning from New Data - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_stream.ipynb)
-  - [OptiGuide](https://github.com/microsoft/optiguide) - Large Language Models for Supply Chain Optimization.
+  - [OptiGuide](https://github.com/microsoft/optiguide) - Coding, Tool Using, Safeguarding & Question Answering for Supply Chain Optimization

 4. **Tool Use**

diff --git a/website/docs/Use-Cases/enhanced_inference.md b/website/docs/Use-Cases/enhanced_inference.md
index 21ce42c3ca1..5b5a1e81101 100644
--- a/website/docs/Use-Cases/enhanced_inference.md
+++ b/website/docs/Use-Cases/enhanced_inference.md
@@ -114,6 +114,23 @@ When chat models are used and `prompt` is given as the input to `autogen.Complet

 `autogen.OpenAIWrapper.create()` can be used to create completions for both chat and non-chat models, and both OpenAI API and Azure OpenAI API.

+```python
+from autogen import OpenAIWrapper
+# OpenAI endpoint
+client = OpenAIWrapper()
+# ChatCompletion
+response = client.create(messages=[{"role": "user", "content": "2+2="}], model="gpt-3.5-turbo")
+# extract the response text
+print(client.extract_text_or_function_call(response))
+# Azure OpenAI endpoint
+client = OpenAIWrapper(api_key=..., base_url=..., api_version=..., api_type="azure")
+# Completion
+response = client.create(prompt="2+2=", model="gpt-3.5-turbo-instruct")
+# extract the response text
+print(client.extract_text_or_function_call(response))
+
+```
+
 For local LLMs, one can spin up an endpoint using a package like [FastChat](https://github.com/lm-sys/FastChat), and then use the same API to send a request. See [here](/blog/2023/07/14/Local-LLMs) for examples on how to make inference with local LLMs.

 ## Caching

 API call results are cached locally and reused when the same request is issued.
This is useful when repeating or continuing experiments for reproducibility and cost saving. It still allows controlled randomness by setting the "seed" specified in `OpenAIWrapper.create()` or the constructor of `OpenAIWrapper`.

+```python
+client = OpenAIWrapper(seed=...)
+client.create(...)
+```
+
+```python
+client = OpenAIWrapper()
+client.create(seed=..., ...)
+```
+
+Caching is enabled by default with seed 41. To disable it, please set `seed` to None.
+
 ## Error handling

 ### Runtime error

@@ -133,7 +162,7 @@ API call results are cached locally and reused when the same request is issued.
 - `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request. Moreover,
 -->

-One can pass a list of configurations of different models/endpoints to mitigate the rate limits. For example,
+One can pass a list of configurations of different models/endpoints to mitigate the rate limits and other runtime errors. For example,

 ```python
 client = OpenAIWrapper(
@@ -158,7 +187,7 @@ client = OpenAIWrapper(
 )
 ```

-It will try querying Azure OpenAI gpt-4, OpenAI gpt-3.5-turbo, and a locally hosted llama2-chat-7B one by one,
+`client.create()` will try querying Azure OpenAI gpt-4, OpenAI gpt-3.5-turbo, and a locally hosted llama2-chat-7B one by one,
 until a valid result is returned. This can speed up the development process where the rate limit is a bottleneck. An error will be raised if the last choice fails. So make sure the last choice in the list has the best availability.

 For convenience, we provide a number of utility functions to load config lists.
@@ -184,8 +213,10 @@ def valid_json_filter(response, **_):
         pass
     return False

-response = client.create(
+client = OpenAIWrapper(
     config_list=[{"model": "text-ada-001"}, {"model": "gpt-3.5-turbo-instruct"}, {"model": "text-davinci-003"}],
+)
+response = client.create(
     prompt="How to construct a json request to Bing API to search for 'latest AI news'? 
Return the JSON request.", filter_func=valid_json_filter, ) From 1edddb872125c4f0b189a442d9696f251f1cbe9d Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Tue, 31 Oct 2023 16:25:28 +0000 Subject: [PATCH 30/35] deal with azure gpt-3.5 --- autogen/oai/client.py | 5 ++++- test/{ => oai}/test_client.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) rename test/{ => oai}/test_client.py (92%) diff --git a/autogen/oai/client.py b/autogen/oai/client.py index b6035162104..96dc02d38de 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -90,7 +90,7 @@ def _process_for_azure(self, config: Dict, extra_kwargs: Dict, segment: str = "d # remove the api_version from extra_kwargs extra_kwargs.pop("api_version") if segment == "extra": - return config + return # deal with api_type api_type = extra_kwargs.get("api_type") if api_type is not None and api_type.startswith("azure") and headers_segment not in config: @@ -102,6 +102,9 @@ def _process_for_azure(self, config: Dict, extra_kwargs: Dict, segment: str = "d model = extra_kwargs.get("model") if model is None: return + if "gpt-3.5" in model: + # hack for azure gpt-3.5 + extra_kwargs["model"] = model = model.replace("gpt-3.5", "gpt-35") base_url = config.get("base_url") if base_url is None: raise ValueError("to use azure openai api, base_url must be specified.") diff --git a/test/test_client.py b/test/oai/test_client.py similarity index 92% rename from test/test_client.py rename to test/oai/test_client.py index 99fefa34d03..c74332588b6 100644 --- a/test/test_client.py +++ b/test/oai/test_client.py @@ -1,6 +1,6 @@ import pytest from autogen import OpenAIWrapper, config_list_from_json, config_list_openai_aoai -from test_code import OAI_CONFIG_LIST, KEY_LOC +from test_utils import OAI_CONFIG_LIST, KEY_LOC try: from openai import OpenAI @@ -15,7 +15,7 @@ def test_aoai_chat_completion(): config_list = config_list_from_json( env_or_file=OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={"api_type": ["azure"]}, + filter_dict={"api_type": ["azure"], "model": ["gpt-3.5-turbo"]}, ) client = OpenAIWrapper(config_list=config_list) response = client.create(messages=[{"role": "user", "content": "2+2="}]) From 4555eb37f392d2b6b6946c30cee20411f52a67d5 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Wed, 1 Nov 2023 16:43:14 +0000 Subject: [PATCH 31/35] add back test_eval_math_responses --- test/agentchat/test_function_call.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/agentchat/test_function_call.py b/test/agentchat/test_function_call.py index 9aea4ce4315..ef2ad5cc3ee 100644 --- a/test/agentchat/test_function_call.py +++ b/test/agentchat/test_function_call.py @@ -11,7 +11,7 @@ @pytest.mark.skipif(OpenAI is None, reason="openai>=1 not installed") -def _test_eval_math_responses(): +def test_eval_math_responses(): config_list = autogen.config_list_from_models( KEY_LOC, exclude="aoai", model_list=["gpt-4-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k"] ) @@ -36,8 +36,8 @@ def _test_eval_math_responses(): }, }, ] - response = autogen.ChatCompletion.create( - config_list=config_list, + client = autogen.OpenAIWrapper(config_list=config_list) + response = client.create( messages=[ { "role": "user", @@ -47,10 +47,10 @@ def _test_eval_math_responses(): functions=functions, ) print(response) - responses = autogen.ChatCompletion.extract_text_or_function_call(response) + responses = client.extract_text_or_function_call(response) print(responses[0]) - function_call = responses[0]["function_call"] - name, arguments = 
function_call["name"], json.loads(function_call["arguments"]) + function_call = responses[0].function_call + name, arguments = function_call.name, json.loads(function_call.arguments) assert name == "eval_math_responses" print(arguments["responses"]) # if isinstance(arguments["responses"], str): @@ -192,4 +192,4 @@ def get_number(): # test_json_extraction() # test_execute_function() asyncio.run(test_a_execute_function()) - # _test_eval_math_responses() + test_eval_math_responses() From 3bf520a5e763d1318a0f8f4d5173c6edc6f0719b Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Wed, 1 Nov 2023 21:09:13 +0000 Subject: [PATCH 32/35] timeout --- test/agentchat/test_async_get_human_input.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/agentchat/test_async_get_human_input.py b/test/agentchat/test_async_get_human_input.py index cdc7ea2aa7e..fc8a8e31504 100644 --- a/test/agentchat/test_async_get_human_input.py +++ b/test/agentchat/test_async_get_human_input.py @@ -16,7 +16,7 @@ async def test_async_get_human_input(): assistant = autogen.AssistantAgent( name="assistant", max_consecutive_auto_reply=2, - llm_config={"request_timeout": 600, "seed": 41, "config_list": config_list, "temperature": 0}, + llm_config={"timeout": 600, "seed": 41, "config_list": config_list, "temperature": 0}, ) user_proxy = autogen.UserProxyAgent(name="user", human_input_mode="ALWAYS", code_execution_config=False) From 8bb6e8297356a58696a120e3a88dd597b09be748 Mon Sep 17 00:00:00 2001 From: Li Jiang Date: Sat, 4 Nov 2023 11:42:28 +0800 Subject: [PATCH 33/35] Add back tests for RetrieveChat (#480) * Add back tests for RetrieveChat * Fix format * Update dependencies order * Fix path * Fix path * Fix path * Fix tests * Add not run openai on MacOS or Win * Update skip openai tests * Remove unnecessary dependencies, improve format * Add py3.8 for testing qdrant * Fix multiline error of windows * Add openai tests * Add dependency mathchat, remove unused envs --- .github/workflows/build.yml | 2 +- .github/workflows/contrib-openai.yml | 58 ++++++++++++++++++ .github/workflows/contrib-tests.yml | 59 +++++++++++++++++++ setup.py | 10 +--- .../{ => contrib}/test_qdrant_retrievechat.py | 33 ++++++----- .../{ => contrib}/test_retrievechat.py | 18 +++--- test/test_retrieve_utils.py | 6 +- 7 files changed, 148 insertions(+), 38 deletions(-) create mode 100644 .github/workflows/contrib-openai.yml create mode 100644 .github/workflows/contrib-tests.yml rename test/agentchat/{ => contrib}/test_qdrant_retrievechat.py (81%) rename test/agentchat/{ => contrib}/test_retrievechat.py (84%) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2d6b3cfcb44..6432bf0e529 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -49,7 +49,7 @@ jobs: - name: Coverage if: matrix.python-version == '3.10' run: | - pip install -e.[mathchat,test] + pip install -e .[mathchat,test] pip uninstall -y openai coverage run -a -m pytest test coverage xml diff --git a/.github/workflows/contrib-openai.yml b/.github/workflows/contrib-openai.yml new file mode 100644 index 00000000000..155d951e92f --- /dev/null +++ b/.github/workflows/contrib-openai.yml @@ -0,0 +1,58 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: OpenAI4ContribTests + +on: + pull_request: + branches: ['main', 'dev/v0.2'] + paths: + - 'autogen/**' + - 
'test/agentchat/contrib/**' + - '.github/workflows/contrib-openai.yml' + - 'setup.py' + +jobs: + RetrieveChatTest: + strategy: + matrix: + os: [ubuntu-latest] + python-version: ["3.10"] + runs-on: ${{ matrix.os }} + environment: openai1 + steps: + # checkout to pr branch + - name: Checkout + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install packages and dependencies + run: | + docker --version + python -m pip install --upgrade pip wheel + pip install -e . + python -c "import autogen" + pip install coverage pytest-asyncio + - name: Install packages for test when needed + run: | + pip install docker + pip install qdrant_client[fastembed] + pip install -e .[retrievechat] + - name: Coverage + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }} + OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }} + run: | + coverage run -a -m pytest test/agentchat/contrib/test_retrievechat.py test/agentchat/contrib/test_qdrant_retrievechat.py + coverage xml + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml + flags: unittests diff --git a/.github/workflows/contrib-tests.yml b/.github/workflows/contrib-tests.yml new file mode 100644 index 00000000000..75381892658 --- /dev/null +++ b/.github/workflows/contrib-tests.yml @@ -0,0 +1,59 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: ContribTests + +on: + pull_request: + branches: ['main', 'dev/v0.2'] + paths: + - 'autogen/**' + - 'test/agentchat/contrib/**' + - '.github/workflows/contrib-tests.yml' + - 'setup.py' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +jobs: + RetrieveChatTest: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-2019] + python-version: ["3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install packages and dependencies for all tests + run: | + python -m pip install --upgrade pip wheel + pip install pytest + - name: Install qdrant_client when python-version is 3.10 + if: matrix.python-version == '3.10' || matrix.python-version == '3.8' + run: | + pip install qdrant_client[fastembed] + - name: Install packages and dependencies for RetrieveChat + run: | + pip install -e .[retrievechat] + pip uninstall -y openai + - name: Test RetrieveChat + run: | + pytest test/test_retrieve_utils.py test/agentchat/contrib/test_retrievechat.py test/agentchat/contrib/test_qdrant_retrievechat.py + - name: Coverage + if: matrix.python-version == '3.10' + run: | + pip install coverage>=5.3 + coverage run -a -m pytest test/test_retrieve_utils.py test/agentchat/contrib + coverage xml + - name: Upload coverage to Codecov + if: matrix.python-version == '3.10' + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml + flags: unittests diff --git a/setup.py b/setup.py index 
d276a4912f3..b9977ed5ec1 100644 --- a/setup.py +++ b/setup.py @@ -40,25 +40,17 @@ install_requires=install_requires, extras_require={ "test": [ - # "chromadb", - # "lancedb", "coverage>=5.3", - # "datasets", "ipykernel", "nbconvert", "nbformat", "pre-commit", - # "pydantic==1.10.9", "pytest-asyncio", "pytest>=6.1.1", - # "sympy", - # "tiktoken", - # "wolframalpha", - # "qdrant_client[fastembed]", ], "blendsearch": ["flaml[blendsearch]"], "mathchat": ["sympy", "pydantic==1.10.9", "wolframalpha"], - "retrievechat": ["chromadb", "tiktoken", "sentence_transformers", "pypdf", "ipython"], + "retrievechat": ["chromadb", "sentence_transformers", "pypdf", "ipython"], "teachable": ["chromadb"], }, classifiers=[ diff --git a/test/agentchat/test_qdrant_retrievechat.py b/test/agentchat/contrib/test_qdrant_retrievechat.py similarity index 81% rename from test/agentchat/test_qdrant_retrievechat.py rename to test/agentchat/contrib/test_qdrant_retrievechat.py index 3202b04a0df..1d3c5afd6af 100644 --- a/test/agentchat/test_qdrant_retrievechat.py +++ b/test/agentchat/contrib/test_qdrant_retrievechat.py @@ -1,10 +1,11 @@ import os - +import sys import pytest - from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent -from autogen import ChatCompletion, config_list_from_json -from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST +from autogen import config_list_from_json + +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) +from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402 try: from qdrant_client import QdrantClient @@ -19,25 +20,27 @@ except ImportError: QDRANT_INSTALLED = False -test_dir = os.path.join(os.path.dirname(__file__), "..", "test_files") +try: + import openai + + OPENAI_INSTALLED = True +except ImportError: + OPENAI_INSTALLED = False +test_dir = os.path.join(os.path.dirname(__file__), "../..", "test_files") -@pytest.mark.skipif(not QDRANT_INSTALLED, reason="qdrant_client is not installed") -def test_retrievechat(): - try: - import openai - except ImportError: - return +@pytest.mark.skipif( + sys.platform in ["darwin", "win32"] or not QDRANT_INSTALLED or not OPENAI_INSTALLED, + reason="do not run on MacOS or windows or dependency is not installed", +) +def test_retrievechat(): conversations = {} - # ChatCompletion.start_logging(conversations) + # ChatCompletion.start_logging(conversations) # deprecated in v0.2 config_list = config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={ - "model": ["gpt-4", "gpt4", "gpt-4-32k", "gpt-4-32k-0314"], - }, ) assistant = RetrieveAssistantAgent( diff --git a/test/agentchat/test_retrievechat.py b/test/agentchat/contrib/test_retrievechat.py similarity index 84% rename from test/agentchat/test_retrievechat.py rename to test/agentchat/contrib/test_retrievechat.py index 3e6d65d5a36..d701ebc532e 100644 --- a/test/agentchat/test_retrievechat.py +++ b/test/agentchat/contrib/test_retrievechat.py @@ -1,9 +1,13 @@ import pytest +import os import sys import autogen -from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST + +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) +from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402 try: + import openai from autogen.agentchat.contrib.retrieve_assistant_agent import ( RetrieveAssistantAgent, ) @@ -20,23 +24,15 @@ @pytest.mark.skipif( sys.platform in ["darwin", "win32"] or skip_test, - reason="do not run on MacOS or windows", + reason="do not run on MacOS or windows or dependency is not installed", ) def 
test_retrievechat():
-    try:
-        import openai
-    except ImportError:
-        return
-
     conversations = {}
-    # autogen.ChatCompletion.start_logging(conversations)
+    # autogen.ChatCompletion.start_logging(conversations)  # deprecated in v0.2

     config_list = autogen.config_list_from_json(
         OAI_CONFIG_LIST,
         file_location=KEY_LOC,
-        filter_dict={
-            "model": ["gpt-4", "gpt4", "gpt-4-32k", "gpt-4-32k-0314"],
-        },
     )

     assistant = RetrieveAssistantAgent(
diff --git a/test/test_retrieve_utils.py b/test/test_retrieve_utils.py
index 9d08d171ef6..2423c1e0a2e 100644
--- a/test/test_retrieve_utils.py
+++ b/test/test_retrieve_utils.py
@@ -28,7 +28,7 @@ integration, testing, and deployment."""
-@pytest.mark.skipif(skip, reason="dependency not installed")
+@pytest.mark.skipif(skip, reason="dependency is not installed")
 class TestRetrieveUtils:
     def test_split_text_to_chunks(self):
         long_text = "A" * 10000
@@ -167,7 +167,9 @@ def custom_text_split_function(text):
     def test_retrieve_utils(self):
         client = chromadb.PersistentClient(path="/tmp/chromadb")
-        create_vector_db_from_dir(dir_path="./website/docs", client=client, collection_name="autogen-docs")
+        create_vector_db_from_dir(
+            dir_path="./website/docs", client=client, collection_name="autogen-docs", get_or_create=True
+        )
         results = query_vector_db(
             query_texts=[
                 "How can I use AutoGen UserProxyAgent and AssistantAgent to do code generation?",

From a786cb678ca93569fdb2d9ddc3566477e2e2a2c3 Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Sat, 4 Nov 2023 03:51:17 +0000
Subject: [PATCH 34/35] retrieve chat is tested

---
 website/docs/Installation.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/Installation.md b/website/docs/Installation.md
index b9d892edb54..508f207765b 100644
--- a/website/docs/Installation.md
+++ b/website/docs/Installation.md
@@ -55,7 +55,7 @@ openai v1 is a total rewrite of the library with many breaking changes. For exam
 Therefore, some changes are required for users of `pyautogen<0.2`.

 - `api_base` -> `base_url`, `request_timeout` -> `timeout` in `llm_config` and `config_list`. `max_retry_period` and `retry_wait_time` are deprecated. `max_retries` can be set for each client.
-- MathChat, RetrieveChat, and TeachableAgent are unsupported until they are tested in future release.
+- MathChat and TeachableAgent are unsupported until they are tested in a future release.
 - `autogen.Completion` and `autogen.ChatCompletion` are deprecated. 
The essential functionalities are moved to `autogen.OpenAIWrapper`: ```python from autogen import OpenAIWrapper From 1f1459b27c5a41ad889e057157302039939b1992 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Sat, 4 Nov 2023 03:59:33 +0000 Subject: [PATCH 35/35] bump version to 0.2.0b1 --- .github/workflows/contrib-openai.yml | 6 +++--- .github/workflows/openai.yml | 4 ++-- autogen/version.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/contrib-openai.yml b/.github/workflows/contrib-openai.yml index 155d951e92f..2f250071639 100644 --- a/.github/workflows/contrib-openai.yml +++ b/.github/workflows/contrib-openai.yml @@ -1,11 +1,11 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# This workflow will install Python dependencies and run tests # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: OpenAI4ContribTests on: - pull_request: - branches: ['main', 'dev/v0.2'] + pull_request_target: + branches: ['main'] paths: - 'autogen/**' - 'test/agentchat/contrib/**' diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml index c56028dac00..6f966337bed 100644 --- a/.github/workflows/openai.yml +++ b/.github/workflows/openai.yml @@ -1,10 +1,10 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# This workflow will install Python dependencies and run tests with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: OpenAI on: - pull_request: + pull_request_target: branches: ['main'] paths: - 'autogen/**' diff --git a/autogen/version.py b/autogen/version.py index fb69db9cf4d..a7621268bde 100644 --- a/autogen/version.py +++ b/autogen/version.py @@ -1 +1 @@ -__version__ = "0.1.14" +__version__ = "0.2.0b1"
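
A note on the `gpt-35` hack in PATCH 30 above: Azure OpenAI exposes this model family as `gpt-35-turbo` (no dot in the model/deployment name), so the client rewrites `gpt-3.5*` names before building the Azure request. Below is a minimal standalone sketch of that substitution; the function name `normalize_azure_model` is illustrative and not part of the library:

```python
def normalize_azure_model(extra_kwargs: dict) -> None:
    """Rewrite gpt-3.5* model names to gpt-35* in place for Azure configs.

    Standalone sketch of the replacement done inside _process_for_azure
    in PATCH 30; not the library implementation itself.
    """
    api_type = extra_kwargs.get("api_type")
    model = extra_kwargs.get("model")
    if api_type is not None and api_type.startswith("azure") and model and "gpt-3.5" in model:
        # Azure names this family "gpt-35-*", with no dot.
        extra_kwargs["model"] = model.replace("gpt-3.5", "gpt-35")


kwargs = {"api_type": "azure", "model": "gpt-3.5-turbo"}
normalize_azure_model(kwargs)
assert kwargs["model"] == "gpt-35-turbo"
```

Doing the rewrite inside the client lets user-facing configs keep the canonical `gpt-3.5-turbo` name for both OpenAI and Azure endpoints.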
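
The tightened `filter_dict` in the relocated `test_aoai_chat_completion` (also PATCH 30) keeps only config entries whose fields match one of the listed values. A small usage sketch, assuming an `OAI_CONFIG_LIST` JSON file shaped like the sample at the top of this series sits in the working directory (the file name and location here are assumptions):

```python
import autogen

config_list = autogen.config_list_from_json(
    env_or_file="OAI_CONFIG_LIST",
    filter_dict={"api_type": ["azure"], "model": ["gpt-3.5-turbo"]},
)
# Only Azure entries for gpt-3.5-turbo survive the filter.
print(len(config_list))
```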
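
Finally, the Installation.md hunk in PATCH 34 points to `autogen.OpenAIWrapper` as the replacement for `autogen.Completion` and `autogen.ChatCompletion`. A hedged end-to-end sketch of that pattern, using placeholder credentials rather than a working config:

```python
from autogen import OpenAIWrapper

# Placeholder config for illustration; in practice load it with
# config_list_from_json, as the tests in this series do.
config_list = [
    {
        "model": "gpt-3.5-turbo",
        "api_key": "sk-PLACEHOLDER",  # not a real key
        "base_url": "https://api.openai.com/v1",  # renamed from api_base
    }
]

client = OpenAIWrapper(config_list=config_list)
response = client.create(messages=[{"role": "user", "content": "2+2="}])
# extract_text_or_function_call pulls the message texts or function
# calls out of the response, as test_eval_math_responses does above.
print(client.extract_text_or_function_call(response))
```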